import React from 'react'; 
import {Link} from 'react-router-dom'; 
import {useRCustomEffect} from '../../../useCustomEffect'; 
import imgPivotLongerPart4 from '../tidyr_graphics/pivot_longer_plot.png';

export default function PivotLongerPart4(){
useRCustomEffect()
return ( <div>
<div className="page-columns page-rows-contents page-layout-article" id="quarto-content">
<main className="content" id="quarto-document-content">
<header className="quarto-title-block default" id="title-block-header">
<div className="quarto-title">
<h1 className="title">Gather Columns into Longer and Narrower Dataset (4/4): <em>Deal with Multiple Observations Per Row</em></h1>
</div>
<div className="quarto-title-meta">
</div>
</header>
<p>So far, we have been working with data frames that have one observation per row, but many important pivoting problems involve multiple observations per row. You can usually recognize this case when the input column names contain both variable names (which rightly belong in column names) and observation identifiers (which should instead be recorded in separate rows). In this section, you’ll learn how to pivot this sort of data.</p>
<p><strong>e.g. 1.</strong> In the following dataset, two attributes are recorded for each child: <code>gender</code> and <code>dob</code> (date of birth). <code>child1</code> and <code>child2</code> are essentially different observations; instead of occupying different rows, as they would in a tidy dataset, they occupy different columns, rendering the dataset untidy.</p>
<div className="cell" data-layout-align="center">
<div className="sourceCode cell-code" id="cb1"><pre className="sourceCode r code-with-copy"><code className="sourceCode r"><span id="cb1-1"><a aria-hidden="true" href="#cb1-1" tabindex="-1"></a><span className="fu">library</span>(tidyr)</span>
<span id="cb1-2"><a aria-hidden="true" href="#cb1-2" tabindex="-1"></a><span className="fu">library</span>(dplyr)</span>
<span id="cb1-3"><a aria-hidden="true" href="#cb1-3" tabindex="-1"></a></span><br/>
<span id="cb1-4"><a aria-hidden="true" href="#cb1-4" tabindex="-1"></a>family <span className="ot">&lt;-</span> <span className="fu">tribble</span>(</span>
<span id="cb1-5"><a aria-hidden="true" href="#cb1-5" tabindex="-1"></a>  <span className="sc">~</span>family,  <span className="sc">~</span>dob_child1,  <span className="sc">~</span>dob_child2, <span className="sc">~</span>gender_child1, <span className="sc">~</span>gender_child2,</span>
<span id="cb1-6"><a aria-hidden="true" href="#cb1-6" tabindex="-1"></a>  <span className="dv">1</span>L, <span className="st">"2021-02-23"</span>, <span className="st">"2024-06-03"</span>,             <span className="dv">1</span>L,             <span className="dv">2</span>L,</span>
<span id="cb1-7"><a aria-hidden="true" href="#cb1-7" tabindex="-1"></a>  <span className="dv">2</span>L, <span className="st">"1920-06-22"</span>,           <span className="cn">NA</span>,             <span className="dv">2</span>L,             <span className="cn">NA</span>,</span>
<span id="cb1-8"><a aria-hidden="true" href="#cb1-8" tabindex="-1"></a>  <span className="dv">3</span>L, <span className="st">"2019-07-11"</span>, <span className="st">"2023-12-26"</span>,             <span className="dv">2</span>L,             <span className="dv">2</span>L,</span>
<span id="cb1-9"><a aria-hidden="true" href="#cb1-9" tabindex="-1"></a>  <span className="dv">4</span>L, <span className="st">"2024-10-10"</span>, <span className="st">"2024-10-10"</span>,             <span className="dv">1</span>L,             <span className="dv">1</span>L,</span>
<span id="cb1-10"><a aria-hidden="true" href="#cb1-10" tabindex="-1"></a>  <span className="dv">5</span>L, <span className="st">"2018-12-05"</span>, <span className="st">"2023-02-28"</span>,             <span className="dv">2</span>L,             <span className="dv">1</span>L,</span>
<span id="cb1-11"><a aria-hidden="true" href="#cb1-11" tabindex="-1"></a>)</span>
<span id="cb1-12"><a aria-hidden="true" href="#cb1-12" tabindex="-1"></a>family <span className="ot">&lt;-</span> family <span className="sc">%&gt;%</span> <span className="fu">mutate_at</span>(<span className="fu">vars</span>(<span className="fu">starts_with</span>(<span className="st">"dob"</span>)), readr<span className="sc">::</span>parse_date)</span>
<span id="cb1-13"><a aria-hidden="true" href="#cb1-13" tabindex="-1"></a>family</span></code></pre></div>
<div className="cell-output cell-output-stdout">
<pre className="demo-highlight sourceCode r rcss"><code className="sourceCode r"># A tibble: 5 × 5
<br/>  family dob_child1 dob_child2 gender_child1 gender_child2
<br/>   &lt;int&gt; &lt;date&gt;     &lt;date&gt;             &lt;int&gt;         &lt;int&gt;
<br/>1      1 2021-02-23 2024-06-03             1             2
<br/>2      2 1920-06-22 NA                     2            NA
<br/>3      3 2019-07-11 2023-12-26             2             2
<br/>4      4 2024-10-10 2024-10-10             1             1
<br/>5      5 2018-12-05 2023-02-28             2             1</code></pre>
</div>
</div>
<p>To tidy up the dataset, <code>child1</code> and <code>child2</code> should be cell values in the same column, and <code>gender</code> and <code>dob</code> should be kept as separate columns. The following script can be conceptualized as pivoting the data at the <code>child</code> level while maintaining the original structure of <code>dob</code> and <code>gender</code>.</p>
<div className="cell" data-layout-align="center">
<div className="sourceCode cell-code" id="cb3"><pre className="sourceCode r code-with-copy"><code className="sourceCode r"><span id="cb3-1"><a aria-hidden="true" href="#cb3-1" tabindex="-1"></a>family <span className="sc">%&gt;%</span> </span>
<span id="cb3-2"><a aria-hidden="true" href="#cb3-2" tabindex="-1"></a>  <span className="fu">pivot_longer</span>(</span>
<span id="cb3-3"><a aria-hidden="true" href="#cb3-3" tabindex="-1"></a>    <span className="sc">-</span>family, </span>
<span id="cb3-4"><a aria-hidden="true" href="#cb3-4" tabindex="-1"></a>    <span className="at">names_sep =</span> <span className="st">"_"</span>, </span>
<span id="cb3-5"><a aria-hidden="true" href="#cb3-5" tabindex="-1"></a>    <span className="at">names_to =</span> <span className="fu">c</span>(<span className="st">".value"</span>, <span className="st">"child"</span>), </span>
<span id="cb3-6"><a aria-hidden="true" href="#cb3-6" tabindex="-1"></a>    <span className="at">values_drop_na =</span> <span className="cn">TRUE</span></span>
<span id="cb3-7"><a aria-hidden="true" href="#cb3-7" tabindex="-1"></a>  )</span></code></pre></div>
<div className="cell-output cell-output-stdout">
<pre className="demo-highlight sourceCode r rcss"><code className="sourceCode r"># A tibble: 9 × 4
<br/>  family child  dob        gender
<br/>   &lt;int&gt; &lt;chr&gt;  &lt;date&gt;      &lt;int&gt;
<br/>1      1 child1 2021-02-23      1
<br/>2      1 child2 2024-06-03      2
<br/>3      2 child1 1920-06-22      2
<br/>4      3 child1 2019-07-11      2
<br/>5      3 child2 2023-12-26      2
<br/>6      4 child1 2024-10-10      1
<br/>7      4 child2 2024-10-10      1
<br/>8      5 child1 2018-12-05      2
<br/>9      5 child2 2023-02-28      1</code></pre>
</div>
</div>
<ul>
<li><p><code>names_sep = "_"</code> indicates that the underscore <code>_</code> is used as the separator to split the original column names into two parts: the part before the underscore, and the part after.</p></li>
<li><p>The general role of <code>names_to = "x"</code> is to turn the input column names into cell values under the <code>x</code> variable. In this case, the second parts of the column names, <code>child1</code> and <code>child2</code>, are turned into cell values under the new column <code>child</code>. The string <code>.value</code> is a special placeholder that, in this case, matches the first part of the column names, i.e., <code>dob</code> and <code>gender</code>, and serves two roles: 1) the matched parts are kept as new column names, and 2) <code>.value</code> specifies that the cell values of the input columns populate these new columns in the output (reminiscent of the argument <code>values_to</code>).</p></li>
</ul>
<p><strong>e.g. 2.</strong> Anscombe’s quartet is a famous example in statistics illustrating the importance of visualizing data distribution rather than relying solely on summary statistics. It consists of four datasets, each containing two variables (x and y). Despite having different data distributions, the four datasets share identical or very similar summary statistics, such as the mean, standard deviation, correlation, and regression line. In short, datasets with the same summary statistics can have vastly different characteristics, which is why the data should always be visualized.</p>
<div className="cell" data-layout-align="center">
<div className="sourceCode cell-code" id="cb5"><pre className="sourceCode r code-with-copy"><code className="sourceCode r"><span id="cb5-1"><a aria-hidden="true" href="#cb5-1" tabindex="-1"></a>anscombe</span></code></pre></div>
<div className="cell-output cell-output-stdout">
<pre className="demo-highlight sourceCode r rcss"><code className="sourceCode r">   x1 x2 x3 x4    y1   y2    y3    y4
<br/>1  10 10 10  8  8.04 9.14  7.46  6.58
<br/>2   8  8  8  8  6.95 8.14  6.77  5.76
<br/>3  13 13 13  8  7.58 8.74 12.74  7.71
<br/>4   9  9  9  8  8.81 8.77  7.11  8.84
<br/>5  11 11 11  8  8.33 9.26  7.81  8.47
<br/>6  14 14 14  8  9.96 8.10  8.84  7.04
<br/>7   6  6  6  8  7.24 6.13  6.08  5.25
<br/>8   4  4  4 19  4.26 3.10  5.39 12.50
<br/>9  12 12 12  8 10.84 9.13  8.15  5.56
<br/>10  7  7  7  8  4.82 7.26  6.42  7.91
<br/>11  5  5  5  8  5.68 4.74  5.73  6.89</code></pre>
</div>
</div>
<p>Below we’ll produce a dataset with only the columns <code>set</code>, <code>x</code>, and <code>y</code>. In the <Link to="/R/data-wrangling/regular-expression/0-introduction">regular expression</Link> <code>(.)(.)</code>, each dot is a <Link to="/R/data-wrangling/regular-expression/4-wildcards">wildcard</Link> representing any character, and the entire expression matches any two consecutive characters, with each character being an individual <Link to="/R/data-wrangling/regular-expression/8-capture-group">capture group</Link>. The first group (<code>x</code> or <code>y</code>) is matched to <code>.value</code>, and the second group (the digit <code>1</code> to <code>4</code>) becomes the cell value of <code>set</code>.</p>
<div className="cell" data-layout-align="center">
<div className="sourceCode cell-code" id="cb7"><pre className="sourceCode r code-with-copy"><code className="sourceCode r"><span id="cb7-1"><a aria-hidden="true" href="#cb7-1" tabindex="-1"></a>a <span className="ot">&lt;-</span> anscombe <span className="sc">%&gt;%</span> </span>
<span id="cb7-2"><a aria-hidden="true" href="#cb7-2" tabindex="-1"></a>  <span className="fu">pivot_longer</span>(</span>
<span id="cb7-3"><a aria-hidden="true" href="#cb7-3" tabindex="-1"></a>    <span className="fu">everything</span>(),</span>
<span id="cb7-4"><a aria-hidden="true" href="#cb7-4" tabindex="-1"></a>    <span className="at">names_to =</span> <span className="fu">c</span>(<span className="st">".value"</span>, <span className="st">"set"</span>),</span>
<span id="cb7-5"><a aria-hidden="true" href="#cb7-5" tabindex="-1"></a>    <span className="at">names_pattern =</span> <span className="st">"(.)(.)"</span></span>
<span id="cb7-6"><a aria-hidden="true" href="#cb7-6" tabindex="-1"></a>  ) <span className="sc">%&gt;%</span> </span>
<span id="cb7-7"><a aria-hidden="true" href="#cb7-7" tabindex="-1"></a>  <span className="fu">arrange</span>(set)</span>
<span id="cb7-8"><a aria-hidden="true" href="#cb7-8" tabindex="-1"></a>a</span></code></pre></div>
<div className="cell-output cell-output-stdout">
<pre className="demo-highlight sourceCode r rcss"><code className="sourceCode r"># A tibble: 44 × 3
<br/>   set       x     y
<br/>   &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt;
<br/> 1 1        10  8.04
<br/> 2 1         8  6.95
<br/> 3 1        13  7.58
<br/> 4 1         9  8.81
<br/> 5 1        11  8.33
<br/> 6 1        14  9.96
<br/> 7 1         6  7.24
<br/> 8 1         4  4.26
<br/> 9 1        12 10.8 
<br/>10 1         7  4.82
<br/># ℹ 34 more rows</code></pre>
</div>
</div>
<p>To demonstrate the convenience brought by the tidy structure, the output can be readily piped into <Link to="/R/gallery">ggplot2</Link> to visualize all four datasets at the same time.</p>
<div className="cell" data-layout-align="center">
<div className="sourceCode cell-code" id="cb9"><pre className="sourceCode r code-with-copy"><code className="sourceCode r"><span id="cb9-1"><a aria-hidden="true" href="#cb9-1" tabindex="-1"></a><span className="fu">library</span>(ggplot2)</span>
<span id="cb9-2"><a aria-hidden="true" href="#cb9-2" tabindex="-1"></a>a <span className="sc">%&gt;%</span> </span>
<span id="cb9-3"><a aria-hidden="true" href="#cb9-3" tabindex="-1"></a>  <span className="fu">ggplot</span>(<span className="fu">aes</span>(<span className="at">x =</span> x, <span className="at">y =</span> y)) <span className="sc">+</span> <span className="fu">geom_point</span>() <span className="sc">+</span></span>
<span id="cb9-4"><a aria-hidden="true" href="#cb9-4" tabindex="-1"></a>  <span className="fu">facet_wrap</span>(<span className="sc">~</span>set, <span className="at">nrow =</span> <span className="dv">1</span>) <span className="sc">+</span></span>
<span id="cb9-5"><a aria-hidden="true" href="#cb9-5" tabindex="-1"></a>  <span className="fu">theme_bw</span>()</span></code></pre></div>
<div className="cell-output-display">
<div className="quarto-figure quarto-figure-center">
<figure className="figure">
<p><img className="cover-img" src={imgPivotLongerPart4} /></p>
</figure>
</div>
</div>
</div>
<p><strong>e.g. 3.</strong> The same tidying approach used for <code>anscombe</code> above can be applied to the dataset below.</p>
<div className="cell" data-layout-align="center">
<div className="sourceCode cell-code" id="cb10"><pre className="sourceCode r code-with-copy"><code className="sourceCode r"><span id="cb10-1"><a aria-hidden="true" href="#cb10-1" tabindex="-1"></a>pnl <span className="ot">&lt;-</span> <span className="fu">tibble</span>(</span>
<span id="cb10-2"><a aria-hidden="true" href="#cb10-2" tabindex="-1"></a>  <span className="at">x =</span> <span className="dv">1</span><span className="sc">:</span><span className="dv">4</span>, </span>
<span id="cb10-3"><a aria-hidden="true" href="#cb10-3" tabindex="-1"></a>  <span className="at">a =</span> <span className="fu">c</span>(<span className="dv">1</span>, <span className="dv">1</span>,<span className="dv">0</span>, <span className="dv">0</span>),</span>
<span id="cb10-4"><a aria-hidden="true" href="#cb10-4" tabindex="-1"></a>  <span className="at">b =</span> <span className="fu">c</span>(<span className="dv">0</span>, <span className="dv">1</span>, <span className="dv">1</span>, <span className="dv">1</span>),</span>
<span id="cb10-5"><a aria-hidden="true" href="#cb10-5" tabindex="-1"></a>  <span className="at">y1 =</span> <span className="fu">rnorm</span>(<span className="dv">4</span>),</span>
<span id="cb10-6"><a aria-hidden="true" href="#cb10-6" tabindex="-1"></a>  <span className="at">y2 =</span> <span className="fu">rnorm</span>(<span className="dv">4</span>),</span>
<span id="cb10-7"><a aria-hidden="true" href="#cb10-7" tabindex="-1"></a>  <span className="at">z1 =</span> <span className="fu">rep</span>(<span className="dv">3</span>, <span className="dv">4</span>),</span>
<span id="cb10-8"><a aria-hidden="true" href="#cb10-8" tabindex="-1"></a>  <span className="at">z2 =</span> <span className="fu">rep</span>(<span className="sc">-</span><span className="dv">2</span>, <span className="dv">4</span>))</span>
<span id="cb10-9"><a aria-hidden="true" href="#cb10-9" tabindex="-1"></a></span><br/>
<span id="cb10-10"><a aria-hidden="true" href="#cb10-10" tabindex="-1"></a>pnl</span></code></pre></div>
<div className="cell-output cell-output-stdout">
<pre className="demo-highlight sourceCode r rcss"><code className="sourceCode r"># A tibble: 4 × 7
<br/>      x     a     b    y1     y2    z1    z2
<br/>  &lt;int&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;  &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;
<br/>1     1     1     0 0.619  0.485     3    -2
<br/>2     2     1     1 0.855 -0.886     3    -2
<br/>3     3     0     1 2.32  -0.808     3    -2
<br/>4     4     0     1 1.64   1.36      3    -2</code></pre>
</div>
<div className="sourceCode cell-code" id="cb12"><pre className="sourceCode r code-with-copy"><code className="sourceCode r"><span id="cb12-1"><a aria-hidden="true" href="#cb12-1" tabindex="-1"></a>pnl <span className="sc">%&gt;%</span> </span>
<span id="cb12-2"><a aria-hidden="true" href="#cb12-2" tabindex="-1"></a>  <span className="fu">pivot_longer</span>(</span>
<span id="cb12-3"><a aria-hidden="true" href="#cb12-3" tabindex="-1"></a>    <span className="sc">!</span><span className="fu">c</span>(x, a, b), </span>
<span id="cb12-4"><a aria-hidden="true" href="#cb12-4" tabindex="-1"></a>    <span className="at">names_to =</span> <span className="fu">c</span>(<span className="st">".value"</span>, <span className="st">"time"</span>), </span>
<span id="cb12-5"><a aria-hidden="true" href="#cb12-5" tabindex="-1"></a>    <span className="at">names_pattern =</span> <span className="st">"(.)(.)"</span></span>
<span id="cb12-6"><a aria-hidden="true" href="#cb12-6" tabindex="-1"></a>  )</span></code></pre></div>
<div className="cell-output cell-output-stdout">
<pre className="demo-highlight sourceCode r rcss"><code className="sourceCode r"># A tibble: 8 × 6
<br/>      x     a     b time       y     z
<br/>  &lt;int&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;chr&gt;  &lt;dbl&gt; &lt;dbl&gt;
<br/>1     1     1     0 1      0.619     3
<br/>2     1     1     0 2      0.485    -2
<br/>3     2     1     1 1      0.855     3
<br/>4     2     1     1 2     -0.886    -2
<br/>5     3     0     1 1      2.32      3
<br/>6     3     0     1 2     -0.808    -2
<br/>7     4     0     1 1      1.64      3
<br/>8     4     0     1 2      1.36     -2</code></pre>
</div>
</div>
</main>
</div>
</div>
)}