A01 - Tidy Data

A01 - Tidy Data


Revised

21 May 2023


Programming Environment

# Report the interpreter version and the library search paths so the
# environment this notebook ran in is visible in the output.
R.version.string # R.Version()
.libPaths()

# Packages this notebook depends on.
packages <- c(
  "tidyverse"
)

# Install packages not yet installed.
# system.file(package = ) returns "" when a package cannot be found, so only
# the requested names are looked up instead of reading the metadata of every
# installed package the way installed.packages() does.
installed_packages <- vapply(
  packages,
  \(pkg) nzchar(system.file(package = pkg)),
  logical(1),
  USE.NAMES = FALSE
)
if (!all(installed_packages)) {
  install.packages(packages[!installed_packages])
}

# Load packages; invisible() suppresses printing of the list lapply() returns.
invisible(lapply(packages, library, character.only = TRUE))
'R version 4.3.0 (2023-04-21)'
'/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library'
── Attaching core tidyverse packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
 dplyr     1.1.2      readr     2.1.4
 forcats   1.0.0      stringr   1.5.0
 ggplot2   3.4.3      tibble    3.2.1
 lubridate 1.9.2      tidyr     1.3.0
 purrr     1.0.2     
── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
 dplyr::filter() masks stats::filter()
 dplyr::lag()    masks stats::lag()
 Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Galton-style family data, one row per child. Father and Mother repeat for
# every child of the same family. Entered row-wise so each observation can be
# read across a single line.
galton <- tribble(
  ~Child, ~Gender, ~Father, ~Mother,
    13.2,     "M",    18.5,     7.0,
     9.2,     "F",    18.5,     7.0,
     9.0,     "F",    18.5,     7.0,
     9.0,     "F",    18.5,     7.0,
    13.5,     "M",    15.5,     6.5,
    12.5,     "M",    15.5,     6.5,
     5.5,     "F",    15.5,     6.5,
     5.5,     "F",    15.5,     6.5,
    11.0,     "M",    15.0,     4.0,
     8.0,     "F",    15.0,     4.0,
    10.5,     "M",    15.0,     4.0,
     8.5,     "M",    15.0,     4.0,
     7.0,     "F",    15.0,     4.0,
     4.5,     "F",    15.0,     4.0,
     3.0,     "F",    15.0,     4.0,
    12.0,     "M",    15.0,    -1.5,
     9.0,     "M",    15.0,    -1.5,
     8.0,     "M",    15.0,    -1.5,
     6.5,     "F",    15.0,    -1.5,
     2.5,     "F",    15.0,    -1.5,
     2.5,     "F",    15.0,    -1.5
)

# Display the table, then persist it next to the notebook.
galton
write_csv(galton, "tidy_galton.csv")
A tibble: 21 × 4
Child  Gender  Father  Mother
<dbl>  <chr>   <dbl>   <dbl>
 13.2  M        18.5     7.0
  9.2  F        18.5     7.0
  9.0  F        18.5     7.0
  9.0  F        18.5     7.0
 13.5  M        15.5     6.5
 12.5  M        15.5     6.5
  5.5  F        15.5     6.5
  5.5  F        15.5     6.5
 11.0  M        15.0     4.0
  8.0  F        15.0     4.0
 10.5  M        15.0     4.0
  8.5  M        15.0     4.0
  7.0  F        15.0     4.0
  4.5  F        15.0     4.0
  3.0  F        15.0     4.0
 12.0  M        15.0    -1.5
  9.0  M        15.0    -1.5
  8.0  M        15.0    -1.5
  6.5  F        15.0    -1.5
  2.5  F        15.0    -1.5
  2.5  F        15.0    -1.5
# Presidential terms, one row per (president, vice-president) period. Periods
# without a vice president are recorded as numbered "Vacancy" entries. Dates
# are kept as MM-DD-YYYY character strings.
presidents <- tribble(
  ~Name,                     ~VP,                       ~Party,                  ~Start_Date,  ~End_Date,
  "Adams, John",             "Jefferson, Thomas",       "Federalist",            "03-04-1797", "03-04-1801",
  "Madison, James",          "Clinton, George",         "Democratic-Republican", "03-04-1809", "04-20-1812",
  "Madison, James",          "Vacancy 1",               "Democratic-Republican", "04-21-1812", "03-03-1813",
  "Madison, James",          "Gerry, Elbridge",         "Democratic-Republican", "03-04-1813", "11-23-1814",
  "Madison, James",          "Vacancy 2",               "Democratic-Republican", "11-24-1814", "03-04-1817",
  "Van Buren, Martin",       "Johnson, Richard Mentor", "Democrat",              "03-04-1837", "03-04-1841",
  "Harrison, William Henry", "Tyler, John",             "Whig",                  "03-04-1841", "04-04-1841",
  "Tyler, John",             "Vacancy 3",               "Whig",                  "04-04-1841", "03-04-1845"
)

# Display the table, then persist it next to the notebook.
presidents
write_csv(presidents, "tidy_presidents.csv")
A tibble: 8 × 5
Name                     VP                       Party                  Start_Date  End_Date
<chr>                    <chr>                    <chr>                  <chr>       <chr>
Adams, John              Jefferson, Thomas        Federalist             03-04-1797  03-04-1801
Madison, James           Clinton, George          Democratic-Republican  03-04-1809  04-20-1812
Madison, James           Vacancy 1                Democratic-Republican  04-21-1812  03-03-1813
Madison, James           Gerry, Elbridge          Democratic-Republican  03-04-1813  11-23-1814
Madison, James           Vacancy 2                Democratic-Republican  11-24-1814  03-04-1817
Van Buren, Martin        Johnson, Richard Mentor  Democrat               03-04-1837  03-04-1841
Harrison, William Henry  Tyler, John              Whig                   03-04-1841  04-04-1841
Tyler, John              Vacancy 3                Whig                   04-04-1841  03-04-1845