A01 - Tidy Data
Revised
21 May 2023
Programming Environment
# Report the running R version string and the library search paths.
R.version.string # R.Version()
.libPaths()

# Packages this notebook needs attached.
packages <- c('tidyverse')

# Flag which of the required packages are already present in the local
# library, then install only the ones that are missing.
installed_packages <- packages %in% rownames(installed.packages())
if (!all(installed_packages)) {
  install.packages(packages[!installed_packages])
}

# Attach each required package by name; invisible() keeps the list returned
# by lapply() out of the cell output.
invisible(
  lapply(packages, function(pkg) library(pkg, character.only = TRUE))
)
'R version 4.3.0 (2023-04-21)'
'/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library'
── Attaching core tidyverse packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.2 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.3 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.2
── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Build the `galton` table row by row: each row holds one Child value with
# that child's Gender and the Father / Mother values repeated alongside it.
# NOTE(review): the units/baseline of the numeric columns are not stated
# here (Mother contains negative values) -- confirm against the assignment.
galton <- tribble(
  ~Child, ~Gender, ~Father, ~Mother,
    13.2,     'M',    18.5,     7.0,
     9.2,     'F',    18.5,     7.0,
     9.0,     'F',    18.5,     7.0,
     9.0,     'F',    18.5,     7.0,
    13.5,     'M',    15.5,     6.5,
    12.5,     'M',    15.5,     6.5,
     5.5,     'F',    15.5,     6.5,
     5.5,     'F',    15.5,     6.5,
    11.0,     'M',    15.0,     4.0,
     8.0,     'F',    15.0,     4.0,
    10.5,     'M',    15.0,     4.0,
     8.5,     'M',    15.0,     4.0,
     7.0,     'F',    15.0,     4.0,
     4.5,     'F',    15.0,     4.0,
     3.0,     'F',    15.0,     4.0,
    12.0,     'M',    15.0,    -1.5,
     9.0,     'M',    15.0,    -1.5,
     8.0,     'M',    15.0,    -1.5,
     6.5,     'F',    15.0,    -1.5,
     2.5,     'F',    15.0,    -1.5,
     2.5,     'F',    15.0,    -1.5
)

# Display the table in the notebook output.
galton

# Persist the table as CSV in the current working directory.
write_csv(galton, file = 'tidy_galton.csv')
Child | Gender | Father | Mother |
---|---|---|---|
<dbl> | <chr> | <dbl> | <dbl> |
13.2 | M | 18.5 | 7.0 |
9.2 | F | 18.5 | 7.0 |
9.0 | F | 18.5 | 7.0 |
9.0 | F | 18.5 | 7.0 |
13.5 | M | 15.5 | 6.5 |
12.5 | M | 15.5 | 6.5 |
5.5 | F | 15.5 | 6.5 |
5.5 | F | 15.5 | 6.5 |
11.0 | M | 15.0 | 4.0 |
8.0 | F | 15.0 | 4.0 |
10.5 | M | 15.0 | 4.0 |
8.5 | M | 15.0 | 4.0 |
7.0 | F | 15.0 | 4.0 |
4.5 | F | 15.0 | 4.0 |
3.0 | F | 15.0 | 4.0 |
12.0 | M | 15.0 | -1.5 |
9.0 | M | 15.0 | -1.5 |
8.0 | M | 15.0 | -1.5 |
6.5 | F | 15.0 | -1.5 |
2.5 | F | 15.0 | -1.5 |
2.5 | F | 15.0 | -1.5 |
# Build the `presidents` table row by row: each row pairs a president (Name)
# with a VP entry, the Party, and the Start_Date / End_Date of that pairing.
# A president appears on several rows when the VP entry changes; the
# 'Vacancy N' labels occupy the VP column for those rows.
# The date columns are kept as character strings exactly as entered.
# NOTE(review): the strings look like MM-DD-YYYY -- confirm before parsing.
presidents <- tribble(
  ~Name,                     ~VP,                       ~Party,                  ~Start_Date,  ~End_Date,
  'Adams, John',             'Jefferson, Thomas',       'Federalist',            '03-04-1797', '03-04-1801',
  'Madison, James',          'Clinton, George',         'Democratic-Republican', '03-04-1809', '04-20-1812',
  'Madison, James',          'Vacancy 1',               'Democratic-Republican', '04-21-1812', '03-03-1813',
  'Madison, James',          'Gerry, Elbridge',         'Democratic-Republican', '03-04-1813', '11-23-1814',
  'Madison, James',          'Vacancy 2',               'Democratic-Republican', '11-24-1814', '03-04-1817',
  'Van Buren, Martin',       'Johnson, Richard Mentor', 'Democrat',              '03-04-1837', '03-04-1841',
  'Harrison, William Henry', 'Tyler, John',             'Whig',                  '03-04-1841', '04-04-1841',
  'Tyler, John',             'Vacancy 3',               'Whig',                  '04-04-1841', '03-04-1845'
)

# Display the table in the notebook output.
presidents

# Persist the table as CSV in the current working directory.
write_csv(presidents, file = 'tidy_presidents.csv')
Name | VP | Party | Start_Date | End_Date |
---|---|---|---|---|
<chr> | <chr> | <chr> | <chr> | <chr> |
Adams, John | Jefferson, Thomas | Federalist | 03-04-1797 | 03-04-1801 |
Madison, James | Clinton, George | Democratic-Republican | 03-04-1809 | 04-20-1812 |
Madison, James | Vacancy 1 | Democratic-Republican | 04-21-1812 | 03-03-1813 |
Madison, James | Gerry, Elbridge | Democratic-Republican | 03-04-1813 | 11-23-1814 |
Madison, James | Vacancy 2 | Democratic-Republican | 11-24-1814 | 03-04-1817 |
Van Buren, Martin | Johnson, Richard Mentor | Democrat | 03-04-1837 | 03-04-1841 |
Harrison, William Henry | Tyler, John | Whig | 03-04-1841 | 04-04-1841 |
Tyler, John | Vacancy 3 | Whig | 04-04-1841 | 03-04-1845 |