This package aims to make it really easy to tidy data retrieved from Gapminder. Here is how to get started:
Once you have loaded the package, you are in possession of two superpowers (functions): `tidy_indice` and `tidy_bunch`.
The `tidy_indice` function, as explained above, tidies a data sheet downloaded from Gapminder. This data sheet can be in either csv or xlsx format, as indicated on the Gapminder site.
`tidy_indice` takes the path to the file as its argument and returns the data as a tidy data frame.
filepath <- system.file("extdata", "life_expectancy_years.csv", package = "tidygapminder")
# From .............................
df <- data.table::fread(filepath)
head(df)
#> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
#> 1: country 1800.0 1801.0 1802.0 1803.0 1804.0 1805.0 1806.0 1807.0 1808.0
#> 2: Afghanistan 28.2 28.2 28.2 28.2 28.2 28.2 28.1 28.1 28.1
#> 3: Albania 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4
#> 4: Algeria 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8
#> 5: Andorra NA NA NA NA NA NA NA NA NA
#> 6: Angola 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0
#> V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21
#> 1: 1809.0 1810.0 1811.0 1812.0 1813.0 1814.0 1815.0 1816.0 1817.0 1818.0 1819.0
#> 2: 28.1 28.1 28.1 28.1 28.1 28.1 28.1 28.1 28.0 28.0 28.0
#> 3: 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4
#> 4: 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0
#> V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32
#> 1: 1820.0 1821.0 1822.0 1823.0 1824.0 1825.0 1826.0 1827.0 1828.0 1829.0 1830.0
#> 2: 28.0 28.0 28.0 28.0 28.0 27.9 27.9 27.9 27.9 27.9 27.9
#> 3: 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4
#> 4: 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0
#> V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43
#> 1: 1831.0 1832.0 1833.0 1834.0 1835.0 1836.0 1837.0 1838.0 1839.0 1840.0 1841.0
#> 2: 27.9 27.9 27.9 27.9 27.9 27.8 27.8 27.8 27.8 27.8 27.8
#> 3: 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4
#> 4: 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0
#> V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54
#> 1: 1842.0 1843.0 1844.0 1845.0 1846.0 1847.0 1848.0 1849.0 1850.0 1851.0 1852.0
#> 2: 27.8 27.8 27.8 27.8 27.7 27.7 27.7 27.7 27.7 27.7 27.7
#> 3: 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4
#> 4: 28.8 28.8 28.8 28.8 28.8 28.8 28.8 20.0 15.0 22.0 28.8
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0
#> V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65
#> 1: 1853.0 1854.0 1855.0 1856.0 1857.0 1858.0 1859.0 1860.0 1861.0 1862.0 1863.0
#> 2: 27.7 27.7 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6
#> 3: 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4
#> 4: 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.0
#> V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76
#> 1: 1864.0 1865.0 1866.0 1867.0 1868.0 1869.0 1870.0 1871.0 1872.0 1873.0 1874.0
#> 2: 27.6 27.5 27.5 27.5 27.5 27.5 27.5 27.6 27.6 27.7 27.7
#> 3: 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4
#> 4: 28.8 28.8 28.8 21.0 11.0 15.0 22.0 28.9 28.9 28.9 29.0
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 27.0 27.0 27.0 27.0 27.0 27.0 27.0 27.1 27.1 27.2 27.3
#> V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87
#> 1: 1875.0 1876.0 1877.0 1878.0 1879.0 1880.0 1881.0 1882.0 1883.0 1884.0 1885.0
#> 2: 27.8 27.8 27.9 28.0 28.0 28.1 28.1 28.2 28.2 28.3 28.4
#> 3: 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4 35.4
#> 4: 29.0 29.1 29.1 29.1 29.2 29.2 29.3 29.3 29.4 29.4 29.4
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 27.4 27.5 27.6 27.7 27.8 27.8 27.9 28.0 28.1 28.2 28.3
#> V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98
#> 1: 1886.0 1887.0 1888.0 1889.0 1890.0 1891.0 1892.0 1893.0 1894.0 1895.0 1896.0
#> 2: 28.4 28.5 28.5 28.6 28.6 28.7 28.8 28.8 28.9 28.9 29.0
#> 3: 35.4 35.4 35.4 35.4 35.5 35.5 35.5 35.5 35.5 35.5 35.5
#> 4: 29.5 29.5 29.6 29.6 29.6 29.7 29.7 29.8 29.8 29.8 29.9
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 28.3 28.4 28.5 28.6 28.7 28.8 28.9 28.9 29.0 29.1 29.2
#> V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109
#> 1: 1897.0 1898.0 1899.0 1900.0 1901.0 1902.0 1903.0 1904.0 1905.0 1906.0 1907.0
#> 2: 29.1 29.1 29.2 29.2 29.3 29.3 29.4 29.4 29.5 29.6 29.6
#> 3: 35.5 35.5 35.5 35.5 35.5 35.5 35.5 35.5 35.5 35.5 35.5
#> 4: 29.9 30.0 30.0 30.1 30.2 30.3 31.3 25.3 28.0 29.5 29.4
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 29.3 29.4 29.4 29.5 29.6 29.7 29.8 29.9 30.0 30.1 30.1
#> V110 V111 V112 V113 V114 V115 V116 V117 V118 V119
#> 1: 1908.0 1909.0 1910.0 1911.0 1912.0 1913.0 1914.0 1915.0 1916.0 1917.0
#> 2: 29.7 29.7 29.8 29.8 29.9 29.9 30.0 30.1 30.1 30.2
#> 3: 35.5 35.5 35.5 35.5 35.5 35.5 35.5 35.5 35.5 35.5
#> 4: 29.3 30.9 32.5 32.3 33.7 31.5 31.0 30.5 30.1 30.2
#> 5: NA NA NA NA NA NA NA NA NA NA
#> 6: 30.2 30.3 30.4 30.5 30.6 30.6 30.7 30.8 30.9 31.0
#> V120 V121 V122 V123 V124 V125 V126 V127 V128 V129
#> 1: 1918.00 1919.0 1920.0 1921.0 1922.0 1923.0 1924.0 1925.0 1926.0 1927.0
#> 2: 7.89 30.3 30.3 30.4 30.4 30.5 30.6 30.6 30.7 30.7
#> 3: 19.50 35.5 35.5 35.5 35.5 35.5 35.5 35.5 35.5 35.5
#> 4: 23.60 30.3 29.4 29.5 29.2 31.8 33.3 34.1 33.4 28.6
#> 5: NA NA NA NA NA NA NA NA NA NA
#> 6: 12.00 31.2 31.2 31.3 31.4 31.5 31.6 31.7 31.8 31.8
#> V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140
#> 1: 1928.0 1929.0 1930.0 1931.0 1932.0 1933.0 1934.0 1935.0 1936.0 1937.0 1938.0
#> 2: 30.8 30.8 30.9 30.9 31.0 31.1 31.1 31.2 31.2 31.3 31.3
#> 3: 35.5 35.5 36.4 37.3 38.2 39.1 40.0 40.9 41.8 42.8 43.6
#> 4: 32.2 32.5 33.8 31.7 33.1 34.3 33.7 35.6 36.8 34.9 34.3
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 31.9 32.0 32.1 32.2 32.3 32.4 32.4 32.5 32.6 32.7 32.8
#> V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151
#> 1: 1939.0 1940.0 1941.0 1942.0 1943.0 1944.0 1945.0 1946.0 1947.0 1948.0 1949.0
#> 2: 31.4 31.4 31.5 31.6 31.6 31.7 31.7 31.8 31.8 31.9 31.9
#> 3: 43.2 42.2 41.7 40.2 37.2 34.2 47.2 50.3 51.8 52.7 53.6
#> 4: 36.6 37.1 35.3 34.7 30.0 35.5 33.2 35.4 38.8 42.0 44.4
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 32.9 33.0 33.3 33.7 34.0 34.4 34.8 35.1 35.5 35.9 36.2
#> V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162
#> 1: 1950.0 1951.0 1952.0 1953.0 1954.0 1955.0 1956.0 1957.0 1958.0 1959.0 1960.0
#> 2: 32.0 32.4 33.0 33.7 34.4 35.1 35.8 36.5 37.2 37.9 38.6
#> 3: 54.5 54.7 55.2 55.8 56.5 57.3 58.3 59.3 60.4 61.6 62.7
#> 4: 46.9 47.1 47.6 48.1 48.6 49.2 49.7 50.3 50.9 51.4 52.0
#> 5: NA NA NA NA NA NA NA NA NA NA NA
#> 6: 36.6 36.9 37.5 38.1 38.7 39.3 39.9 40.5 41.2 41.8 42.4
#> V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173
#> 1: 1961.0 1962.0 1963.0 1964.0 1965.0 1966.0 1967.0 1968.0 1969.0 1970.0 1971.0
#> 2: 39.4 40.1 40.8 41.5 42.2 42.9 43.7 44.4 45.1 45.8 45.9
#> 3: 63.7 64.6 65.3 65.9 66.3 66.5 66.7 66.9 67.1 67.4 68.0
#> 4: 52.6 53.2 53.8 54.3 54.9 55.4 56.0 56.5 57.0 57.5 57.8
#> 5: NA NA NA NA NA NA NA NA NA 76.0 76.3
#> 6: 43.0 43.6 44.3 44.9 45.5 46.2 46.8 47.4 48.1 48.7 49.0
#> V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184
#> 1: 1972.0 1973.0 1974.0 1975.0 1976.0 1977.0 1978.0 1979.0 1980.0 1981.0 1982.0
#> 2: 45.9 46.0 46.1 46.3 46.5 46.6 45.0 43.6 43.3 44.1 43.8
#> 3: 68.6 69.2 69.8 70.3 70.8 71.3 71.7 72.0 72.3 72.4 72.5
#> 4: 58.2 58.5 59.1 59.5 60.0 60.6 61.2 61.9 62.1 63.4 64.4
#> 5: 76.6 76.9 77.2 77.4 77.7 78.0 78.3 78.6 78.7 78.8 78.8
#> 6: 49.2 49.4 49.6 49.5 49.5 49.6 49.7 49.8 49.9 50.0 50.0
#> V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195
#> 1: 1983.0 1984.0 1985.0 1986.0 1987.0 1988.0 1989.0 1990.0 1991.0 1992.0 1993.0
#> 2: 42.0 39.8 41.6 42.6 44.7 47.0 50.8 51.6 51.3 51.4 51.4
#> 3: 72.6 72.8 73.0 73.2 73.2 73.4 73.7 73.9 73.9 73.9 73.9
#> 4: 65.7 66.9 68.0 68.7 69.4 70.0 70.5 71.0 71.4 71.7 72.0
#> 5: 78.8 79.0 79.1 79.2 79.3 79.3 79.4 79.5 79.5 79.6 79.8
#> 6: 50.1 50.2 50.3 50.2 50.0 49.8 50.2 50.2 50.8 51.0 49.7
#> V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206
#> 1: 1994.0 1995.0 1996.0 1997.0 1998.0 1999.0 2000.0 2001.0 2002.0 2003.0 2004.0
#> 2: 50.7 51.1 51.4 51.1 50.1 51.5 51.6 51.7 52.4 53.0 53.5
#> 3: 74.0 74.1 74.3 72.5 74.3 74.4 74.4 74.5 74.5 74.6 74.7
#> 4: 72.1 72.3 72.8 73.0 73.1 73.5 73.9 74.1 74.4 74.5 75.1
#> 5: 80.0 80.3 80.6 81.0 81.3 81.5 81.8 82.0 82.3 82.4 82.3
#> 6: 51.1 52.0 52.3 52.7 52.8 52.9 53.4 53.6 54.5 55.1 55.7
#> V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217
#> 1: 2005.0 2006.0 2007.0 2008.0 2009.0 2010.0 2011.0 2012.0 2013.0 2014.0 2015.0
#> 2: 53.9 54.1 54.6 55.2 55.7 56.2 56.7 57.2 57.7 57.8 57.9
#> 3: 74.9 75.2 75.4 75.6 75.9 76.3 76.7 77.0 77.2 77.4 77.6
#> 4: 75.4 75.6 75.9 76.1 76.3 76.5 76.7 76.8 77.0 77.1 77.3
#> 5: 82.5 82.5 82.7 82.7 82.7 82.7 82.6 82.6 82.6 82.6 82.5
#> 6: 56.5 57.0 57.8 58.6 59.3 60.1 60.9 61.7 62.5 63.3 64.0
#> V218 V219 V220
#> 1: 2016.0 2017.0 2018.0
#> 2: 58.0 58.4 58.7
#> 3: 77.7 77.9 78.0
#> 4: 77.4 77.6 77.9
#> 5: 82.5 NA NA
#> 6: 64.7 64.9 65.2
# To................................
ti_df <- tidy_indice(filepath)
head(ti_df)
#> # A tibble: 6 x 3
#> country year life_expectancy_years
#> <chr> <dbl> <dbl>
#> 1 Afghanistan 1800 28.2
#> 2 Afghanistan 1801 28.2
#> 3 Afghanistan 1802 28.2
#> 4 Afghanistan 1803 28.2
#> 5 Afghanistan 1804 28.2
#> 6 Afghanistan 1805 28.2
`tidy_bunch` makes use of `tidy_indice` to tidy a whole set of data sheets, and has the option to merge all the resulting data frames into one big data frame when `merge` is set to `TRUE`:
dir_path <- system.file("extdata", package = "tidygapminder")
# From ................................
list.files(dir_path)
#> [1] "agriculture_land.xlsx" "life_expectancy_years.csv"
# To ..................................
td_dp <- tidy_bunch(dir_path, merge = TRUE)
head(td_dp)
#> country year Agricultural land (% of land area) life_expectancy_years
#> 1 Afghanistan 1800 NA 28.2
#> 2 Afghanistan 1801 NA 28.2
#> 3 Afghanistan 1802 NA 28.2
#> 4 Afghanistan 1803 NA 28.2
#> 5 Afghanistan 1804 NA 28.2
#> 6 Afghanistan 1805 NA 28.2
Enjoy!!!