A simple look into tidygapminder

This package aims to make it really easy to tidy data retrieved from Gapminder. To begin, load the package:

library(tidygapminder)

Once the package is loaded, you have two superpowers (functions) at your disposal: tidy_indice and tidy_bunch.

tidy_indice

As mentioned above, the tidy_indice function tidies a data sheet downloaded from Gapminder. This data sheet can be in either csv or xlsx format, as indicated on the Gapminder site.

tidy_indice takes the path to the file as its argument and returns the data as a tidy data frame.

filepath <- system.file("extdata", "life_expectancy_years.csv", package = "tidygapminder")

# From .............................
df <- data.table::fread(filepath)

head(df)
#>             V1     V2     V3     V4     V5     V6     V7     V8     V9    V10
#> 1:     country 1800.0 1801.0 1802.0 1803.0 1804.0 1805.0 1806.0 1807.0 1808.0
#> 2: Afghanistan   28.2   28.2   28.2   28.2   28.2   28.2   28.1   28.1   28.1
#> 3:     Albania   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4
#> 4:     Algeria   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8
#> 5:     Andorra     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:      Angola   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0
#>       V11    V12    V13    V14    V15    V16    V17    V18    V19    V20    V21
#> 1: 1809.0 1810.0 1811.0 1812.0 1813.0 1814.0 1815.0 1816.0 1817.0 1818.0 1819.0
#> 2:   28.1   28.1   28.1   28.1   28.1   28.1   28.1   28.1   28.0   28.0   28.0
#> 3:   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4
#> 4:   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0
#>       V22    V23    V24    V25    V26    V27    V28    V29    V30    V31    V32
#> 1: 1820.0 1821.0 1822.0 1823.0 1824.0 1825.0 1826.0 1827.0 1828.0 1829.0 1830.0
#> 2:   28.0   28.0   28.0   28.0   28.0   27.9   27.9   27.9   27.9   27.9   27.9
#> 3:   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4
#> 4:   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0
#>       V33    V34    V35    V36    V37    V38    V39    V40    V41    V42    V43
#> 1: 1831.0 1832.0 1833.0 1834.0 1835.0 1836.0 1837.0 1838.0 1839.0 1840.0 1841.0
#> 2:   27.9   27.9   27.9   27.9   27.9   27.8   27.8   27.8   27.8   27.8   27.8
#> 3:   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4
#> 4:   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0
#>       V44    V45    V46    V47    V48    V49    V50    V51    V52    V53    V54
#> 1: 1842.0 1843.0 1844.0 1845.0 1846.0 1847.0 1848.0 1849.0 1850.0 1851.0 1852.0
#> 2:   27.8   27.8   27.8   27.8   27.7   27.7   27.7   27.7   27.7   27.7   27.7
#> 3:   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4
#> 4:   28.8   28.8   28.8   28.8   28.8   28.8   28.8   20.0   15.0   22.0   28.8
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0
#>       V55    V56    V57    V58    V59    V60    V61    V62    V63    V64    V65
#> 1: 1853.0 1854.0 1855.0 1856.0 1857.0 1858.0 1859.0 1860.0 1861.0 1862.0 1863.0
#> 2:   27.7   27.7   27.6   27.6   27.6   27.6   27.6   27.6   27.6   27.6   27.6
#> 3:   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4
#> 4:   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8   28.8
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.0
#>       V66    V67    V68    V69    V70    V71    V72    V73    V74    V75    V76
#> 1: 1864.0 1865.0 1866.0 1867.0 1868.0 1869.0 1870.0 1871.0 1872.0 1873.0 1874.0
#> 2:   27.6   27.5   27.5   27.5   27.5   27.5   27.5   27.6   27.6   27.7   27.7
#> 3:   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4
#> 4:   28.8   28.8   28.8   21.0   11.0   15.0   22.0   28.9   28.9   28.9   29.0
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   27.0   27.0   27.0   27.0   27.0   27.0   27.0   27.1   27.1   27.2   27.3
#>       V77    V78    V79    V80    V81    V82    V83    V84    V85    V86    V87
#> 1: 1875.0 1876.0 1877.0 1878.0 1879.0 1880.0 1881.0 1882.0 1883.0 1884.0 1885.0
#> 2:   27.8   27.8   27.9   28.0   28.0   28.1   28.1   28.2   28.2   28.3   28.4
#> 3:   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4   35.4
#> 4:   29.0   29.1   29.1   29.1   29.2   29.2   29.3   29.3   29.4   29.4   29.4
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   27.4   27.5   27.6   27.7   27.8   27.8   27.9   28.0   28.1   28.2   28.3
#>       V88    V89    V90    V91    V92    V93    V94    V95    V96    V97    V98
#> 1: 1886.0 1887.0 1888.0 1889.0 1890.0 1891.0 1892.0 1893.0 1894.0 1895.0 1896.0
#> 2:   28.4   28.5   28.5   28.6   28.6   28.7   28.8   28.8   28.9   28.9   29.0
#> 3:   35.4   35.4   35.4   35.4   35.5   35.5   35.5   35.5   35.5   35.5   35.5
#> 4:   29.5   29.5   29.6   29.6   29.6   29.7   29.7   29.8   29.8   29.8   29.9
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   28.3   28.4   28.5   28.6   28.7   28.8   28.9   28.9   29.0   29.1   29.2
#>       V99   V100   V101   V102   V103   V104   V105   V106   V107   V108   V109
#> 1: 1897.0 1898.0 1899.0 1900.0 1901.0 1902.0 1903.0 1904.0 1905.0 1906.0 1907.0
#> 2:   29.1   29.1   29.2   29.2   29.3   29.3   29.4   29.4   29.5   29.6   29.6
#> 3:   35.5   35.5   35.5   35.5   35.5   35.5   35.5   35.5   35.5   35.5   35.5
#> 4:   29.9   30.0   30.0   30.1   30.2   30.3   31.3   25.3   28.0   29.5   29.4
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   29.3   29.4   29.4   29.5   29.6   29.7   29.8   29.9   30.0   30.1   30.1
#>      V110   V111   V112   V113   V114   V115   V116   V117   V118   V119
#> 1: 1908.0 1909.0 1910.0 1911.0 1912.0 1913.0 1914.0 1915.0 1916.0 1917.0
#> 2:   29.7   29.7   29.8   29.8   29.9   29.9   30.0   30.1   30.1   30.2
#> 3:   35.5   35.5   35.5   35.5   35.5   35.5   35.5   35.5   35.5   35.5
#> 4:   29.3   30.9   32.5   32.3   33.7   31.5   31.0   30.5   30.1   30.2
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   30.2   30.3   30.4   30.5   30.6   30.6   30.7   30.8   30.9   31.0
#>       V120   V121   V122   V123   V124   V125   V126   V127   V128   V129
#> 1: 1918.00 1919.0 1920.0 1921.0 1922.0 1923.0 1924.0 1925.0 1926.0 1927.0
#> 2:    7.89   30.3   30.3   30.4   30.4   30.5   30.6   30.6   30.7   30.7
#> 3:   19.50   35.5   35.5   35.5   35.5   35.5   35.5   35.5   35.5   35.5
#> 4:   23.60   30.3   29.4   29.5   29.2   31.8   33.3   34.1   33.4   28.6
#> 5:      NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   12.00   31.2   31.2   31.3   31.4   31.5   31.6   31.7   31.8   31.8
#>      V130   V131   V132   V133   V134   V135   V136   V137   V138   V139   V140
#> 1: 1928.0 1929.0 1930.0 1931.0 1932.0 1933.0 1934.0 1935.0 1936.0 1937.0 1938.0
#> 2:   30.8   30.8   30.9   30.9   31.0   31.1   31.1   31.2   31.2   31.3   31.3
#> 3:   35.5   35.5   36.4   37.3   38.2   39.1   40.0   40.9   41.8   42.8   43.6
#> 4:   32.2   32.5   33.8   31.7   33.1   34.3   33.7   35.6   36.8   34.9   34.3
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   31.9   32.0   32.1   32.2   32.3   32.4   32.4   32.5   32.6   32.7   32.8
#>      V141   V142   V143   V144   V145   V146   V147   V148   V149   V150   V151
#> 1: 1939.0 1940.0 1941.0 1942.0 1943.0 1944.0 1945.0 1946.0 1947.0 1948.0 1949.0
#> 2:   31.4   31.4   31.5   31.6   31.6   31.7   31.7   31.8   31.8   31.9   31.9
#> 3:   43.2   42.2   41.7   40.2   37.2   34.2   47.2   50.3   51.8   52.7   53.6
#> 4:   36.6   37.1   35.3   34.7   30.0   35.5   33.2   35.4   38.8   42.0   44.4
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   32.9   33.0   33.3   33.7   34.0   34.4   34.8   35.1   35.5   35.9   36.2
#>      V152   V153   V154   V155   V156   V157   V158   V159   V160   V161   V162
#> 1: 1950.0 1951.0 1952.0 1953.0 1954.0 1955.0 1956.0 1957.0 1958.0 1959.0 1960.0
#> 2:   32.0   32.4   33.0   33.7   34.4   35.1   35.8   36.5   37.2   37.9   38.6
#> 3:   54.5   54.7   55.2   55.8   56.5   57.3   58.3   59.3   60.4   61.6   62.7
#> 4:   46.9   47.1   47.6   48.1   48.6   49.2   49.7   50.3   50.9   51.4   52.0
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 6:   36.6   36.9   37.5   38.1   38.7   39.3   39.9   40.5   41.2   41.8   42.4
#>      V163   V164   V165   V166   V167   V168   V169   V170   V171   V172   V173
#> 1: 1961.0 1962.0 1963.0 1964.0 1965.0 1966.0 1967.0 1968.0 1969.0 1970.0 1971.0
#> 2:   39.4   40.1   40.8   41.5   42.2   42.9   43.7   44.4   45.1   45.8   45.9
#> 3:   63.7   64.6   65.3   65.9   66.3   66.5   66.7   66.9   67.1   67.4   68.0
#> 4:   52.6   53.2   53.8   54.3   54.9   55.4   56.0   56.5   57.0   57.5   57.8
#> 5:     NA     NA     NA     NA     NA     NA     NA     NA     NA   76.0   76.3
#> 6:   43.0   43.6   44.3   44.9   45.5   46.2   46.8   47.4   48.1   48.7   49.0
#>      V174   V175   V176   V177   V178   V179   V180   V181   V182   V183   V184
#> 1: 1972.0 1973.0 1974.0 1975.0 1976.0 1977.0 1978.0 1979.0 1980.0 1981.0 1982.0
#> 2:   45.9   46.0   46.1   46.3   46.5   46.6   45.0   43.6   43.3   44.1   43.8
#> 3:   68.6   69.2   69.8   70.3   70.8   71.3   71.7   72.0   72.3   72.4   72.5
#> 4:   58.2   58.5   59.1   59.5   60.0   60.6   61.2   61.9   62.1   63.4   64.4
#> 5:   76.6   76.9   77.2   77.4   77.7   78.0   78.3   78.6   78.7   78.8   78.8
#> 6:   49.2   49.4   49.6   49.5   49.5   49.6   49.7   49.8   49.9   50.0   50.0
#>      V185   V186   V187   V188   V189   V190   V191   V192   V193   V194   V195
#> 1: 1983.0 1984.0 1985.0 1986.0 1987.0 1988.0 1989.0 1990.0 1991.0 1992.0 1993.0
#> 2:   42.0   39.8   41.6   42.6   44.7   47.0   50.8   51.6   51.3   51.4   51.4
#> 3:   72.6   72.8   73.0   73.2   73.2   73.4   73.7   73.9   73.9   73.9   73.9
#> 4:   65.7   66.9   68.0   68.7   69.4   70.0   70.5   71.0   71.4   71.7   72.0
#> 5:   78.8   79.0   79.1   79.2   79.3   79.3   79.4   79.5   79.5   79.6   79.8
#> 6:   50.1   50.2   50.3   50.2   50.0   49.8   50.2   50.2   50.8   51.0   49.7
#>      V196   V197   V198   V199   V200   V201   V202   V203   V204   V205   V206
#> 1: 1994.0 1995.0 1996.0 1997.0 1998.0 1999.0 2000.0 2001.0 2002.0 2003.0 2004.0
#> 2:   50.7   51.1   51.4   51.1   50.1   51.5   51.6   51.7   52.4   53.0   53.5
#> 3:   74.0   74.1   74.3   72.5   74.3   74.4   74.4   74.5   74.5   74.6   74.7
#> 4:   72.1   72.3   72.8   73.0   73.1   73.5   73.9   74.1   74.4   74.5   75.1
#> 5:   80.0   80.3   80.6   81.0   81.3   81.5   81.8   82.0   82.3   82.4   82.3
#> 6:   51.1   52.0   52.3   52.7   52.8   52.9   53.4   53.6   54.5   55.1   55.7
#>      V207   V208   V209   V210   V211   V212   V213   V214   V215   V216   V217
#> 1: 2005.0 2006.0 2007.0 2008.0 2009.0 2010.0 2011.0 2012.0 2013.0 2014.0 2015.0
#> 2:   53.9   54.1   54.6   55.2   55.7   56.2   56.7   57.2   57.7   57.8   57.9
#> 3:   74.9   75.2   75.4   75.6   75.9   76.3   76.7   77.0   77.2   77.4   77.6
#> 4:   75.4   75.6   75.9   76.1   76.3   76.5   76.7   76.8   77.0   77.1   77.3
#> 5:   82.5   82.5   82.7   82.7   82.7   82.7   82.6   82.6   82.6   82.6   82.5
#> 6:   56.5   57.0   57.8   58.6   59.3   60.1   60.9   61.7   62.5   63.3   64.0
#>      V218   V219   V220
#> 1: 2016.0 2017.0 2018.0
#> 2:   58.0   58.4   58.7
#> 3:   77.7   77.9   78.0
#> 4:   77.4   77.6   77.9
#> 5:   82.5     NA     NA
#> 6:   64.7   64.9   65.2

# To................................

ti_df <- tidy_indice(filepath)

head(ti_df)
#> # A tibble: 6 x 3
#>   country      year life_expectancy_years
#>   <chr>       <dbl>                 <dbl>
#> 1 Afghanistan  1800                  28.2
#> 2 Afghanistan  1801                  28.2
#> 3 Afghanistan  1802                  28.2
#> 4 Afghanistan  1803                  28.2
#> 5 Afghanistan  1804                  28.2
#> 6 Afghanistan  1805                  28.2

tidy_bunch

tidy_bunch makes use of tidy_indice to tidy a whole set of data sheets, and has the option to merge all the resulting data frames into one big data frame when merge is set to TRUE:

dir_path <- system.file("extdata", package = "tidygapminder")

# From ................................
list.files(dir_path)
#> [1] "agriculture_land.xlsx"     "life_expectancy_years.csv"

# To ..................................
td_dp <- tidy_bunch(dir_path, merge = TRUE)

head(td_dp)
#>       country year Agricultural land (% of land area) life_expectancy_years
#> 1 Afghanistan 1800                                 NA                  28.2
#> 2 Afghanistan 1801                                 NA                  28.2
#> 3 Afghanistan 1802                                 NA                  28.2
#> 4 Afghanistan 1803                                 NA                  28.2
#> 5 Afghanistan 1804                                 NA                  28.2
#> 6 Afghanistan 1805                                 NA                  28.2

Enjoy!!!