Data sources and processing procedures

Introduction of data sources and availability

The data used in this package were original compiled and processed by United States Geographic Services (USGS). The fertilizer data include the application in both farms and non-farms for 1945 through 2012. The folks in USGS utilized the sales data of commercial fertilizer each state or county from the Association of American Plant Food Control Officials (AAPFCO) commercial fertilizer sales data. State estimates were then allocated to the county-level using fertilizer expenditure from the Census of Agriculture as county weights for farm fertilizer, and effective population density as county weights for nonfarm fertilizer. The data sources and other further information are availalbe in Table 1.

Dataset name	Temporal coverage	Source	Website	Comments
Fertilizer data before 1985	1945 - 1985	USGS	Link	Only has farm data.
Fertilizer data after 1986	1986 - 2012	USGS	Link	Published in 2017.
County background data	2010	US Census	Link	Assume descriptors of counties do not change.
Manure data before 1997	1982 - 1997	USGS	link	Manual data into farm every five years
Manure data in 2002	2002	USGS	link	Published in 2013
Manure data in 2007 and 2012	2007 & 2012	USGS	link	Published in 2017

Data cleanning

clean data before 1982.

# clean nitroge and phosphorus data.
nitrogen_1985 = n45_85 %>% 
  select(-Rowid_) %>%  # remove irrelavent info.
  # add leading zeros for FIPS to make it 5 digits.
  mutate(FIPS = str_pad(FIPS, 5, pad = "0")) %>% 
  gather(Year_temp, Quantity, Y45:Y85) %>% 
  mutate(Fertilizer = rep("N", length(.$Quantity)),
         Farm.Type = rep("farm", length(.$Quantity)),
         Year = paste("19",str_sub(Year_temp, start = 2),sep = "")
         ) %>% 
  select(-Year_temp)

phosphorus_1985 = p45_85 %>% 
  select(-Rowid_) %>%  # remove irrelavent info.
  mutate(FIPS = str_pad(FIPS, 5, pad = "0")) %>% 
  gather(Year_temp, Quantity, Y45:Y85) %>% 
  mutate(Fertilizer = rep("P", length(.$Quantity)),
         Farm.Type = rep("farm", length(.$Quantity)),
         Year = paste("19",str_sub(Year_temp, start = 2),sep = "")
         ) %>% 
  select(-Year_temp)
# clean dataset for data before 1985
clean_data_1985 = rbind(phosphorus_1985, nitrogen_1985)

clean data after 1987

# remove duplicates in county data.
county_data = county_raw %>%
  distinct(GEOID, .keep_all = TRUE) %>%
  # select certin columns.
  select(GEOID, ALAND, AWATER,INTPTLAT, INTPTLONG) %>%
  mutate(FIPSno = GEOID) %>%
  select(-GEOID)

# combine county data with county level fertilizer data.
county_summary = left_join(raw_data,county_data, by = "FIPSno")

clean_data = county_summary %>%
  # remove some columns with FIPS numbers.
  select(-c(FIPS_st, FIPS_co,FIPSno)) %>%
  # wide to long dataset.
  gather(Fert.Type, Quantity, farmN1987:nonfP2012) %>%
  # separate the fert.type into three columns: farm type, fertilizer, year.
  mutate(Year = str_sub(Fert.Type, start = -4), 
         Fertilizer = str_sub(Fert.Type, start = -5, end = -5),
         Farm.Type = str_sub(Fert.Type, start = 1, end = 4)
         ) %>%
  # repalce nonf into nonfarm
  mutate(Farm.Type = ifelse(Farm.Type == "nonf", "nonfarm", "farm")) %>%
  # remove Fert.Type
  select(-Fert.Type)

# extract county summaries info from clean data.
cnty_summary_1985 = county_summary %>% 
  select(FIPS,State, County, ALAND, AWATER, INTPTLAT, INTPTLONG) %>% 
  right_join(clean_data_1985, by = "FIPS") 

# add data from 1945. 
clean_data = rbind(clean_data, cnty_summary_1985) %>% 
  rename(Nutrient = Fertilizer) %>% # renam Fertilizer to nutrient.
  mutate(Input.Type = rep("Fertilizer")) # add a colume as fertilizer, compared with Manure.

Clean manure data before 1997

# read in manure data from 1982 to 1997.
cnty_manure_97 = read_csv("../data-raw/cnty_manure_82-97.csv")
cnty_manure_summary = cnty_manure_97 %>% 
  select(-c(State, County)) %>% 
  gather(dummy, Quantity, N_1982:P_1997) %>%   # dummy is a temporay column.
  mutate(Farm.Type = rep("farm", length(.$FIPS)),
         Input.Type = rep("Manure", length(.$FIPS))) %>% 
  separate(dummy, c("Nutrient", "Year"), sep = "_")

Clean manure data after 1997

# read in manure data.
cnty_manure_02 = read_csv("../data-raw/cnty_manure_2002.csv")
cnty_manure_07 = read_csv("../data-raw/cnty_manure_2007.csv")
cnty_manure_12 = read_csv("../data-raw/cnty_manure_2012.csv")

cnty_manure_02_12 = rbind(cnty_manure_02, cnty_manure_07, cnty_manure_12) %>% 
  select(-c(State, County)) %>% 
  gather(Nutrient, Quantity, N:P) %>% 
  mutate(Farm.Type = rep("farm", length(.$FIPS)),
         Input.Type = rep("Manure", length(.$FIPS)))

Save data as rdata with compaction

# connect manure data.
cnty_manure_summary = rbind(cnty_manure_summary,cnty_manure_02_12)

cnty_manure_all = county_summary %>% 
  select(FIPS,State, County, ALAND, AWATER, INTPTLAT, INTPTLONG) %>% 
  right_join(cnty_manure_summary, by = "FIPS") 

clean_data = rbind(clean_data, cnty_manure_all)

# NOT RUN
# save cleaned data into .rda format.
save(clean_data, file = "../data/usfertilizer_county.rda")

Data sources and processing procedures

Wenlong

2018-04-02