The ‘JMA’ web page consists of several layers. You can use a different function to download each component.
I recommend using the existing data, which have already been downloaded and cleaned up.
When you want to download links for climate data, use download_area_links() and download_links(). download_area_links() returns links for 6 areas. download_links() returns links for countries and stations.
For polite scraping, a 5-second interval is set in download_links(), so it takes about 15 minutes to get all station links. Please use the existing links via “data(station_links)” if you do not need to renew the links.
library(clidatajp)
library(magrittr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tibble)
library(ggplot2)
library(stringi)
# Existing data: load the station links bundled with the package.
data(station_links)
# Unescape the Unicode-escaped station names, print the intermediate table,
# then pass the station column to clean_station() and re-attach the URLs.
station_links %>%
  dplyr::mutate("station" := stringi::stri_unescape_unicode(station)) %>%
  print() %>%
  `$`("station") %>%
  clean_station() %>%
  dplyr::bind_cols(station_links["url"])
# Download new data
# If you want links for all countries and all stations, remove head().
url <- "https://www.data.jma.go.jp/gmd/cpd/monitor/nrmlist/"
# Only scrape when gracefully_fail() returns a non-NULL result for the URL.
res <- gracefully_fail(url)
if (!is.null(res)) {
  area_links <- download_area_links()
  station_links <- NULL
  area_links <- head(area_links) # for test
  # Walk area -> country -> station, collecting station links as we go.
  for (i in seq_along(area_links)) {
    print(stringr::str_c("area: ", i, " / ", length(area_links)))
    country_links <- download_links(area_links[i])
    country_links <- head(country_links) # for test
    for (j in seq_along(country_links)) {
      print(stringr::str_c(" country: ", j, " / ", length(country_links)))
      station_links <- c(station_links, download_links(country_links[j]))
    }
  }
  # Wrap the collected links in a one-column tibble and show it.
  station_links <- tibble::tibble(url = station_links)
  station_links
}
I recommend using the existing data, which have already been downloaded and cleaned up.
If you want to know how “data(climate_jp)” was prepared, please check the URL shown below.
https://github.com/matutosi/clidatajp/blob/main/data-raw/climate_jp.R
For polite scraping, a 5-second interval is set in download_climate(), so it takes over 5 hours to get the world climate data for all stations because of the huge amount of data (3444 stations). Please use the existing data via “data(climate_world)” if you do not need to renew the climate data.
# Existing data: load the bundled climate tables and unescape every
# character column for display.
data(climate_jp)
climate_jp %>%
  dplyr::mutate_if(is.character, stringi::stri_unescape_unicode)

data(climate_world)
climate_world %>%
  dplyr::mutate_if(is.character, stringi::stri_unescape_unicode)
# Download new data
# If you want links for all countries and all stations, remove head().
url <- "https://www.data.jma.go.jp/gmd/cpd/monitor/nrmlist/"
# Only scrape when gracefully_fail() returns a non-NULL result for the URL.
res <- gracefully_fail(url)
if (!is.null(res)) {
  # Keep only the first few station URLs as a plain vector (for test).
  station_links <-
    station_links %>%
    head() %>%
    `$`("url")
  climate <- list()
  # Download each station in turn, storing one result per list element.
  for (i in seq_along(station_links)) {
    print(stringr::str_c(i, " / ", length(station_links)))
    climate[[i]] <- download_climate(station_links[i])
  }
  # Stack the per-station results into a single table and show it.
  world_climate <- dplyr::bind_rows(climate)
  world_climate
}
Clean up data before drawing plot.
data(climate_world)
data(climate_jp)
# Combine world and Japanese records, unescape character columns, and keep
# only stations with no missing temperature or precipitation values.
# Rows whose period is "1991-2020" are dropped; rows with NA period are kept.
climate <-
  dplyr::bind_rows(climate_world, climate_jp) %>%
  dplyr::mutate_if(is.character, stringi::stri_unescape_unicode) %>%
  dplyr::group_by(country, station) %>%
  dplyr::filter(sum(is.na(temperature), is.na(precipitation)) == 0) %>%
  dplyr::filter(period != "1991-2020" | is.na(period))

# Summarise per station (mean temperature, total precipitation), re-attach
# the station metadata, and convert N/S and E/W flags into signed lat/lon.
climate <-
  climate %>%
  dplyr::summarise(
    temp = mean(as.numeric(temperature)),
    prec = sum(as.numeric(precipitation))
  ) %>%
  dplyr::left_join(dplyr::distinct(dplyr::select(climate, station:altitude))) %>%
  dplyr::left_join(tibble::tibble(NS = c("S", "N"), ns = c(-1, 1))) %>%
  dplyr::left_join(tibble::tibble(WE = c("W", "E"), we = c(-1, 1))) %>%
  dplyr::group_by(station) %>%
  dplyr::mutate(lat = latitude * ns, lon = longitude * we)
#> `summarise()` has grouped output by 'country'. You can override using the
#> `.groups` argument.
#> Adding missing grouping variables: `country`
#> Joining with `by = join_by(country, station)`
#> Warning in dplyr::left_join(., dplyr::distinct(dplyr::select(climate, station:altitude))): Each row in `x` is expected to match at most 1 row in `y`.
#> ℹ Row 1 of `x` matches multiple rows.
#> ℹ If multiple matches are expected, set `multiple = "all"` to silence this
#> warning.
#> Joining with `by = join_by(NS)`
#> Joining with `by = join_by(WE)`
Draw a world map with temperature.
# Scatter stations by longitude/latitude, coloured by mean temperature
# on a diverging blue-gray-red scale centred at 15.
climate %>%
  ggplot2::ggplot(aes(lon, lat, colour = temp)) +
  scale_colour_gradient2(low = "blue", mid = "gray", high = "red", midpoint = 15) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.key.size = unit(0.3, 'cm'))
# ggsave("temperature.png")
Draw a world map of precipitation, excluding stations with 5000 mm/yr or more (to avoid an extended legend).
# Scatter stations by longitude/latitude, coloured by annual precipitation
# on a yellow-gray-blue scale centred at 1500; stations with prec >= 5000
# are dropped first.
climate %>%
  dplyr::filter(prec < 5000) %>%
  ggplot2::ggplot(aes(lon, lat, colour = prec)) +
  scale_colour_gradient2(low = "yellow", mid = "gray", high = "blue", midpoint = 1500) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.key.size = unit(0.3, 'cm'))
# ggsave("precipitation.png")
Show the relationship between temperature and precipitation, excluding Japan.
# Build the country name for Japan from its escaped Unicode form so the
# source stays ASCII-only; it is compared against the `country` column.
japan <- stringi::stri_unescape_unicode("\\u65e5\\u672c")
# Temperature vs. precipitation for every station outside Japan.
climate %>%
  dplyr::filter(country != japan) %>%
  ggplot2::ggplot(aes(temp, prec)) +
  geom_point() +
  theme_bw() +
  theme(legend.position="none")
# ggsave("climate_nojp.png")
Show relationships between temperature and precipitation including Japan.
# Temperature vs. precipitation for all stations.
climate %>%
  ggplot2::ggplot(aes(temp, prec)) +
  geom_point() +
  theme_bw()
# ggsave("climate_all.png")
Show relationships between temperature and precipitation. Blue: Japan, red: others.
# Temperature vs. precipitation, coloured by whether the station's country
# equals `japan` (defined in an earlier chunk).
climate %>%
  dplyr::mutate(jp = (country == japan)) %>%
  ggplot2::ggplot(aes(temp, prec, colour = jp)) +
  geom_point() +
  theme_bw() +
  theme(legend.position="none")
# ggsave("climate_compare_jp.png")