|
- library(dplyr)
- library(purrr)
- library(tidyr)
- library(stringr)
- library(readr)
- library(forcats)
- library(gganimate)
-
- ## Directory, relative to the project root, that receives every raw download.
- DATA_DIR <- "data-raw"
-
- fs::dir_create(here::here(DATA_DIR))
-
- ## Downloads run by default. Define DOWNLOAD_DATA (e.g. FALSE) before this
- ## script runs to skip them. exists() takes the variable *name* as a string;
- ## handing it the bare symbol evaluates the symbol first, which errors when
- ## the variable is undefined and is an invalid argument when it is defined.
- DOWNLOAD_DATA <- if (exists("DOWNLOAD_DATA")) DOWNLOAD_DATA else TRUE
-
- ## Download `url` into `basedir`, creating the directory when needed.
- ##
- ## filename: name for the saved file; every space becomes "_" and every "/"
- ##   becomes "-" before the name is joined onto `basedir`.
- ## url: location handed to download.file().
- ## basedir: destination directory.
- ##
- ## Returns whatever download.file() returns.
- download_file <- function(filename, url, basedir) {
-   ## A single chartr() pass performs both one-character substitutions.
-   safe_name <- chartr(" /", "_-", filename)
-   fs::dir_create(basedir)
-   target <- fs::path(basedir, safe_name)
-   download.file(url, destfile = target)
- }
-
- if (DOWNLOAD_DATA) {
- ## ----download-popest-tables-1900-1980, eval=FALSE------------------------
- ## Scrape the directory listing and download every linked CSV into
- ## <DATA_DIR>/1900-1980. The listing URL is stored once because it is also
- ## the prefix that turns each href into a full download URL.
- popest_1900_1980_url <- "https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/"
-
- ## `x` and `xx` keep their original names in case later code reads them.
- x <- xml2::read_html(popest_1900_1980_url)
-
- xx <-
-   rvest::html_nodes(x, "a") %>%
-   {
-     ## Query the anchors once; text becomes the file name, href the link.
-     tibble(
-       filename = rvest::html_text(.),
-       url = rvest::html_attr(., "href")
-     )
-   } %>%
-   ## Anchors without an href yield NA here and are dropped by filter().
-   filter(str_detect(url, "\\.csv")) %>%
-   mutate(url = str_c(popest_1900_1980_url, url))
-
- pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980"))
-
- ## ----download-popest-tables-1980-1990, eval=FALSE------------------------
- ## Collect the href of every matching link, keep the "rqi.zip" archives and
- ## download them into <DATA_DIR>/1980-1990. Reading the attribute directly
- ## (instead of transposing all attributes and unnesting) yields NA for a link
- ## without an href, and filter() then drops it.
- xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>%
-   rvest::html_nodes(".list.section .uscb-text-link") %>%
-   rvest::html_attr("href") %>%
-   tibble(url = .) %>%
-   filter(str_detect(url, "rqi\\.zip")) %>%
-   mutate(
-     ## The hrefs carry no scheme, so one is prepended.
-     url = paste0("https:", url),
-     filename = basename(url)
-   ) %>%
-   pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990"))
-
- ## Unpack each archive next to itself. The pattern is anchored so that only
- ## files ending in ".zip" are handed to unzip().
- withr::with_dir(here::here(DATA_DIR, "1980-1990"), {
-   fs::dir_ls(regexp = "\\.zip$") %>%
-     purrr::walk(unzip)
- })
-
- ## ----download-popest-tables-1990-2000, eval=FALSE------------------------
- ## Each matching link supplies the file name (its `name` attribute) and the
- ## download location (its `href`); files land in <DATA_DIR>/1990-2000.
- xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>%
-   rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>%
-   {
-     ## html_attr() returns NA where a link lacks the attribute.
-     tibble(
-       filename = rvest::html_attr(., "name"),
-       url = rvest::html_attr(., "href")
-     )
-   } %>%
-   ## Skip incomplete links rather than saving several files as "NA.csv".
-   filter(!is.na(filename), !is.na(url)) %>%
-   mutate(
-     ## The hrefs carry no scheme, so one is prepended.
-     url = paste0("https:", url),
-     filename = paste0(filename, ".csv")
-   ) %>%
-   pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000"))
-
- ## ----download-popest-tables-2000-2010----
- ## "2000-2010/us-est00int-01.xls"
- ## Fetch `us-est00int-alldata-5yr.csv` from the 2000-2010 intercensal
- ## national datasets folder and store it under <DATA_DIR>/2000-2010.
- download_file(
-   filename = "us-est00int-alldata-5yr.csv",
-   url = "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv",
-   basedir = here::here(DATA_DIR, "2000-2010")
- )
-
-
- ## ----download-popest-tables-2010----
- ## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"
- ## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US
- ## This table is not fetched by the script: the target folder is created and,
- ## when the expected CSV is absent, the run stops with instructions.
- pep_2017_dir <- here::here(DATA_DIR, "2010-2017")
- pep_2017_csv <- here::here(DATA_DIR, "2010-2017", "PEP_2017_PEPSYASEXN_with_ann.csv")
-
- fs::dir_create(pep_2017_dir)
-
- if (!fs::file_exists(pep_2017_csv)) {
-   rlang::abort(paste(
-     "Download the Annual Estimates of Resident Population by Single Year of Age from:\n",
-     "https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US\n",
-     "and extract `PEP_2017_PEPSYASEXN_with_ann.csv` into:", pep_2017_dir
-   ))
- }
-
- ## ----download-popest-projections
- ## "np2017_d1.csv"
- ## Fetch `np2017_d1.csv` from the 2017 population projections datasets folder
- ## and store it directly in <DATA_DIR>.
- download_file(
-   filename = "np2017_d1.csv",
-   url = "https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv",
-   basedir = here::here(DATA_DIR)
- )
- }
|