You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

100 line
3.5KB

  1. library(dplyr)
  2. library(purrr)
  3. library(tidyr)
  4. library(stringr)
  5. library(readr)
  6. library(forcats)
  7. library(gganimate)
  8. DATA_DIR <- "data-raw"
  9. fs::dir_create(here::here(DATA_DIR))
  10. DOWNLOAD_DATA <- if (!exists(DOWNLOAD_DATA)) TRUE else DOWNLOAD_DATA
  11. download_file <- function(filename, url, basedir) {
  12. filename <- gsub(" ", "_", filename)
  13. filename <- gsub("/", "-", filename)
  14. fs::dir_create(basedir)
  15. download.file(url, destfile = fs::path(basedir, filename))
  16. }
  17. if (DOWNLOAD_DATA) {
  18. ## ----download-popest-tables-1900-1980, eval=FALSE------------------------
  19. x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/")
  20. xx <-
  21. data_frame(
  22. filename = rvest::html_nodes(x, "a") %>% rvest::html_text(),
  23. url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href")
  24. ) %>%
  25. filter(str_detect(url, "\\.csv")) %>%
  26. mutate(url = str_c(
  27. "https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/",
  28. url
  29. ))
  30. pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980"))
  31. ## ----download-popest-tables-1980-1990, eval=FALSE------------------------
  32. xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>%
  33. rvest::html_nodes(".list.section .uscb-text-link") %>%
  34. rvest::html_attrs() %>%
  35. purrr::transpose() %>%
  36. as_tibble() %>%
  37. unnest() %>%
  38. filter(str_detect(href, "rqi\\.zip")) %>%
  39. select(filename = name, url = href) %>%
  40. mutate(
  41. url = paste0("https:", url),
  42. filename = basename(url)
  43. ) %>%
  44. pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990"))
  45. withr::with_dir(here::here(DATA_DIR, "1980-1990"), {
  46. fs::dir_ls(regexp = "zip") %>%
  47. purrr::walk(unzip)
  48. })
  49. ## ----download-popest-tables-1990-2000, eval=FALSE------------------------
  50. xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>%
  51. rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>%
  52. rvest::html_attrs() %>%
  53. purrr::transpose() %>%
  54. as_tibble() %>%
  55. unnest() %>%
  56. select(filename = name, url = href) %>%
  57. mutate(url = paste0("https:", url),
  58. filename = paste0(filename, ".csv")) %>%
  59. pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000"))
  60. ## ----download-popest-tables-2000-2010----
  61. ## "2000-2010/us-est00int-01.xls"
  62. download_file(
  63. "us-est00int-alldata-5yr.csv",
  64. "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv",
  65. here::here(DATA_DIR, "2000-2010")
  66. )
  67. ## ----download-popest-tables-2010----
  68. ## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"
  69. ## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US
  70. fs::dir_create(here::here(DATA_DIR, "2010-2017"))
  71. if (!fs::file_exists(here::here(DATA_DIR, "2010-2017", "PEP_2017_PEPSYASEXN_with_ann.csv"))) {
  72. rlang::abort(paste(
  73. "Download the Annual Estimates of Resident Population by Single Year of Age from:\n",
  74. "https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US\n",
  75. "and extract `PEP_2017_PEPSYASEXN_with_ann.csv` into:", here::here(DATA_DIR, "2010-2017")
  76. ))
  77. }
  78. ## ----download-popest-projections
  79. ## "np2017_d1.csv"
  80. download_file(
  81. "np2017_d1.csv",
  82. "https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv",
  83. here::here(DATA_DIR)
  84. )
  85. }