| ^README\.Rmd$ |
| # News | |||||
| ## 2018-10-25 | |||||
| Project Started |
| library(dplyr) | |||||
| library(purrr) | |||||
| library(tidyr) | |||||
| library(stringr) | |||||
| library(readr) | |||||
| library(forcats) | |||||
| library(gganimate) | |||||
| DATA_DIR <- "data-raw" | |||||
| fs::dir_create(here::here(DATA_DIR)) | |||||
| DOWNLOAD_DATA <- if (!exists(DOWNLOAD_DATA)) TRUE else DOWNLOAD_DATA | |||||
| download_file <- function(filename, url, basedir) { | |||||
| filename <- gsub(" ", "_", filename) | |||||
| filename <- gsub("/", "-", filename) | |||||
| fs::dir_create(basedir) | |||||
| download.file(url, destfile = fs::path(basedir, filename)) | |||||
| } | |||||
| if (DOWNLOAD_DATA) { | |||||
| ## ----download-popest-tables-1900-1980, eval=FALSE------------------------ | |||||
| x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/") | |||||
| xx <- | |||||
| data_frame( | |||||
| filename = rvest::html_nodes(x, "a") %>% rvest::html_text(), | |||||
| url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href") | |||||
| ) %>% | |||||
| filter(str_detect(url, "\\.csv")) %>% | |||||
| mutate(url = str_c( | |||||
| "https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/", | |||||
| url | |||||
| )) | |||||
| pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980")) | |||||
| ## ----download-popest-tables-1980-1990, eval=FALSE------------------------ | |||||
| xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>% | |||||
| rvest::html_nodes(".list.section .uscb-text-link") %>% | |||||
| rvest::html_attrs() %>% | |||||
| purrr::transpose() %>% | |||||
| as_tibble() %>% | |||||
| unnest() %>% | |||||
| filter(str_detect(href, "rqi\\.zip")) %>% | |||||
| select(filename = name, url = href) %>% | |||||
| mutate( | |||||
| url = paste0("https:", url), | |||||
| filename = basename(url) | |||||
| ) %>% | |||||
| pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990")) | |||||
| withr::with_dir(here::here(DATA_DIR, "1980-1990"), { | |||||
| fs::dir_ls(regexp = "zip") %>% | |||||
| purrr::walk(unzip) | |||||
| }) | |||||
| ## ----download-popest-tables-1990-2000, eval=FALSE------------------------ | |||||
| xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>% | |||||
| rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>% | |||||
| rvest::html_attrs() %>% | |||||
| purrr::transpose() %>% | |||||
| as_tibble() %>% | |||||
| unnest() %>% | |||||
| select(filename = name, url = href) %>% | |||||
| mutate(url = paste0("https:", url), | |||||
| filename = paste0(filename, ".csv")) %>% | |||||
| pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000")) | |||||
| ## ----download-popest-tables-2000-2010---- | |||||
| ## "2000-2010/us-est00int-01.xls" | |||||
| download_file( | |||||
| "us-est00int-alldata-5yr.csv", | |||||
| "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv", | |||||
| here::here(DATA_DIR, "2000-2010") | |||||
| ) | |||||
| ## ----download-popest-tables-2010---- | |||||
| ## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv" | |||||
| ## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US | |||||
| fs::dir_create(here::here(DATA_DIR, "2010-2017")) | |||||
| if (!fs::file_exists(here::here(DATA_DIR, "2010-2017", "PEP_2017_PEPSYASEXN_with_ann.csv"))) { | |||||
| rlang::abort(paste( | |||||
| "Download the Annual Estimates of Resident Population by Single Year of Age from:\n", | |||||
| "https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US\n", | |||||
| "and extract `PEP_2017_PEPSYASEXN_with_ann.csv` into:", here::here(DATA_DIR, "2010-2017") | |||||
| )) | |||||
| } | |||||
| ## ----download-popest-projections | |||||
| ## "np2017_d1.csv" | |||||
| download_file( | |||||
| "np2017_d1.csv", | |||||
| "https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv", | |||||
| here::here(DATA_DIR) | |||||
| ) | |||||
| } |
| library(dplyr) | |||||
| library(purrr) | |||||
| library(tidyr) | |||||
| library(stringr) | |||||
| library(readr) | |||||
| library(readxl) | |||||
| library(fs) | |||||
| # library(here) | |||||
| library(lubridate) | |||||
| library(forcats) | |||||
| DATA_DIR <- "data-raw" | |||||
| pop <- list() | |||||
| import_pre1980 <- function(file) { | |||||
| # cli::cat_bullet(file) | |||||
| year <- str_extract(file, "\\d{4}\\.csv") | |||||
| year <- as.integer(sub("\\.csv", "", year)) | |||||
| skip <- if (year >= 1960) 8 else 9 | |||||
| x <- read_csv(file, col_names = FALSE, skip = skip, | |||||
| col_types = cols_only( | |||||
| X1 = col_guess(), X2 = col_guess(), | |||||
| X3 = col_guess(), X4 = col_guess() | |||||
| )) | |||||
| colnames(x) <- c("Age", "Total", "Total Male", "Total Female") | |||||
| x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ] | |||||
| stopifnot(nrow(x) %in% c(76L, 86L)) | |||||
| x %>% | |||||
| mutate( | |||||
| Year = year, | |||||
| Age = sub("\\+", "", Age), | |||||
| Age = as.integer(Age) | |||||
| ) %>% | |||||
| select(Year, everything()) | |||||
| } | |||||
| pop[["1900-1980"]] <- fs::dir_ls( | |||||
| here::here(DATA_DIR, "1900-1980") | |||||
| ) %>% | |||||
| map_dfr(import_pre1980) | |||||
| import_1980_1990 <- function(file) { | |||||
| read_fwf( | |||||
| file, | |||||
| col_positions = fwf_positions( | |||||
| start = c(3, 5, 7, 12, 22, 32), | |||||
| end = c(4, 6, 9, 20, 30, 40), | |||||
| col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female") | |||||
| ), | |||||
| skip = 0 | |||||
| ) %>% | |||||
| filter(Age < 101, Month == 4, Year == min(Year)) %>% | |||||
| mutate(Year = Year + 1900) | |||||
| } | |||||
| pop[["1980-1990"]] <- fs::dir_ls( | |||||
| here::here(DATA_DIR, "1980-1990"), | |||||
| regexp = "TXT" | |||||
| ) %>% | |||||
| map_dfr(import_1980_1990) | |||||
| import_1990_2000 <- function(file) { | |||||
| x <- read_csv(file, col_names = FALSE, skip = 3) | |||||
| colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female") | |||||
| x %>% | |||||
| filter( | |||||
| !is.na(Total), | |||||
| !str_detect(Age, "All") | |||||
| ) %>% | |||||
| mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>% | |||||
| filter( | |||||
| lubridate::month(Month) == 4 | |||||
| ) %>% | |||||
| mutate( | |||||
| Year = lubridate::year(Month), | |||||
| Age = str_remove(Age, "\\+"), | |||||
| Age = as.integer(Age) | |||||
| ) %>% | |||||
| select(Year, everything(), -Month) | |||||
| } | |||||
| pop[["1990-2000"]] <- fs::dir_ls( | |||||
| here::here(DATA_DIR, "1990-2000"), | |||||
| regexp = "\\d{4}\\.csv" | |||||
| ) %>% | |||||
| map_dfr(import_1990_2000) | |||||
| # pop[["2000-2010"]] <- read_csv( | |||||
| # here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv") | |||||
| # ) %>% | |||||
| # select(Month = MONTH, Year = YEAR, Age = AGE, | |||||
| # Total = TOT_POP, `Total Male` = TOT_MALE, | |||||
| # `Total Female` = TOT_FEMALE) %>% | |||||
| # filter(Year > 2000, Age <= 125) | |||||
| # pop[["2000-2010"]] <- readxl::read_xls( | |||||
| # here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"), | |||||
| # sheet = "Sheet1" | |||||
| # ) %>% | |||||
| # mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>% | |||||
| # select(-age_label) %>% | |||||
| # gather(Year, value, -1:-2) %>% | |||||
| # spread(Group, value) %>% | |||||
| # mutate(Year = as.integer(Year)) %>% | |||||
| # filter(Year > 2000) | |||||
| pop[["2000-2010"]] <- | |||||
| readr::read_csv( | |||||
| here::here(DATA_DIR, "2000-2010", "us-est00int-alldata-5yr.csv") | |||||
| ) %>% | |||||
| select( | |||||
| Year = year, | |||||
| Age = AGEGRP, | |||||
| Total = TOT_POP, | |||||
| `Total Male` = TOT_MALE, | |||||
| `Total Female` = TOT_FEMALE | |||||
| ) %>% | |||||
| filter(Age != 0, Year > 2000, Year < 2010) %>% | |||||
| mutate(Age = (Age - 1) * 5) | |||||
| pop[["2010-2017"]] <- readr::read_csv( | |||||
| here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"), | |||||
| skip = 1 | |||||
| ) %>% | |||||
| select(-1:-4) %>% | |||||
| gather(key, value, -Sex) %>% | |||||
| filter(str_detect(key, "Population Estimate")) %>% | |||||
| mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>% | |||||
| filter(!str_detect(key, "Total|Median")) %>% | |||||
| separate(key, c("Year", "Age"), sep = ";") %>% | |||||
| mutate( | |||||
| Year = as.integer(Year), | |||||
| Age = sub("+", "", Age, fixed = TRUE), | |||||
| Age = as.integer(Age), | |||||
| Sex = recode( | |||||
| Sex, | |||||
| "Both Sexes" = "Total", | |||||
| "Female" = "Total Female", | |||||
| "Male" = "Total Male" | |||||
| ) | |||||
| ) %>% | |||||
| filter(Year < 2016) %>% | |||||
| spread(Sex, value) | |||||
| # pop[["2018"]] <- readr::read_tsv( | |||||
| # here::here(DATA_DIR, "national-population-projections_2014-2060.txt") | |||||
| # ) %>% | |||||
| # select(-1) %>% | |||||
| # filter(!is.na(Year)) %>% | |||||
| # select(contains("Code"), contains("Pop")) %>% | |||||
| # {colnames(.) <- sub(" Code", "", colnames(.)); .} %>% | |||||
| # group_by(Year, Age) %>% | |||||
| # mutate(Total = sum(`Projected Populations`)) %>% | |||||
| # ungroup() %>% | |||||
| # spread(Gender, `Projected Populations`) %>% | |||||
| # mutate( | |||||
| # Age = sub("+", "", Age, fixed = TRUE), | |||||
| # Age = as.integer(Age) | |||||
| # ) %>% | |||||
| # filter(Year > 2017) | |||||
| pop[["2018"]] <- | |||||
| readr::read_csv( | |||||
| here::here(DATA_DIR, "np2017_d1.csv") | |||||
| ) %>% | |||||
| filter(ORIGIN == 0, RACE == 0) %>% | |||||
| gather(Age, n, -SEX:-YEAR) %>% | |||||
| mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>% | |||||
| spread(SEX, n) %>% | |||||
| filter(Age != "TOTAL_POP") %>% | |||||
| mutate( | |||||
| Age = sub("POP_", "", Age, fixed = TRUE), | |||||
| Age = as.integer(Age) | |||||
| ) %>% | |||||
| select(Year = YEAR, Age, starts_with("Total")) | |||||
| fs::dir_create(here::here("data")) | |||||
| saveRDS(pop, here::here("data", "pop_estimates_by_age.rds")) |
| ## ----setup, include=FALSE------------------------------------------------ | |||||
| knitr::opts_chunk$set(echo=FALSE) | |||||
| ## ----library------------------------------------------------------------- | |||||
| library(dplyr) | |||||
| library(purrr) | |||||
| library(tidyr) | |||||
| library(stringr) | |||||
| library(readr) | |||||
| library(forcats) | |||||
| library(gganimate) | |||||
| DATA_DIR <- "data-raw" | |||||
| ANIM_OUT <- "anim" | |||||
| fs::dir_create(here::here(ANIM_OUT)) | |||||
| pop <- readRDS(here::here("data", "pop_estimates_by_age.rds")) | |||||
| ## ------------------------------------------------------------------------ | |||||
| ga <- | |||||
| pop %>% | |||||
| bind_rows() %>% | |||||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||||
| group_by(age_group_int) %>% | |||||
| mutate( | |||||
| # age_group = paste(min(Age), max(Age), sep = " - "), | |||||
| age_group = paste(min(Age)), | |||||
| age_group = ifelse(Age >= 100, "100+", age_group) | |||||
| ) %>% | |||||
| ungroup() %>% | |||||
| arrange(age_group_int, Year) %>% | |||||
| mutate(age_group = fct_inorder(age_group)) %>% | |||||
| select(-age_group_int) %>% | |||||
| group_by(Year, age_group) %>% | |||||
| summarize(Total = sum(Total)) %>% | |||||
| group_by(Year) %>% | |||||
| mutate(GrandTotal = sum(Total)) %>% | |||||
| ungroup() %>% | |||||
| mutate(Total = Total/GrandTotal) %>% | |||||
| arrange(Year, age_group) %>% | |||||
| # complete(Year, age_group, fill = list(Total = 0)) %>% | |||||
| # filter(Year > 2000) %>% | |||||
| { | |||||
| ggplot(.) + | |||||
| # aes(age_group, Total) + | |||||
| # geom_vline(xintercept = c(1980, 1990), color = "grey50") + | |||||
| # geom_line(aes(group = age_group)) + | |||||
| # geom_point(size = 0.2) + | |||||
| # geom_col(fill = "grey40") + | |||||
| # facet_wrap(~ Year) + | |||||
| # coord_flip() + | |||||
| # scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) + | |||||
| geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) + | |||||
| geom_segment(aes(yend = age_group, y = age_group, xend = Total), | |||||
| x = 0, | |||||
| size = 5, | |||||
| color = "grey40", | |||||
| alpha = 0.25, | |||||
| data = filter(., Year == min(Year)) %>% select(-Year) | |||||
| ) + | |||||
| # annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) + | |||||
| theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") + | |||||
| theme(panel.grid.major.y = element_blank()) + | |||||
| scale_x_continuous(labels = scales::percent, expand = c(0, 0, 0.05, 0)) + | |||||
| # labs(x = "Percent of Population", y = NULL) + | |||||
| theme(axis.title.x = element_text(hjust = 0.5, size = 30)) + | |||||
| labs(x = "{closest_state}", y = NULL) + | |||||
| gganimate::transition_time(Year) + | |||||
| gganimate::transition_states(Year, 1, 0, wrap = FALSE) + | |||||
| gganimate::ease_aes("linear") + | |||||
| gganimate::enter_fade() | |||||
| } | |||||
| grkmisc::logger(msg = "Writing MP4 population bar animation") | |||||
| anim_save( | |||||
| here::here(ANIM_OUT, "pop-anim.mp4"), | |||||
| animate(ga, width = 1040*2, height = 480*2, res = 144, | |||||
| fps = 20, nframes = 161*2, renderer = av_renderer()) | |||||
| ) | |||||
| grkmisc::logger(msg = "Writing gif population bar animation") | |||||
| anim_save( | |||||
| here::here(ANIM_OUT, "pop-anim.gif"), | |||||
| animate(ga, width = 1040*2, height = 480*2, res = 144, | |||||
| fps = 20, nframes = 161*2, | |||||
| renderer = gganimate::gifski_renderer(loop = FALSE)) | |||||
| ) | |||||
| pop %>% | |||||
| bind_rows() %>% | |||||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||||
| select(1:3, age_group_int) %>% | |||||
| filter(age_group_int > 5) %>% | |||||
| mutate( | |||||
| old = age_group_int >= 13, | |||||
| old = c("Adult", "Older Adult")[old + 1] | |||||
| ) %>% | |||||
| group_by(Year, old) %>% | |||||
| summarize(n = sum(Total)) %>% | |||||
| spread(old, n) %>% | |||||
| { | |||||
| ggplot(.) + | |||||
| aes(x = Year) + | |||||
| geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) + | |||||
| geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) + | |||||
| geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange, size = 2) | |||||
| } %>% | |||||
| invisible() | |||||
| elder_dependency <- | |||||
| pop %>% | |||||
| bind_rows() %>% | |||||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||||
| select(1:3, age_group_int) %>% | |||||
| mutate( | |||||
| old = case_when( | |||||
| age_group_int < 3 ~ "Children", | |||||
| age_group_int >= 13 ~ "Older Adult", | |||||
| TRUE ~ "Adult" | |||||
| ) | |||||
| ) %>% | |||||
| group_by(Year, old) %>% | |||||
| summarize(n = sum(Total)) %>% | |||||
| spread(old, n) %>% | |||||
| mutate( | |||||
| dependency_elder = `Older Adult` / Adult, | |||||
| dependency_all = (`Older Adult` + Children) / Adult | |||||
| ) | |||||
| ga_elder_dep <- | |||||
| ggplot(elder_dependency) + | |||||
| aes(Year, dependency_elder) + | |||||
| geom_point(color = "#00589a") + | |||||
| geom_point(data = filter(elder_dependency, Year == 2018), size = 4, color = "#eb1455") + | |||||
| labs(y = "Dependency Ratio", x = NULL) + | |||||
| theme_minimal(base_size = 24, base_family = "Fira Sans Condensed") + | |||||
| scale_y_continuous(labels = scales::percent) + | |||||
| gganimate::transition_states(Year, 1, 0, FALSE) + | |||||
| gganimate::shadow_mark() | |||||
| grkmisc::logger(msg = "Writing mp4 elder dependency animation") | |||||
| anim_save( | |||||
| here::here(ANIM_OUT, "pop-dependency.mp4"), | |||||
| animate(ga_elder_dep, width = 1040*2, height = 480*2, res = 144, | |||||
| fps = 18, nframes = 161, renderer = av_renderer()) | |||||
| ) | |||||
| grkmisc::logger(msg = "Writing gif elder dependency animation") | |||||
| anim_save( | |||||
| here::here(ANIM_OUT, "pop-dependency.gif"), | |||||
| animate(ga_elder_dep, width = 1040*2, height = 480*2, res = 144, | |||||
| fps = 18, nframes = 161, | |||||
| renderer = gganimate::gifski_renderer(loop = FALSE)) | |||||
| ) | |||||
| elder_dep_together <- | |||||
| elder_dependency %>% | |||||
| ungroup() %>% | |||||
| gather(timeseries, ratio, starts_with("depend")) %>% | |||||
| select(-Adult:-`Older Adult`) %>% | |||||
| mutate(timeseries = ifelse(timeseries == "dependency_elder", 0, 1)) | |||||
| ga_dep_together <- | |||||
| ggplot(elder_dep_together) + | |||||
| aes(Year, ratio) + | |||||
| geom_point(color = "#00589a") + | |||||
| # geom_point(color = "#82c878") + | |||||
| # geom_point(data = filter(., Year == 2018), size = 4, color = "#eb1455") + | |||||
| labs(y = "Dependency Ratio", x = NULL) + | |||||
| theme_minimal(base_size = 24, base_family = "Fira Sans Condensed") + | |||||
| scale_y_continuous(labels = scales::percent) + | |||||
| gganimate::transition_states(timeseries, 1, 0, FALSE) + | |||||
| gganimate::view_follow(fixed_x = TRUE) + | |||||
| gganimate::ease_aes("sine-in-out")+ | |||||
| gganimate::shadow_mark() | |||||
| grkmisc::logger(msg = "Writing mp4 all dependency animation") | |||||
| anim_save( | |||||
| here::here(ANIM_OUT, "pop-dependency-both.mp4"), | |||||
| animate(ga_dep_together, width = 1040*2, height = 480*2, res = 144, | |||||
| fps = 18, nframes = 80, | |||||
| renderer = av_renderer()) | |||||
| ) | |||||
| grkmisc::logger(msg = "Writing gif all dependency animation") | |||||
| anim_save( | |||||
| here::here(ANIM_OUT, "pop-dependency-both.gif"), | |||||
| animate(ga_dep_together, width = 1040*2, height = 480*2, res = 144, | |||||
| fps = 18, nframes = 80, | |||||
| renderer = gganimate::gifski_renderer(loop = FALSE)) | |||||
| ) |
| ## ----setup, include=FALSE------------------------------------------------ | |||||
| knitr::opts_chunk$set(echo=FALSE) | |||||
| ## ----library------------------------------------------------------------- | |||||
| library(dplyr) | |||||
| library(purrr) | |||||
| library(tidyr) | |||||
| library(stringr) | |||||
| library(readr) | |||||
| library(forcats) | |||||
| library(gganimate) | |||||
| DATA_DIR <- "data-raw" | |||||
| ANIM_OUT <- "anim" | |||||
| fs::dir_create(here::here(DATA_DIR)) | |||||
| fs::dir_create(here::here(ANIM_OUT)) | |||||
| DOWNLOAD_DATA <- TRUE | |||||
| download_file <- function(filename, url, basedir) { | |||||
| filename <- gsub(" ", "_", filename) | |||||
| filename <- gsub("/", "-", filename) | |||||
| fs::dir_create(basedir) | |||||
| download.file(url, destfile = fs::path(basedir, filename)) | |||||
| } | |||||
| if (DOWNLOAD_DATA) { | |||||
| ## ----download-popest-tables-1900-1980, eval=FALSE------------------------ | |||||
| x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/") | |||||
| xx <- | |||||
| data_frame( | |||||
| filename = rvest::html_nodes(x, "a") %>% rvest::html_text(), | |||||
| url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href") | |||||
| ) %>% | |||||
| filter(str_detect(url, "\\.csv")) %>% | |||||
| mutate(url = str_c( | |||||
| "https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/", | |||||
| url | |||||
| )) | |||||
| pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980")) | |||||
| ## ----download-popest-tables-1980-1990, eval=FALSE------------------------ | |||||
| xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>% | |||||
| rvest::html_nodes(".list.section .uscb-text-link") %>% | |||||
| rvest::html_attrs() %>% | |||||
| purrr::transpose() %>% | |||||
| as_tibble() %>% | |||||
| unnest() %>% | |||||
| select(filename = name, url = href) %>% | |||||
| mutate(url = paste0("https:", url)) %>% | |||||
| pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990")) | |||||
| ## ----download-popest-tables-1990-2000, eval=FALSE------------------------ | |||||
| xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>% | |||||
| rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>% | |||||
| rvest::html_attrs() %>% | |||||
| purrr::transpose() %>% | |||||
| as_tibble() %>% | |||||
| unnest() %>% | |||||
| select(filename = name, url = href) %>% | |||||
| mutate(url = paste0("https:", url), | |||||
| filename = paste0(filename, ".csv")) %>% | |||||
| pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000")) | |||||
| ## ----download-popest-tables-2000-2010---- | |||||
| ## "2000-2010/us-est00int-01.xls" | |||||
| download_file( | |||||
| "us-est00int-alldata-5yr.csv", | |||||
| "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv", | |||||
| here::here(DATA_DIR, "2000-2010") | |||||
| ) | |||||
| ## ----download-popest-tables-2010---- | |||||
| ## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv" | |||||
| ## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US | |||||
| ## ----download-popest-projections | |||||
| ## "np2017_d1.csv" | |||||
| download_file( | |||||
| "np2017_d1.csv", | |||||
| "https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv", | |||||
| here::here(DATA_DIR) | |||||
| ) | |||||
| } | |||||
| ## ----import-historical--------------------------------------------------- | |||||
| pop <- list() | |||||
| import_pre1980 <- function(file) { | |||||
| # cli::cat_bullet(file) | |||||
| year <- str_extract(file, "\\d{4}\\.csv") | |||||
| year <- as.integer(sub("\\.csv", "", year)) | |||||
| skip <- if (year >= 1960) 8 else 9 | |||||
| x <- read_csv(file, col_names = FALSE, skip = skip, | |||||
| col_types = cols_only( | |||||
| X1 = col_guess(), X2 = col_guess(), | |||||
| X3 = col_guess(), X4 = col_guess() | |||||
| )) | |||||
| colnames(x) <- c("Age", "Total", "Total Male", "Total Female") | |||||
| x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ] | |||||
| stopifnot(nrow(x) %in% c(76L, 86L)) | |||||
| x %>% | |||||
| mutate( | |||||
| Year = year, | |||||
| Age = sub("\\+", "", Age), | |||||
| Age = as.integer(Age) | |||||
| ) %>% | |||||
| select(Year, everything()) | |||||
| } | |||||
| pop[["1900-1980"]] <- fs::dir_ls( | |||||
| here::here(DATA_DIR, "1900-1980") | |||||
| ) %>% | |||||
| map_dfr(import_pre1980) | |||||
| import_1980_1990 <- function(file) { | |||||
| read_fwf( | |||||
| file, | |||||
| col_positions = fwf_positions( | |||||
| start = c(3, 5, 7, 12, 22, 32), | |||||
| end = c(4, 6, 9, 20, 30, 40), | |||||
| col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female") | |||||
| ), | |||||
| skip = 0 | |||||
| ) %>% | |||||
| filter(Age < 101, Month == 4, Year == min(Year)) %>% | |||||
| mutate(Year = Year + 1900) | |||||
| } | |||||
| pop[["1980-1990"]] <- fs::dir_ls( | |||||
| here::here(DATA_DIR, "1980-1990"), | |||||
| regexp = "TXT" | |||||
| ) %>% | |||||
| map_dfr(import_1980_1990) | |||||
| import_1990_2000 <- function(file) { | |||||
| x <- read_csv(file, col_names = FALSE, skip = 3) | |||||
| colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female") | |||||
| x %>% | |||||
| filter( | |||||
| !is.na(Total), | |||||
| !str_detect(Age, "All") | |||||
| ) %>% | |||||
| mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>% | |||||
| filter( | |||||
| lubridate::month(Month) == 4 | |||||
| ) %>% | |||||
| mutate( | |||||
| Year = lubridate::year(Month), | |||||
| Age = str_remove(Age, "\\+"), | |||||
| Age = as.integer(Age) | |||||
| ) %>% | |||||
| select(Year, everything(), -Month) | |||||
| } | |||||
| pop[["1990-2000"]] <- fs::dir_ls( | |||||
| here::here(DATA_DIR, "1990-2000"), | |||||
| regexp = "\\d{4}\\.csv" | |||||
| ) %>% | |||||
| map_dfr(import_1990_2000) | |||||
| # pop[["2000-2010"]] <- read_csv( | |||||
| # here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv") | |||||
| # ) %>% | |||||
| # select(Month = MONTH, Year = YEAR, Age = AGE, | |||||
| # Total = TOT_POP, `Total Male` = TOT_MALE, | |||||
| # `Total Female` = TOT_FEMALE) %>% | |||||
| # filter(Year > 2000, Age <= 125) | |||||
| pop[["2000-2010"]] <- readxl::read_xls( | |||||
| here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"), | |||||
| sheet = "Sheet1" | |||||
| ) %>% | |||||
| mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>% | |||||
| select(-age_label) %>% | |||||
| gather(Year, value, -1:-2) %>% | |||||
| spread(Group, value) %>% | |||||
| mutate(Year = as.integer(Year)) %>% | |||||
| filter(Year > 2000) | |||||
| pop[["2010-2017"]] <- readr::read_csv( | |||||
| here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"), | |||||
| skip = 1 | |||||
| ) %>% | |||||
| select(-1:-4) %>% | |||||
| gather(key, value, -Sex) %>% | |||||
| filter(str_detect(key, "Population Estimate")) %>% | |||||
| mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>% | |||||
| filter(!str_detect(key, "Total|Median")) %>% | |||||
| separate(key, c("Year", "Age"), sep = ";") %>% | |||||
| mutate( | |||||
| Year = as.integer(Year), | |||||
| Age = sub("+", "", Age, fixed = TRUE), | |||||
| Age = as.integer(Age), | |||||
| Sex = recode( | |||||
| Sex, | |||||
| "Both Sexes" = "Total", | |||||
| "Female" = "Total Female", | |||||
| "Male" = "Total Male" | |||||
| ) | |||||
| ) %>% | |||||
| spread(Sex, value) | |||||
| # pop[["2018"]] <- readr::read_tsv( | |||||
| # here::here(DATA_DIR, "national-population-projections_2014-2060.txt") | |||||
| # ) %>% | |||||
| # select(-1) %>% | |||||
| # filter(!is.na(Year)) %>% | |||||
| # select(contains("Code"), contains("Pop")) %>% | |||||
| # {colnames(.) <- sub(" Code", "", colnames(.)); .} %>% | |||||
| # group_by(Year, Age) %>% | |||||
| # mutate(Total = sum(`Projected Populations`)) %>% | |||||
| # ungroup() %>% | |||||
| # spread(Gender, `Projected Populations`) %>% | |||||
| # mutate( | |||||
| # Age = sub("+", "", Age, fixed = TRUE), | |||||
| # Age = as.integer(Age) | |||||
| # ) %>% | |||||
| # filter(Year > 2017) | |||||
| pop[["2018"]] <- | |||||
| readr::read_csv( | |||||
| here::here(DATA_DIR, "np2017_d1.csv") | |||||
| ) %>% | |||||
| gather(Age, n, -SEX:-YEAR) %>% | |||||
| mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>% | |||||
| spread(SEX, n) %>% | |||||
| filter(Age != "TOTAL_POP") %>% | |||||
| mutate( | |||||
| Age = sub("POP_", "", Age, fixed = TRUE), | |||||
| Age = as.integer(Age) | |||||
| ) %>% | |||||
| select(Year = YEAR, Age, starts_with("Total")) | |||||
| ## ------------------------------------------------------------------------ | |||||
| ga <- | |||||
| pop %>% | |||||
| bind_rows() %>% | |||||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||||
| group_by(age_group_int) %>% | |||||
| mutate( | |||||
| # age_group = paste(min(Age), max(Age), sep = " - "), | |||||
| age_group = paste(min(Age)), | |||||
| age_group = ifelse(Age >= 100, "100+", age_group) | |||||
| ) %>% | |||||
| ungroup() %>% | |||||
| arrange(age_group_int, Year) %>% | |||||
| mutate(age_group = fct_inorder(age_group)) %>% | |||||
| select(-age_group_int) %>% | |||||
| group_by(Year, age_group) %>% | |||||
| summarize(Total = sum(Total)) %>% | |||||
| group_by(Year) %>% | |||||
| mutate(GrandTotal = sum(Total)) %>% | |||||
| ungroup() %>% | |||||
| mutate(Total = Total/GrandTotal) %>% | |||||
| arrange(Year, age_group) %>% View() | |||||
| # complete(Year, age_group, fill = list(Total = 0)) %>% | |||||
| # filter(Year > 2000) %>% | |||||
| { | |||||
| ggplot(.) + | |||||
| # aes(age_group, Total) + | |||||
| # geom_vline(xintercept = c(1980, 1990), color = "grey50") + | |||||
| # geom_line(aes(group = age_group)) + | |||||
| # geom_point(size = 0.2) + | |||||
| # geom_col(fill = "grey40") + | |||||
| # facet_wrap(~ Year) + | |||||
| # coord_flip() + | |||||
| # scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) + | |||||
| geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) + | |||||
| geom_segment(aes(yend = age_group, y = age_group, xend = Total), | |||||
| x = 0, | |||||
| size = 5, | |||||
| color = "grey40", | |||||
| alpha = 0.25, | |||||
| data = filter(., Year == min(Year)) %>% select(-Year) | |||||
| ) + | |||||
| # annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) + | |||||
| theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") + | |||||
| theme(panel.grid.major.y = element_blank()) + | |||||
| scale_x_continuous(labels = scales::percent, expand = c(0, 0)) + | |||||
| # labs(x = "Percent of Population", y = NULL) + | |||||
| theme(axis.title.x = element_text(hjust = 0.5, size = 30)) + | |||||
| labs(x = "{closest_state}", y = NULL) + | |||||
| gganimate::transition_time(Year) + | |||||
| gganimate::transition_states(Year, 1, 0, wrap = FALSE) + | |||||
| gganimate::ease_aes("linear") + | |||||
| gganimate::enter_fade() | |||||
| } | |||||
| anim_save( | |||||
| here::here(ANIM_OUT, "pop-anim.mp4"), | |||||
| animate(ga, width = 1040*2, height = 480*2, res = 144, | |||||
| fps = 20, nframes = 161*2, | |||||
| renderer = av_renderer()) | |||||
| ) | |||||
| pop %>% | |||||
| bind_rows() %>% | |||||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||||
| select(1:3, age_group_int) %>% | |||||
| filter(age_group_int > 5) %>% | |||||
| mutate( | |||||
| old = age_group_int >= 13, | |||||
| old = c("Adult", "Older Adult")[old + 1] | |||||
| ) %>% | |||||
| group_by(Year, old) %>% | |||||
| summarize(n = sum(Total)) %>% | |||||
| spread(old, n) %>% | |||||
| { | |||||
| ggplot(.) + | |||||
| aes(x = Year) + | |||||
| geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) + | |||||
| geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) + | |||||
| geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange) | |||||
| } | |||||
| ga2 <- | |||||
| pop %>% | |||||
| bind_rows() %>% | |||||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||||
| select(1:3, age_group_int) %>% | |||||
| filter(age_group_int >= 3) %>% | |||||
| mutate( | |||||
| old = age_group_int >= 13, | |||||
| old = c("Adult", "Older Adult")[old + 1] | |||||
| ) %>% | |||||
| group_by(Year, old) %>% | |||||
| summarize(n = sum(Total)) %>% | |||||
| spread(old, n) %>% | |||||
| mutate(ratio = `Older Adult` / Adult) %>% | |||||
| { | |||||
| ggplot(.) + | |||||
| aes(Year, ratio) + | |||||
| geom_point() + | |||||
| geom_point(data = filter(., Year == 2018), color = "red") + | |||||
| labs(y = "Dependency Ratio", x = NULL) + | |||||
| theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") + | |||||
| scale_y_continuous(labels = scales::percent) + | |||||
| gganimate::transition_states(Year, 1, 0, FALSE) + | |||||
| gganimate::shadow_mark() | |||||
| } | |||||
| anim_save( | |||||
| here::here(ANIM_OUT, "population-projections/pop-dependency.mp4"), | |||||
| animate(ga2, width = 1040*2, height = 480*2, res = 144, | |||||
| fps = 18, nframes = 161, | |||||
| renderer = av_renderer()) | |||||
| ) |