| @@ -1 +0,0 @@ | |||
| ^README\.Rmd$ | |||
| @@ -1,5 +0,0 @@ | |||
| # News | |||
| ## 2018-10-25 | |||
| Project Started | |||
| @@ -0,0 +1,99 @@ | |||
| library(dplyr) | |||
| library(purrr) | |||
| library(tidyr) | |||
| library(stringr) | |||
| library(readr) | |||
| library(forcats) | |||
| library(gganimate) | |||
| DATA_DIR <- "data-raw" | |||
| fs::dir_create(here::here(DATA_DIR)) | |||
| DOWNLOAD_DATA <- if (!exists(DOWNLOAD_DATA)) TRUE else DOWNLOAD_DATA | |||
| download_file <- function(filename, url, basedir) { | |||
| filename <- gsub(" ", "_", filename) | |||
| filename <- gsub("/", "-", filename) | |||
| fs::dir_create(basedir) | |||
| download.file(url, destfile = fs::path(basedir, filename)) | |||
| } | |||
| if (DOWNLOAD_DATA) { | |||
| ## ----download-popest-tables-1900-1980, eval=FALSE------------------------ | |||
| x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/") | |||
| xx <- | |||
| data_frame( | |||
| filename = rvest::html_nodes(x, "a") %>% rvest::html_text(), | |||
| url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href") | |||
| ) %>% | |||
| filter(str_detect(url, "\\.csv")) %>% | |||
| mutate(url = str_c( | |||
| "https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/", | |||
| url | |||
| )) | |||
| pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980")) | |||
| ## ----download-popest-tables-1980-1990, eval=FALSE------------------------ | |||
| xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>% | |||
| rvest::html_nodes(".list.section .uscb-text-link") %>% | |||
| rvest::html_attrs() %>% | |||
| purrr::transpose() %>% | |||
| as_tibble() %>% | |||
| unnest() %>% | |||
| filter(str_detect(href, "rqi\\.zip")) %>% | |||
| select(filename = name, url = href) %>% | |||
| mutate( | |||
| url = paste0("https:", url), | |||
| filename = basename(url) | |||
| ) %>% | |||
| pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990")) | |||
| withr::with_dir(here::here(DATA_DIR, "1980-1990"), { | |||
| fs::dir_ls(regexp = "zip") %>% | |||
| purrr::walk(unzip) | |||
| }) | |||
| ## ----download-popest-tables-1990-2000, eval=FALSE------------------------ | |||
| xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>% | |||
| rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>% | |||
| rvest::html_attrs() %>% | |||
| purrr::transpose() %>% | |||
| as_tibble() %>% | |||
| unnest() %>% | |||
| select(filename = name, url = href) %>% | |||
| mutate(url = paste0("https:", url), | |||
| filename = paste0(filename, ".csv")) %>% | |||
| pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000")) | |||
| ## ----download-popest-tables-2000-2010---- | |||
| ## "2000-2010/us-est00int-01.xls" | |||
| download_file( | |||
| "us-est00int-alldata-5yr.csv", | |||
| "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv", | |||
| here::here(DATA_DIR, "2000-2010") | |||
| ) | |||
| ## ----download-popest-tables-2010---- | |||
| ## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv" | |||
| ## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US | |||
| fs::dir_create(here::here(DATA_DIR, "2010-2017")) | |||
| if (!fs::file_exists(here::here(DATA_DIR, "2010-2017", "PEP_2017_PEPSYASEXN_with_ann.csv"))) { | |||
| rlang::abort(paste( | |||
| "Download the Annual Estimates of Resident Population by Single Year of Age from:\n", | |||
| "https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US\n", | |||
| "and extract `PEP_2017_PEPSYASEXN_with_ann.csv` into:", here::here(DATA_DIR, "2010-2017") | |||
| )) | |||
| } | |||
| ## ----download-popest-projections | |||
| ## "np2017_d1.csv" | |||
| download_file( | |||
| "np2017_d1.csv", | |||
| "https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv", | |||
| here::here(DATA_DIR) | |||
| ) | |||
| } | |||
| @@ -0,0 +1,181 @@ | |||
| library(dplyr) | |||
| library(purrr) | |||
| library(tidyr) | |||
| library(stringr) | |||
| library(readr) | |||
| library(readxl) | |||
| library(fs) | |||
| # library(here) | |||
| library(lubridate) | |||
| library(forcats) | |||
| DATA_DIR <- "data-raw" | |||
| pop <- list() | |||
| import_pre1980 <- function(file) { | |||
| # cli::cat_bullet(file) | |||
| year <- str_extract(file, "\\d{4}\\.csv") | |||
| year <- as.integer(sub("\\.csv", "", year)) | |||
| skip <- if (year >= 1960) 8 else 9 | |||
| x <- read_csv(file, col_names = FALSE, skip = skip, | |||
| col_types = cols_only( | |||
| X1 = col_guess(), X2 = col_guess(), | |||
| X3 = col_guess(), X4 = col_guess() | |||
| )) | |||
| colnames(x) <- c("Age", "Total", "Total Male", "Total Female") | |||
| x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ] | |||
| stopifnot(nrow(x) %in% c(76L, 86L)) | |||
| x %>% | |||
| mutate( | |||
| Year = year, | |||
| Age = sub("\\+", "", Age), | |||
| Age = as.integer(Age) | |||
| ) %>% | |||
| select(Year, everything()) | |||
| } | |||
| pop[["1900-1980"]] <- fs::dir_ls( | |||
| here::here(DATA_DIR, "1900-1980") | |||
| ) %>% | |||
| map_dfr(import_pre1980) | |||
| import_1980_1990 <- function(file) { | |||
| read_fwf( | |||
| file, | |||
| col_positions = fwf_positions( | |||
| start = c(3, 5, 7, 12, 22, 32), | |||
| end = c(4, 6, 9, 20, 30, 40), | |||
| col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female") | |||
| ), | |||
| skip = 0 | |||
| ) %>% | |||
| filter(Age < 101, Month == 4, Year == min(Year)) %>% | |||
| mutate(Year = Year + 1900) | |||
| } | |||
| pop[["1980-1990"]] <- fs::dir_ls( | |||
| here::here(DATA_DIR, "1980-1990"), | |||
| regexp = "TXT" | |||
| ) %>% | |||
| map_dfr(import_1980_1990) | |||
| import_1990_2000 <- function(file) { | |||
| x <- read_csv(file, col_names = FALSE, skip = 3) | |||
| colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female") | |||
| x %>% | |||
| filter( | |||
| !is.na(Total), | |||
| !str_detect(Age, "All") | |||
| ) %>% | |||
| mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>% | |||
| filter( | |||
| lubridate::month(Month) == 4 | |||
| ) %>% | |||
| mutate( | |||
| Year = lubridate::year(Month), | |||
| Age = str_remove(Age, "\\+"), | |||
| Age = as.integer(Age) | |||
| ) %>% | |||
| select(Year, everything(), -Month) | |||
| } | |||
| pop[["1990-2000"]] <- fs::dir_ls( | |||
| here::here(DATA_DIR, "1990-2000"), | |||
| regexp = "\\d{4}\\.csv" | |||
| ) %>% | |||
| map_dfr(import_1990_2000) | |||
| # pop[["2000-2010"]] <- read_csv( | |||
| # here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv") | |||
| # ) %>% | |||
| # select(Month = MONTH, Year = YEAR, Age = AGE, | |||
| # Total = TOT_POP, `Total Male` = TOT_MALE, | |||
| # `Total Female` = TOT_FEMALE) %>% | |||
| # filter(Year > 2000, Age <= 125) | |||
| # pop[["2000-2010"]] <- readxl::read_xls( | |||
| # here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"), | |||
| # sheet = "Sheet1" | |||
| # ) %>% | |||
| # mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>% | |||
| # select(-age_label) %>% | |||
| # gather(Year, value, -1:-2) %>% | |||
| # spread(Group, value) %>% | |||
| # mutate(Year = as.integer(Year)) %>% | |||
| # filter(Year > 2000) | |||
| pop[["2000-2010"]] <- | |||
| readr::read_csv( | |||
| here::here(DATA_DIR, "2000-2010", "us-est00int-alldata-5yr.csv") | |||
| ) %>% | |||
| select( | |||
| Year = year, | |||
| Age = AGEGRP, | |||
| Total = TOT_POP, | |||
| `Total Male` = TOT_MALE, | |||
| `Total Female` = TOT_FEMALE | |||
| ) %>% | |||
| filter(Age != 0, Year > 2000, Year < 2010) %>% | |||
| mutate(Age = (Age - 1) * 5) | |||
| pop[["2010-2017"]] <- readr::read_csv( | |||
| here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"), | |||
| skip = 1 | |||
| ) %>% | |||
| select(-1:-4) %>% | |||
| gather(key, value, -Sex) %>% | |||
| filter(str_detect(key, "Population Estimate")) %>% | |||
| mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>% | |||
| filter(!str_detect(key, "Total|Median")) %>% | |||
| separate(key, c("Year", "Age"), sep = ";") %>% | |||
| mutate( | |||
| Year = as.integer(Year), | |||
| Age = sub("+", "", Age, fixed = TRUE), | |||
| Age = as.integer(Age), | |||
| Sex = recode( | |||
| Sex, | |||
| "Both Sexes" = "Total", | |||
| "Female" = "Total Female", | |||
| "Male" = "Total Male" | |||
| ) | |||
| ) %>% | |||
| filter(Year < 2016) %>% | |||
| spread(Sex, value) | |||
| # pop[["2018"]] <- readr::read_tsv( | |||
| # here::here(DATA_DIR, "national-population-projections_2014-2060.txt") | |||
| # ) %>% | |||
| # select(-1) %>% | |||
| # filter(!is.na(Year)) %>% | |||
| # select(contains("Code"), contains("Pop")) %>% | |||
| # {colnames(.) <- sub(" Code", "", colnames(.)); .} %>% | |||
| # group_by(Year, Age) %>% | |||
| # mutate(Total = sum(`Projected Populations`)) %>% | |||
| # ungroup() %>% | |||
| # spread(Gender, `Projected Populations`) %>% | |||
| # mutate( | |||
| # Age = sub("+", "", Age, fixed = TRUE), | |||
| # Age = as.integer(Age) | |||
| # ) %>% | |||
| # filter(Year > 2017) | |||
| pop[["2018"]] <- | |||
| readr::read_csv( | |||
| here::here(DATA_DIR, "np2017_d1.csv") | |||
| ) %>% | |||
| filter(ORIGIN == 0, RACE == 0) %>% | |||
| gather(Age, n, -SEX:-YEAR) %>% | |||
| mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>% | |||
| spread(SEX, n) %>% | |||
| filter(Age != "TOTAL_POP") %>% | |||
| mutate( | |||
| Age = sub("POP_", "", Age, fixed = TRUE), | |||
| Age = as.integer(Age) | |||
| ) %>% | |||
| select(Year = YEAR, Age, starts_with("Total")) | |||
| fs::dir_create(here::here("data")) | |||
| saveRDS(pop, here::here("data", "pop_estimates_by_age.rds")) | |||
| @@ -0,0 +1,188 @@ | |||
| ## ----setup, include=FALSE------------------------------------------------ | |||
| knitr::opts_chunk$set(echo=FALSE) | |||
| ## ----library------------------------------------------------------------- | |||
| library(dplyr) | |||
| library(purrr) | |||
| library(tidyr) | |||
| library(stringr) | |||
| library(readr) | |||
| library(forcats) | |||
| library(gganimate) | |||
| DATA_DIR <- "data-raw" | |||
| ANIM_OUT <- "anim" | |||
| fs::dir_create(here::here(ANIM_OUT)) | |||
| pop <- readRDS(here::here("data", "pop_estimates_by_age.rds")) | |||
| ## ------------------------------------------------------------------------ | |||
| ga <- | |||
| pop %>% | |||
| bind_rows() %>% | |||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||
| group_by(age_group_int) %>% | |||
| mutate( | |||
| # age_group = paste(min(Age), max(Age), sep = " - "), | |||
| age_group = paste(min(Age)), | |||
| age_group = ifelse(Age >= 100, "100+", age_group) | |||
| ) %>% | |||
| ungroup() %>% | |||
| arrange(age_group_int, Year) %>% | |||
| mutate(age_group = fct_inorder(age_group)) %>% | |||
| select(-age_group_int) %>% | |||
| group_by(Year, age_group) %>% | |||
| summarize(Total = sum(Total)) %>% | |||
| group_by(Year) %>% | |||
| mutate(GrandTotal = sum(Total)) %>% | |||
| ungroup() %>% | |||
| mutate(Total = Total/GrandTotal) %>% | |||
| arrange(Year, age_group) %>% | |||
| # complete(Year, age_group, fill = list(Total = 0)) %>% | |||
| # filter(Year > 2000) %>% | |||
| { | |||
| ggplot(.) + | |||
| # aes(age_group, Total) + | |||
| # geom_vline(xintercept = c(1980, 1990), color = "grey50") + | |||
| # geom_line(aes(group = age_group)) + | |||
| # geom_point(size = 0.2) + | |||
| # geom_col(fill = "grey40") + | |||
| # facet_wrap(~ Year) + | |||
| # coord_flip() + | |||
| # scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) + | |||
| geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) + | |||
| geom_segment(aes(yend = age_group, y = age_group, xend = Total), | |||
| x = 0, | |||
| size = 5, | |||
| color = "grey40", | |||
| alpha = 0.25, | |||
| data = filter(., Year == min(Year)) %>% select(-Year) | |||
| ) + | |||
| # annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) + | |||
| theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") + | |||
| theme(panel.grid.major.y = element_blank()) + | |||
| scale_x_continuous(labels = scales::percent, expand = c(0, 0, 0.05, 0)) + | |||
| # labs(x = "Percent of Population", y = NULL) + | |||
| theme(axis.title.x = element_text(hjust = 0.5, size = 30)) + | |||
| labs(x = "{closest_state}", y = NULL) + | |||
| gganimate::transition_time(Year) + | |||
| gganimate::transition_states(Year, 1, 0, wrap = FALSE) + | |||
| gganimate::ease_aes("linear") + | |||
| gganimate::enter_fade() | |||
| } | |||
| grkmisc::logger(msg = "Writing MP4 population bar animation") | |||
| anim_save( | |||
| here::here(ANIM_OUT, "pop-anim.mp4"), | |||
| animate(ga, width = 1040*2, height = 480*2, res = 144, | |||
| fps = 20, nframes = 161*2, renderer = av_renderer()) | |||
| ) | |||
| grkmisc::logger(msg = "Writing gif population bar animation") | |||
| anim_save( | |||
| here::here(ANIM_OUT, "pop-anim.gif"), | |||
| animate(ga, width = 1040*2, height = 480*2, res = 144, | |||
| fps = 20, nframes = 161*2, | |||
| renderer = gganimate::gifski_renderer(loop = FALSE)) | |||
| ) | |||
| pop %>% | |||
| bind_rows() %>% | |||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||
| select(1:3, age_group_int) %>% | |||
| filter(age_group_int > 5) %>% | |||
| mutate( | |||
| old = age_group_int >= 13, | |||
| old = c("Adult", "Older Adult")[old + 1] | |||
| ) %>% | |||
| group_by(Year, old) %>% | |||
| summarize(n = sum(Total)) %>% | |||
| spread(old, n) %>% | |||
| { | |||
| ggplot(.) + | |||
| aes(x = Year) + | |||
| geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) + | |||
| geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) + | |||
| geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange, size = 2) | |||
| } %>% | |||
| invisible() | |||
| elder_dependency <- | |||
| pop %>% | |||
| bind_rows() %>% | |||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||
| select(1:3, age_group_int) %>% | |||
| mutate( | |||
| old = case_when( | |||
| age_group_int < 3 ~ "Children", | |||
| age_group_int >= 13 ~ "Older Adult", | |||
| TRUE ~ "Adult" | |||
| ) | |||
| ) %>% | |||
| group_by(Year, old) %>% | |||
| summarize(n = sum(Total)) %>% | |||
| spread(old, n) %>% | |||
| mutate( | |||
| dependency_elder = `Older Adult` / Adult, | |||
| dependency_all = (`Older Adult` + Children) / Adult | |||
| ) | |||
| ga_elder_dep <- | |||
| ggplot(elder_dependency) + | |||
| aes(Year, dependency_elder) + | |||
| geom_point(color = "#00589a") + | |||
| geom_point(data = filter(elder_dependency, Year == 2018), size = 4, color = "#eb1455") + | |||
| labs(y = "Dependency Ratio", x = NULL) + | |||
| theme_minimal(base_size = 24, base_family = "Fira Sans Condensed") + | |||
| scale_y_continuous(labels = scales::percent) + | |||
| gganimate::transition_states(Year, 1, 0, FALSE) + | |||
| gganimate::shadow_mark() | |||
| grkmisc::logger(msg = "Writing mp4 elder dependency animation") | |||
| anim_save( | |||
| here::here(ANIM_OUT, "pop-dependency.mp4"), | |||
| animate(ga_elder_dep, width = 1040*2, height = 480*2, res = 144, | |||
| fps = 18, nframes = 161, renderer = av_renderer()) | |||
| ) | |||
| grkmisc::logger(msg = "Writing gif elder dependency animation") | |||
| anim_save( | |||
| here::here(ANIM_OUT, "pop-dependency.gif"), | |||
| animate(ga_elder_dep, width = 1040*2, height = 480*2, res = 144, | |||
| fps = 18, nframes = 161, | |||
| renderer = gganimate::gifski_renderer(loop = FALSE)) | |||
| ) | |||
| elder_dep_together <- | |||
| elder_dependency %>% | |||
| ungroup() %>% | |||
| gather(timeseries, ratio, starts_with("depend")) %>% | |||
| select(-Adult:-`Older Adult`) %>% | |||
| mutate(timeseries = ifelse(timeseries == "dependency_elder", 0, 1)) | |||
| ga_dep_together <- | |||
| ggplot(elder_dep_together) + | |||
| aes(Year, ratio) + | |||
| geom_point(color = "#00589a") + | |||
| # geom_point(color = "#82c878") + | |||
| # geom_point(data = filter(., Year == 2018), size = 4, color = "#eb1455") + | |||
| labs(y = "Dependency Ratio", x = NULL) + | |||
| theme_minimal(base_size = 24, base_family = "Fira Sans Condensed") + | |||
| scale_y_continuous(labels = scales::percent) + | |||
| gganimate::transition_states(timeseries, 1, 0, FALSE) + | |||
| gganimate::view_follow(fixed_x = TRUE) + | |||
| gganimate::ease_aes("sine-in-out")+ | |||
| gganimate::shadow_mark() | |||
| grkmisc::logger(msg = "Writing mp4 all dependency animation") | |||
| anim_save( | |||
| here::here(ANIM_OUT, "pop-dependency-both.mp4"), | |||
| animate(ga_dep_together, width = 1040*2, height = 480*2, res = 144, | |||
| fps = 18, nframes = 80, | |||
| renderer = av_renderer()) | |||
| ) | |||
| grkmisc::logger(msg = "Writing gif all dependency animation") | |||
| anim_save( | |||
| here::here(ANIM_OUT, "pop-dependency-both.gif"), | |||
| animate(ga_dep_together, width = 1040*2, height = 480*2, res = 144, | |||
| fps = 18, nframes = 80, | |||
| renderer = gganimate::gifski_renderer(loop = FALSE)) | |||
| ) | |||
| @@ -1,357 +0,0 @@ | |||
| ## ----setup, include=FALSE------------------------------------------------ | |||
| knitr::opts_chunk$set(echo=FALSE) | |||
| ## ----library------------------------------------------------------------- | |||
| library(dplyr) | |||
| library(purrr) | |||
| library(tidyr) | |||
| library(stringr) | |||
| library(readr) | |||
| library(forcats) | |||
| library(gganimate) | |||
| DATA_DIR <- "data-raw" | |||
| ANIM_OUT <- "anim" | |||
| fs::dir_create(here::here(DATA_DIR)) | |||
| fs::dir_create(here::here(ANIM_OUT)) | |||
| DOWNLOAD_DATA <- TRUE | |||
| download_file <- function(filename, url, basedir) { | |||
| filename <- gsub(" ", "_", filename) | |||
| filename <- gsub("/", "-", filename) | |||
| fs::dir_create(basedir) | |||
| download.file(url, destfile = fs::path(basedir, filename)) | |||
| } | |||
| if (DOWNLOAD_DATA) { | |||
| ## ----download-popest-tables-1900-1980, eval=FALSE------------------------ | |||
| x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/") | |||
| xx <- | |||
| data_frame( | |||
| filename = rvest::html_nodes(x, "a") %>% rvest::html_text(), | |||
| url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href") | |||
| ) %>% | |||
| filter(str_detect(url, "\\.csv")) %>% | |||
| mutate(url = str_c( | |||
| "https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/", | |||
| url | |||
| )) | |||
| pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980")) | |||
| ## ----download-popest-tables-1980-1990, eval=FALSE------------------------ | |||
| xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>% | |||
| rvest::html_nodes(".list.section .uscb-text-link") %>% | |||
| rvest::html_attrs() %>% | |||
| purrr::transpose() %>% | |||
| as_tibble() %>% | |||
| unnest() %>% | |||
| select(filename = name, url = href) %>% | |||
| mutate(url = paste0("https:", url)) %>% | |||
| pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990")) | |||
| ## ----download-popest-tables-1990-2000, eval=FALSE------------------------ | |||
| xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>% | |||
| rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>% | |||
| rvest::html_attrs() %>% | |||
| purrr::transpose() %>% | |||
| as_tibble() %>% | |||
| unnest() %>% | |||
| select(filename = name, url = href) %>% | |||
| mutate(url = paste0("https:", url), | |||
| filename = paste0(filename, ".csv")) %>% | |||
| pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000")) | |||
| ## ----download-popest-tables-2000-2010---- | |||
| ## "2000-2010/us-est00int-01.xls" | |||
| download_file( | |||
| "us-est00int-alldata-5yr.csv", | |||
| "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv", | |||
| here::here(DATA_DIR, "2000-2010") | |||
| ) | |||
| ## ----download-popest-tables-2010---- | |||
| ## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv" | |||
| ## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US | |||
| ## ----download-popest-projections | |||
| ## "np2017_d1.csv" | |||
| download_file( | |||
| "np2017_d1.csv", | |||
| "https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv", | |||
| here::here(DATA_DIR) | |||
| ) | |||
| } | |||
| ## ----import-historical--------------------------------------------------- | |||
| pop <- list() | |||
| import_pre1980 <- function(file) { | |||
| # cli::cat_bullet(file) | |||
| year <- str_extract(file, "\\d{4}\\.csv") | |||
| year <- as.integer(sub("\\.csv", "", year)) | |||
| skip <- if (year >= 1960) 8 else 9 | |||
| x <- read_csv(file, col_names = FALSE, skip = skip, | |||
| col_types = cols_only( | |||
| X1 = col_guess(), X2 = col_guess(), | |||
| X3 = col_guess(), X4 = col_guess() | |||
| )) | |||
| colnames(x) <- c("Age", "Total", "Total Male", "Total Female") | |||
| x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ] | |||
| stopifnot(nrow(x) %in% c(76L, 86L)) | |||
| x %>% | |||
| mutate( | |||
| Year = year, | |||
| Age = sub("\\+", "", Age), | |||
| Age = as.integer(Age) | |||
| ) %>% | |||
| select(Year, everything()) | |||
| } | |||
| pop[["1900-1980"]] <- fs::dir_ls( | |||
| here::here(DATA_DIR, "1900-1980") | |||
| ) %>% | |||
| map_dfr(import_pre1980) | |||
| import_1980_1990 <- function(file) { | |||
| read_fwf( | |||
| file, | |||
| col_positions = fwf_positions( | |||
| start = c(3, 5, 7, 12, 22, 32), | |||
| end = c(4, 6, 9, 20, 30, 40), | |||
| col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female") | |||
| ), | |||
| skip = 0 | |||
| ) %>% | |||
| filter(Age < 101, Month == 4, Year == min(Year)) %>% | |||
| mutate(Year = Year + 1900) | |||
| } | |||
| pop[["1980-1990"]] <- fs::dir_ls( | |||
| here::here(DATA_DIR, "1980-1990"), | |||
| regexp = "TXT" | |||
| ) %>% | |||
| map_dfr(import_1980_1990) | |||
| import_1990_2000 <- function(file) { | |||
| x <- read_csv(file, col_names = FALSE, skip = 3) | |||
| colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female") | |||
| x %>% | |||
| filter( | |||
| !is.na(Total), | |||
| !str_detect(Age, "All") | |||
| ) %>% | |||
| mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>% | |||
| filter( | |||
| lubridate::month(Month) == 4 | |||
| ) %>% | |||
| mutate( | |||
| Year = lubridate::year(Month), | |||
| Age = str_remove(Age, "\\+"), | |||
| Age = as.integer(Age) | |||
| ) %>% | |||
| select(Year, everything(), -Month) | |||
| } | |||
| pop[["1990-2000"]] <- fs::dir_ls( | |||
| here::here(DATA_DIR, "1990-2000"), | |||
| regexp = "\\d{4}\\.csv" | |||
| ) %>% | |||
| map_dfr(import_1990_2000) | |||
| # pop[["2000-2010"]] <- read_csv( | |||
| # here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv") | |||
| # ) %>% | |||
| # select(Month = MONTH, Year = YEAR, Age = AGE, | |||
| # Total = TOT_POP, `Total Male` = TOT_MALE, | |||
| # `Total Female` = TOT_FEMALE) %>% | |||
| # filter(Year > 2000, Age <= 125) | |||
| pop[["2000-2010"]] <- readxl::read_xls( | |||
| here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"), | |||
| sheet = "Sheet1" | |||
| ) %>% | |||
| mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>% | |||
| select(-age_label) %>% | |||
| gather(Year, value, -1:-2) %>% | |||
| spread(Group, value) %>% | |||
| mutate(Year = as.integer(Year)) %>% | |||
| filter(Year > 2000) | |||
| pop[["2010-2017"]] <- readr::read_csv( | |||
| here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"), | |||
| skip = 1 | |||
| ) %>% | |||
| select(-1:-4) %>% | |||
| gather(key, value, -Sex) %>% | |||
| filter(str_detect(key, "Population Estimate")) %>% | |||
| mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>% | |||
| filter(!str_detect(key, "Total|Median")) %>% | |||
| separate(key, c("Year", "Age"), sep = ";") %>% | |||
| mutate( | |||
| Year = as.integer(Year), | |||
| Age = sub("+", "", Age, fixed = TRUE), | |||
| Age = as.integer(Age), | |||
| Sex = recode( | |||
| Sex, | |||
| "Both Sexes" = "Total", | |||
| "Female" = "Total Female", | |||
| "Male" = "Total Male" | |||
| ) | |||
| ) %>% | |||
| spread(Sex, value) | |||
| # pop[["2018"]] <- readr::read_tsv( | |||
| # here::here(DATA_DIR, "national-population-projections_2014-2060.txt") | |||
| # ) %>% | |||
| # select(-1) %>% | |||
| # filter(!is.na(Year)) %>% | |||
| # select(contains("Code"), contains("Pop")) %>% | |||
| # {colnames(.) <- sub(" Code", "", colnames(.)); .} %>% | |||
| # group_by(Year, Age) %>% | |||
| # mutate(Total = sum(`Projected Populations`)) %>% | |||
| # ungroup() %>% | |||
| # spread(Gender, `Projected Populations`) %>% | |||
| # mutate( | |||
| # Age = sub("+", "", Age, fixed = TRUE), | |||
| # Age = as.integer(Age) | |||
| # ) %>% | |||
| # filter(Year > 2017) | |||
| pop[["2018"]] <- | |||
| readr::read_csv( | |||
| here::here(DATA_DIR, "np2017_d1.csv") | |||
| ) %>% | |||
| gather(Age, n, -SEX:-YEAR) %>% | |||
| mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>% | |||
| spread(SEX, n) %>% | |||
| filter(Age != "TOTAL_POP") %>% | |||
| mutate( | |||
| Age = sub("POP_", "", Age, fixed = TRUE), | |||
| Age = as.integer(Age) | |||
| ) %>% | |||
| select(Year = YEAR, Age, starts_with("Total")) | |||
| ## ------------------------------------------------------------------------ | |||
| ga <- | |||
| pop %>% | |||
| bind_rows() %>% | |||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||
| group_by(age_group_int) %>% | |||
| mutate( | |||
| # age_group = paste(min(Age), max(Age), sep = " - "), | |||
| age_group = paste(min(Age)), | |||
| age_group = ifelse(Age >= 100, "100+", age_group) | |||
| ) %>% | |||
| ungroup() %>% | |||
| arrange(age_group_int, Year) %>% | |||
| mutate(age_group = fct_inorder(age_group)) %>% | |||
| select(-age_group_int) %>% | |||
| group_by(Year, age_group) %>% | |||
| summarize(Total = sum(Total)) %>% | |||
| group_by(Year) %>% | |||
| mutate(GrandTotal = sum(Total)) %>% | |||
| ungroup() %>% | |||
| mutate(Total = Total/GrandTotal) %>% | |||
| arrange(Year, age_group) %>% View() | |||
| # complete(Year, age_group, fill = list(Total = 0)) %>% | |||
| # filter(Year > 2000) %>% | |||
| { | |||
| ggplot(.) + | |||
| # aes(age_group, Total) + | |||
| # geom_vline(xintercept = c(1980, 1990), color = "grey50") + | |||
| # geom_line(aes(group = age_group)) + | |||
| # geom_point(size = 0.2) + | |||
| # geom_col(fill = "grey40") + | |||
| # facet_wrap(~ Year) + | |||
| # coord_flip() + | |||
| # scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) + | |||
| geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) + | |||
| geom_segment(aes(yend = age_group, y = age_group, xend = Total), | |||
| x = 0, | |||
| size = 5, | |||
| color = "grey40", | |||
| alpha = 0.25, | |||
| data = filter(., Year == min(Year)) %>% select(-Year) | |||
| ) + | |||
| # annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) + | |||
| theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") + | |||
| theme(panel.grid.major.y = element_blank()) + | |||
| scale_x_continuous(labels = scales::percent, expand = c(0, 0)) + | |||
| # labs(x = "Percent of Population", y = NULL) + | |||
| theme(axis.title.x = element_text(hjust = 0.5, size = 30)) + | |||
| labs(x = "{closest_state}", y = NULL) + | |||
| gganimate::transition_time(Year) + | |||
| gganimate::transition_states(Year, 1, 0, wrap = FALSE) + | |||
| gganimate::ease_aes("linear") + | |||
| gganimate::enter_fade() | |||
| } | |||
| anim_save( | |||
| here::here(ANIM_OUT, "pop-anim.mp4"), | |||
| animate(ga, width = 1040*2, height = 480*2, res = 144, | |||
| fps = 20, nframes = 161*2, | |||
| renderer = av_renderer()) | |||
| ) | |||
| pop %>% | |||
| bind_rows() %>% | |||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||
| select(1:3, age_group_int) %>% | |||
| filter(age_group_int > 5) %>% | |||
| mutate( | |||
| old = age_group_int >= 13, | |||
| old = c("Adult", "Older Adult")[old + 1] | |||
| ) %>% | |||
| group_by(Year, old) %>% | |||
| summarize(n = sum(Total)) %>% | |||
| spread(old, n) %>% | |||
| { | |||
| ggplot(.) + | |||
| aes(x = Year) + | |||
| geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) + | |||
| geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) + | |||
| geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange) | |||
| } | |||
| ga2 <- | |||
| pop %>% | |||
| bind_rows() %>% | |||
| mutate(age_group_int = as.integer(Age) %/% 5) %>% | |||
| select(1:3, age_group_int) %>% | |||
| filter(age_group_int >= 3) %>% | |||
| mutate( | |||
| old = age_group_int >= 13, | |||
| old = c("Adult", "Older Adult")[old + 1] | |||
| ) %>% | |||
| group_by(Year, old) %>% | |||
| summarize(n = sum(Total)) %>% | |||
| spread(old, n) %>% | |||
| mutate(ratio = `Older Adult` / Adult) %>% | |||
| { | |||
| ggplot(.) + | |||
| aes(Year, ratio) + | |||
| geom_point() + | |||
| geom_point(data = filter(., Year == 2018), color = "red") + | |||
| labs(y = "Dependency Ratio", x = NULL) + | |||
| theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") + | |||
| scale_y_continuous(labels = scales::percent) + | |||
| gganimate::transition_states(Year, 1, 0, FALSE) + | |||
| gganimate::shadow_mark() | |||
| } | |||
| anim_save( | |||
| here::here(ANIM_OUT, "population-projections/pop-dependency.mp4"), | |||
| animate(ga2, width = 1040*2, height = 480*2, res = 144, | |||
| fps = 18, nframes = 161, | |||
| renderer = av_renderer()) | |||
| ) | |||