|
|
|
@@ -0,0 +1,357 @@ |
|
|
|
## ----setup, include=FALSE------------------------------------------------ |
|
|
|
knitr::opts_chunk$set(echo=FALSE) |
|
|
|
|
|
|
|
## ----library------------------------------------------------------------- |
|
|
|
library(dplyr) |
|
|
|
library(purrr) |
|
|
|
library(tidyr) |
|
|
|
library(stringr) |
|
|
|
library(readr) |
|
|
|
library(forcats) |
|
|
|
library(gganimate) |
|
|
|
|
|
|
|
DATA_DIR <- "data-raw" |
|
|
|
ANIM_OUT <- "anim" |
|
|
|
|
|
|
|
fs::dir_create(here::here(DATA_DIR)) |
|
|
|
fs::dir_create(here::here(ANIM_OUT)) |
|
|
|
|
|
|
|
DOWNLOAD_DATA <- TRUE |
|
|
|
|
|
|
|
|
|
|
|
download_file <- function(filename, url, basedir) { |
|
|
|
filename <- gsub(" ", "_", filename) |
|
|
|
filename <- gsub("/", "-", filename) |
|
|
|
fs::dir_create(basedir) |
|
|
|
download.file(url, destfile = fs::path(basedir, filename)) |
|
|
|
} |
|
|
|
|
|
|
|
if (DOWNLOAD_DATA) { |
|
|
|
## ----download-popest-tables-1900-1980, eval=FALSE------------------------ |
|
|
|
x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/") |
|
|
|
|
|
|
|
xx <- |
|
|
|
data_frame( |
|
|
|
filename = rvest::html_nodes(x, "a") %>% rvest::html_text(), |
|
|
|
url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href") |
|
|
|
) %>% |
|
|
|
filter(str_detect(url, "\\.csv")) %>% |
|
|
|
mutate(url = str_c( |
|
|
|
"https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/", |
|
|
|
url |
|
|
|
)) |
|
|
|
|
|
|
|
pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980")) |
|
|
|
|
|
|
|
## ----download-popest-tables-1980-1990, eval=FALSE------------------------ |
|
|
|
xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>% |
|
|
|
rvest::html_nodes(".list.section .uscb-text-link") %>% |
|
|
|
rvest::html_attrs() %>% |
|
|
|
purrr::transpose() %>% |
|
|
|
as_tibble() %>% |
|
|
|
unnest() %>% |
|
|
|
select(filename = name, url = href) %>% |
|
|
|
mutate(url = paste0("https:", url)) %>% |
|
|
|
pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990")) |
|
|
|
|
|
|
|
## ----download-popest-tables-1990-2000, eval=FALSE------------------------ |
|
|
|
xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>% |
|
|
|
rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>% |
|
|
|
rvest::html_attrs() %>% |
|
|
|
purrr::transpose() %>% |
|
|
|
as_tibble() %>% |
|
|
|
unnest() %>% |
|
|
|
select(filename = name, url = href) %>% |
|
|
|
mutate(url = paste0("https:", url), |
|
|
|
filename = paste0(filename, ".csv")) %>% |
|
|
|
pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000")) |
|
|
|
|
|
|
|
## ----download-popest-tables-2000-2010---- |
|
|
|
## "2000-2010/us-est00int-01.xls" |
|
|
|
download_file( |
|
|
|
"us-est00int-alldata-5yr.csv", |
|
|
|
"https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv", |
|
|
|
here::here(DATA_DIR, "2000-2010") |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
## ----download-popest-tables-2010---- |
|
|
|
## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv" |
|
|
|
## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US |
|
|
|
|
|
|
|
## ----download-popest-projections |
|
|
|
## "np2017_d1.csv" |
|
|
|
download_file( |
|
|
|
"np2017_d1.csv", |
|
|
|
"https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv", |
|
|
|
here::here(DATA_DIR) |
|
|
|
) |
|
|
|
} |
|
|
|
|
|
|
|
## ----import-historical--------------------------------------------------- |
|
|
|
pop <- list() |
|
|
|
|
|
|
|
import_pre1980 <- function(file) { |
|
|
|
# cli::cat_bullet(file) |
|
|
|
year <- str_extract(file, "\\d{4}\\.csv") |
|
|
|
year <- as.integer(sub("\\.csv", "", year)) |
|
|
|
skip <- if (year >= 1960) 8 else 9 |
|
|
|
x <- read_csv(file, col_names = FALSE, skip = skip, |
|
|
|
col_types = cols_only( |
|
|
|
X1 = col_guess(), X2 = col_guess(), |
|
|
|
X3 = col_guess(), X4 = col_guess() |
|
|
|
|
|
|
|
)) |
|
|
|
colnames(x) <- c("Age", "Total", "Total Male", "Total Female") |
|
|
|
x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ] |
|
|
|
stopifnot(nrow(x) %in% c(76L, 86L)) |
|
|
|
x %>% |
|
|
|
mutate( |
|
|
|
Year = year, |
|
|
|
Age = sub("\\+", "", Age), |
|
|
|
Age = as.integer(Age) |
|
|
|
) %>% |
|
|
|
select(Year, everything()) |
|
|
|
} |
|
|
|
|
|
|
|
pop[["1900-1980"]] <- fs::dir_ls( |
|
|
|
here::here(DATA_DIR, "1900-1980") |
|
|
|
) %>% |
|
|
|
map_dfr(import_pre1980) |
|
|
|
|
|
|
|
import_1980_1990 <- function(file) { |
|
|
|
read_fwf( |
|
|
|
file, |
|
|
|
col_positions = fwf_positions( |
|
|
|
start = c(3, 5, 7, 12, 22, 32), |
|
|
|
end = c(4, 6, 9, 20, 30, 40), |
|
|
|
col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female") |
|
|
|
), |
|
|
|
skip = 0 |
|
|
|
) %>% |
|
|
|
filter(Age < 101, Month == 4, Year == min(Year)) %>% |
|
|
|
mutate(Year = Year + 1900) |
|
|
|
} |
|
|
|
|
|
|
|
pop[["1980-1990"]] <- fs::dir_ls( |
|
|
|
here::here(DATA_DIR, "1980-1990"), |
|
|
|
regexp = "TXT" |
|
|
|
) %>% |
|
|
|
map_dfr(import_1980_1990) |
|
|
|
|
|
|
|
import_1990_2000 <- function(file) { |
|
|
|
x <- read_csv(file, col_names = FALSE, skip = 3) |
|
|
|
colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female") |
|
|
|
|
|
|
|
x %>% |
|
|
|
filter( |
|
|
|
!is.na(Total), |
|
|
|
!str_detect(Age, "All") |
|
|
|
) %>% |
|
|
|
mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>% |
|
|
|
filter( |
|
|
|
lubridate::month(Month) == 4 |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
Year = lubridate::year(Month), |
|
|
|
Age = str_remove(Age, "\\+"), |
|
|
|
Age = as.integer(Age) |
|
|
|
) %>% |
|
|
|
select(Year, everything(), -Month) |
|
|
|
} |
|
|
|
|
|
|
|
pop[["1990-2000"]] <- fs::dir_ls( |
|
|
|
here::here(DATA_DIR, "1990-2000"), |
|
|
|
regexp = "\\d{4}\\.csv" |
|
|
|
) %>% |
|
|
|
map_dfr(import_1990_2000) |
|
|
|
|
|
|
|
# pop[["2000-2010"]] <- read_csv( |
|
|
|
# here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv") |
|
|
|
# ) %>% |
|
|
|
# select(Month = MONTH, Year = YEAR, Age = AGE, |
|
|
|
# Total = TOT_POP, `Total Male` = TOT_MALE, |
|
|
|
# `Total Female` = TOT_FEMALE) %>% |
|
|
|
# filter(Year > 2000, Age <= 125) |
|
|
|
|
|
|
|
pop[["2000-2010"]] <- readxl::read_xls( |
|
|
|
here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"), |
|
|
|
sheet = "Sheet1" |
|
|
|
) %>% |
|
|
|
mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>% |
|
|
|
select(-age_label) %>% |
|
|
|
gather(Year, value, -1:-2) %>% |
|
|
|
spread(Group, value) %>% |
|
|
|
mutate(Year = as.integer(Year)) %>% |
|
|
|
filter(Year > 2000) |
|
|
|
|
|
|
|
pop[["2010-2017"]] <- readr::read_csv( |
|
|
|
here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"), |
|
|
|
skip = 1 |
|
|
|
) %>% |
|
|
|
select(-1:-4) %>% |
|
|
|
gather(key, value, -Sex) %>% |
|
|
|
filter(str_detect(key, "Population Estimate")) %>% |
|
|
|
mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>% |
|
|
|
filter(!str_detect(key, "Total|Median")) %>% |
|
|
|
separate(key, c("Year", "Age"), sep = ";") %>% |
|
|
|
mutate( |
|
|
|
Year = as.integer(Year), |
|
|
|
Age = sub("+", "", Age, fixed = TRUE), |
|
|
|
Age = as.integer(Age), |
|
|
|
Sex = recode( |
|
|
|
Sex, |
|
|
|
"Both Sexes" = "Total", |
|
|
|
"Female" = "Total Female", |
|
|
|
"Male" = "Total Male" |
|
|
|
) |
|
|
|
) %>% |
|
|
|
spread(Sex, value) |
|
|
|
|
|
|
|
# pop[["2018"]] <- readr::read_tsv( |
|
|
|
# here::here(DATA_DIR, "national-population-projections_2014-2060.txt") |
|
|
|
# ) %>% |
|
|
|
# select(-1) %>% |
|
|
|
# filter(!is.na(Year)) %>% |
|
|
|
# select(contains("Code"), contains("Pop")) %>% |
|
|
|
# {colnames(.) <- sub(" Code", "", colnames(.)); .} %>% |
|
|
|
# group_by(Year, Age) %>% |
|
|
|
# mutate(Total = sum(`Projected Populations`)) %>% |
|
|
|
# ungroup() %>% |
|
|
|
# spread(Gender, `Projected Populations`) %>% |
|
|
|
# mutate( |
|
|
|
# Age = sub("+", "", Age, fixed = TRUE), |
|
|
|
# Age = as.integer(Age) |
|
|
|
# ) %>% |
|
|
|
# filter(Year > 2017) |
|
|
|
|
|
|
|
pop[["2018"]] <- |
|
|
|
readr::read_csv( |
|
|
|
here::here(DATA_DIR, "np2017_d1.csv") |
|
|
|
) %>% |
|
|
|
gather(Age, n, -SEX:-YEAR) %>% |
|
|
|
mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>% |
|
|
|
spread(SEX, n) %>% |
|
|
|
filter(Age != "TOTAL_POP") %>% |
|
|
|
mutate( |
|
|
|
Age = sub("POP_", "", Age, fixed = TRUE), |
|
|
|
Age = as.integer(Age) |
|
|
|
) %>% |
|
|
|
select(Year = YEAR, Age, starts_with("Total")) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## ------------------------------------------------------------------------ |
|
|
|
ga <- |
|
|
|
pop %>% |
|
|
|
bind_rows() %>% |
|
|
|
mutate(age_group_int = as.integer(Age) %/% 5) %>% |
|
|
|
group_by(age_group_int) %>% |
|
|
|
mutate( |
|
|
|
# age_group = paste(min(Age), max(Age), sep = " - "), |
|
|
|
age_group = paste(min(Age)), |
|
|
|
age_group = ifelse(Age >= 100, "100+", age_group) |
|
|
|
) %>% |
|
|
|
ungroup() %>% |
|
|
|
arrange(age_group_int, Year) %>% |
|
|
|
mutate(age_group = fct_inorder(age_group)) %>% |
|
|
|
select(-age_group_int) %>% |
|
|
|
group_by(Year, age_group) %>% |
|
|
|
summarize(Total = sum(Total)) %>% |
|
|
|
group_by(Year) %>% |
|
|
|
mutate(GrandTotal = sum(Total)) %>% |
|
|
|
ungroup() %>% |
|
|
|
mutate(Total = Total/GrandTotal) %>% |
|
|
|
arrange(Year, age_group) %>% View() |
|
|
|
# complete(Year, age_group, fill = list(Total = 0)) %>% |
|
|
|
# filter(Year > 2000) %>% |
|
|
|
{ |
|
|
|
ggplot(.) + |
|
|
|
# aes(age_group, Total) + |
|
|
|
# geom_vline(xintercept = c(1980, 1990), color = "grey50") + |
|
|
|
# geom_line(aes(group = age_group)) + |
|
|
|
# geom_point(size = 0.2) + |
|
|
|
# geom_col(fill = "grey40") + |
|
|
|
# facet_wrap(~ Year) + |
|
|
|
# coord_flip() + |
|
|
|
# scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) + |
|
|
|
geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) + |
|
|
|
geom_segment(aes(yend = age_group, y = age_group, xend = Total), |
|
|
|
x = 0, |
|
|
|
size = 5, |
|
|
|
color = "grey40", |
|
|
|
alpha = 0.25, |
|
|
|
data = filter(., Year == min(Year)) %>% select(-Year) |
|
|
|
) + |
|
|
|
# annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) + |
|
|
|
theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") + |
|
|
|
theme(panel.grid.major.y = element_blank()) + |
|
|
|
scale_x_continuous(labels = scales::percent, expand = c(0, 0)) + |
|
|
|
# labs(x = "Percent of Population", y = NULL) + |
|
|
|
theme(axis.title.x = element_text(hjust = 0.5, size = 30)) + |
|
|
|
labs(x = "{closest_state}", y = NULL) + |
|
|
|
gganimate::transition_time(Year) + |
|
|
|
gganimate::transition_states(Year, 1, 0, wrap = FALSE) + |
|
|
|
gganimate::ease_aes("linear") + |
|
|
|
gganimate::enter_fade() |
|
|
|
} |
|
|
|
|
|
|
|
anim_save( |
|
|
|
here::here(ANIM_OUT, "pop-anim.mp4"), |
|
|
|
animate(ga, width = 1040*2, height = 480*2, res = 144, |
|
|
|
fps = 20, nframes = 161*2, |
|
|
|
renderer = av_renderer()) |
|
|
|
) |
|
|
|
|
|
|
|
pop %>% |
|
|
|
bind_rows() %>% |
|
|
|
mutate(age_group_int = as.integer(Age) %/% 5) %>% |
|
|
|
select(1:3, age_group_int) %>% |
|
|
|
filter(age_group_int > 5) %>% |
|
|
|
mutate( |
|
|
|
old = age_group_int >= 13, |
|
|
|
old = c("Adult", "Older Adult")[old + 1] |
|
|
|
) %>% |
|
|
|
group_by(Year, old) %>% |
|
|
|
summarize(n = sum(Total)) %>% |
|
|
|
spread(old, n) %>% |
|
|
|
{ |
|
|
|
ggplot(.) + |
|
|
|
aes(x = Year) + |
|
|
|
geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) + |
|
|
|
geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) + |
|
|
|
geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange) |
|
|
|
} |
|
|
|
|
|
|
|
ga2 <- |
|
|
|
pop %>% |
|
|
|
bind_rows() %>% |
|
|
|
mutate(age_group_int = as.integer(Age) %/% 5) %>% |
|
|
|
select(1:3, age_group_int) %>% |
|
|
|
filter(age_group_int >= 3) %>% |
|
|
|
mutate( |
|
|
|
old = age_group_int >= 13, |
|
|
|
old = c("Adult", "Older Adult")[old + 1] |
|
|
|
) %>% |
|
|
|
group_by(Year, old) %>% |
|
|
|
summarize(n = sum(Total)) %>% |
|
|
|
spread(old, n) %>% |
|
|
|
mutate(ratio = `Older Adult` / Adult) %>% |
|
|
|
{ |
|
|
|
ggplot(.) + |
|
|
|
aes(Year, ratio) + |
|
|
|
geom_point() + |
|
|
|
geom_point(data = filter(., Year == 2018), color = "red") + |
|
|
|
labs(y = "Dependency Ratio", x = NULL) + |
|
|
|
theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") + |
|
|
|
scale_y_continuous(labels = scales::percent) + |
|
|
|
gganimate::transition_states(Year, 1, 0, FALSE) + |
|
|
|
gganimate::shadow_mark() |
|
|
|
} |
|
|
|
|
|
|
|
anim_save( |
|
|
|
here::here(ANIM_OUT, "population-projections/pop-dependency.mp4"), |
|
|
|
animate(ga2, width = 1040*2, height = 480*2, res = 144, |
|
|
|
fps = 18, nframes = 161, |
|
|
|
renderer = av_renderer()) |
|
|
|
) |