před 7 roky · 15498ec765
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1 +0,0 @@
 ^README\.Rmd$
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +0,0 @@
 # News

 ## 2018-10-25

 Project Started
--- a/R/00_download-data.R
+++ b/R/00_download-data.R
@@ -0,0 +1,99 @@
 library(dplyr)
 library(purrr)
 library(tidyr)
 library(stringr)
 library(readr)
 library(forcats)
 library(gganimate)

 DATA_DIR <- "data-raw"

 fs::dir_create(here::here(DATA_DIR))

 DOWNLOAD_DATA <- if (!exists(DOWNLOAD_DATA)) TRUE else DOWNLOAD_DATA

 download_file <- function(filename, url, basedir) {
  filename <- gsub(" ", "_", filename)
  filename <- gsub("/", "-", filename)
  fs::dir_create(basedir)
  download.file(url, destfile = fs::path(basedir, filename))
 }

 if (DOWNLOAD_DATA) {
  ## ----download-popest-tables-1900-1980, eval=FALSE------------------------
  x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/")

  xx <-
    data_frame(
      filename = rvest::html_nodes(x, "a") %>% rvest::html_text(),
      url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href")
    ) %>%
    filter(str_detect(url, "\\.csv")) %>%
    mutate(url = str_c(
      "https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/",
      url
    ))

  pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980"))

  ## ----download-popest-tables-1980-1990, eval=FALSE------------------------
  xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>%
    rvest::html_nodes(".list.section .uscb-text-link") %>%
    rvest::html_attrs() %>%
    purrr::transpose() %>%
    as_tibble() %>%
    unnest() %>%
    filter(str_detect(href, "rqi\\.zip")) %>%
    select(filename = name, url = href) %>%
    mutate(
      url = paste0("https:", url),
      filename = basename(url)
    ) %>%
    pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990"))

  withr::with_dir(here::here(DATA_DIR, "1980-1990"), {
    fs::dir_ls(regexp = "zip") %>%
      purrr::walk(unzip)
  })

  ## ----download-popest-tables-1990-2000, eval=FALSE------------------------
  xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>%
    rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>%
    rvest::html_attrs() %>%
    purrr::transpose() %>%
    as_tibble() %>%
    unnest() %>%
    select(filename = name, url = href) %>%
    mutate(url = paste0("https:", url),
           filename = paste0(filename, ".csv")) %>%
    pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000"))

  ## ----download-popest-tables-2000-2010----
  ## "2000-2010/us-est00int-01.xls"
  download_file(
    "us-est00int-alldata-5yr.csv",
    "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv",
    here::here(DATA_DIR, "2000-2010")
  )


  ## ----download-popest-tables-2010----
  ## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"
  ## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US
  fs::dir_create(here::here(DATA_DIR, "2010-2017"))
  if (!fs::file_exists(here::here(DATA_DIR, "2010-2017", "PEP_2017_PEPSYASEXN_with_ann.csv"))) {
    rlang::abort(paste(
      "Download the Annual Estimates of Resident Population by Single Year of Age from:\n",
      "https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US\n",
      "and extract `PEP_2017_PEPSYASEXN_with_ann.csv` into:", here::here(DATA_DIR, "2010-2017")
    ))
  }

  ## ----download-popest-projections
  ## "np2017_d1.csv"
  download_file(
    "np2017_d1.csv",
    "https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv",
    here::here(DATA_DIR)
  )
 }
--- a/R/01_process-data.R
+++ b/R/01_process-data.R
@@ -0,0 +1,181 @@
 library(dplyr)
 library(purrr)
 library(tidyr)
 library(stringr)
 library(readr)
 library(readxl)
 library(fs)
 # library(here)
 library(lubridate)
 library(forcats)

 DATA_DIR <- "data-raw"

 pop <- list()

 import_pre1980 <- function(file) {
  # cli::cat_bullet(file)
  year <- str_extract(file, "\\d{4}\\.csv")
  year <- as.integer(sub("\\.csv", "", year))
  skip <- if (year >= 1960) 8 else 9
  x <- read_csv(file, col_names = FALSE, skip = skip,
                col_types = cols_only(
                  X1 = col_guess(), X2 = col_guess(),
                  X3 = col_guess(), X4 = col_guess()

                ))
  colnames(x) <- c("Age", "Total", "Total Male", "Total Female")
  x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ]
  stopifnot(nrow(x) %in% c(76L, 86L))
  x %>%
    mutate(
      Year = year,
      Age = sub("\\+", "", Age),
      Age = as.integer(Age)
    ) %>%
    select(Year, everything())
 }

 pop[["1900-1980"]] <- fs::dir_ls(
  here::here(DATA_DIR, "1900-1980")
 ) %>%
  map_dfr(import_pre1980)

 import_1980_1990 <- function(file) {
  read_fwf(
    file,
    col_positions = fwf_positions(
      start = c(3, 5, 7, 12, 22, 32),
      end = c(4, 6, 9, 20, 30, 40),
      col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female")
    ),
    skip = 0
  ) %>%
    filter(Age < 101, Month == 4, Year == min(Year)) %>%
    mutate(Year = Year + 1900)
 }

 pop[["1980-1990"]] <- fs::dir_ls(
  here::here(DATA_DIR, "1980-1990"),
  regexp = "TXT"
 ) %>%
  map_dfr(import_1980_1990)

 import_1990_2000 <- function(file) {
  x <- read_csv(file, col_names = FALSE, skip = 3)
  colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female")

  x %>%
    filter(
      !is.na(Total),
      !str_detect(Age, "All")
    ) %>%
    mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>%
    filter(
      lubridate::month(Month) == 4
    ) %>%
    mutate(
      Year = lubridate::year(Month),
      Age = str_remove(Age, "\\+"),
      Age = as.integer(Age)
    ) %>%
    select(Year, everything(), -Month)
 }

 pop[["1990-2000"]] <- fs::dir_ls(
  here::here(DATA_DIR, "1990-2000"),
  regexp = "\\d{4}\\.csv"
 ) %>%
  map_dfr(import_1990_2000)

 # pop[["2000-2010"]] <-  read_csv(
 #   here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv")
 # ) %>%
 #   select(Month = MONTH, Year = YEAR, Age = AGE,
 #          Total = TOT_POP, `Total Male` = TOT_MALE,
 #          `Total Female` = TOT_FEMALE) %>%
 #   filter(Year > 2000, Age <= 125)

 # pop[["2000-2010"]] <-  readxl::read_xls(
 #   here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"),
 #   sheet = "Sheet1"
 # ) %>%
 #   mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>%
 #   select(-age_label) %>%
 #   gather(Year, value, -1:-2) %>%
 #   spread(Group, value) %>%
 #   mutate(Year = as.integer(Year)) %>%
 #   filter(Year > 2000)

 pop[["2000-2010"]] <-
  readr::read_csv(
    here::here(DATA_DIR, "2000-2010", "us-est00int-alldata-5yr.csv")
  ) %>%
    select(
      Year = year,
      Age = AGEGRP,
      Total = TOT_POP,
      `Total Male` = TOT_MALE,
      `Total Female` = TOT_FEMALE
    ) %>%
    filter(Age != 0, Year > 2000, Year < 2010) %>%
    mutate(Age = (Age - 1) * 5)

 pop[["2010-2017"]] <- readr::read_csv(
  here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"),
  skip = 1
 ) %>%
  select(-1:-4) %>%
  gather(key, value, -Sex) %>%
  filter(str_detect(key, "Population Estimate")) %>%
  mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>%
  filter(!str_detect(key, "Total|Median")) %>%
  separate(key, c("Year", "Age"), sep = ";") %>%
  mutate(
    Year = as.integer(Year),
    Age = sub("+", "", Age, fixed = TRUE),
    Age = as.integer(Age),
    Sex = recode(
      Sex,
      "Both Sexes" = "Total",
      "Female" = "Total Female",
      "Male" = "Total Male"
    )
  ) %>%
  filter(Year < 2016) %>%
  spread(Sex, value)

 # pop[["2018"]] <- readr::read_tsv(
 #   here::here(DATA_DIR, "national-population-projections_2014-2060.txt")
 # ) %>%
 #   select(-1) %>%
 #   filter(!is.na(Year)) %>%
 #   select(contains("Code"), contains("Pop")) %>%
 #   {colnames(.) <- sub(" Code", "", colnames(.)); .} %>%
 #   group_by(Year, Age) %>%
 #   mutate(Total = sum(`Projected Populations`)) %>%
 #   ungroup() %>%
 #   spread(Gender, `Projected Populations`) %>%
 #   mutate(
 #     Age = sub("+", "", Age, fixed = TRUE),
 #     Age = as.integer(Age)
 #   ) %>%
 #   filter(Year > 2017)

 pop[["2018"]] <-
  readr::read_csv(
    here::here(DATA_DIR, "np2017_d1.csv")
  ) %>%
  filter(ORIGIN == 0, RACE == 0) %>%
  gather(Age, n, -SEX:-YEAR) %>%
  mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>%
  spread(SEX, n) %>%
  filter(Age != "TOTAL_POP") %>%
  mutate(
    Age = sub("POP_", "", Age, fixed = TRUE),
    Age = as.integer(Age)
  ) %>%
  select(Year = YEAR, Age, starts_with("Total"))

 fs::dir_create(here::here("data"))
 saveRDS(pop, here::here("data", "pop_estimates_by_age.rds"))
--- a/R/02_population-animations.R
+++ b/R/02_population-animations.R
@@ -0,0 +1,188 @@
 ## ----setup, include=FALSE------------------------------------------------
 knitr::opts_chunk$set(echo=FALSE)

 ## ----library-------------------------------------------------------------
 library(dplyr)
 library(purrr)
 library(tidyr)
 library(stringr)
 library(readr)
 library(forcats)
 library(gganimate)

 DATA_DIR <- "data-raw"
 ANIM_OUT <- "anim"
 fs::dir_create(here::here(ANIM_OUT))

 pop <- readRDS(here::here("data", "pop_estimates_by_age.rds"))

 ## ------------------------------------------------------------------------
 ga <-
  pop %>%
  bind_rows() %>%
  mutate(age_group_int = as.integer(Age) %/% 5) %>%
  group_by(age_group_int) %>%
  mutate(
    # age_group = paste(min(Age), max(Age), sep = " - "),
    age_group = paste(min(Age)),
    age_group = ifelse(Age >= 100, "100+", age_group)
  ) %>%
  ungroup() %>%
  arrange(age_group_int, Year) %>%
  mutate(age_group = fct_inorder(age_group)) %>%
  select(-age_group_int) %>%
  group_by(Year, age_group) %>%
  summarize(Total = sum(Total)) %>%
  group_by(Year) %>%
  mutate(GrandTotal  = sum(Total)) %>%
  ungroup() %>%
  mutate(Total = Total/GrandTotal) %>%
  arrange(Year, age_group) %>%
  # complete(Year, age_group, fill = list(Total = 0)) %>%
  # filter(Year > 2000) %>%
  {
    ggplot(.) +
      # aes(age_group, Total) +
      # geom_vline(xintercept = c(1980, 1990), color = "grey50") +
      # geom_line(aes(group = age_group)) +
      # geom_point(size = 0.2) +
      # geom_col(fill = "grey40") +
      # facet_wrap(~ Year) +
      # coord_flip() +
      # scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) +
      geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) +
      geom_segment(aes(yend = age_group, y = age_group, xend = Total),
                   x = 0,
                   size = 5,
                   color = "grey40",
                   alpha = 0.25,
                   data = filter(., Year == min(Year)) %>% select(-Year)
      ) +
      # annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) +
      theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") +
      theme(panel.grid.major.y = element_blank()) +
      scale_x_continuous(labels = scales::percent, expand = c(0, 0, 0.05, 0)) +
      # labs(x = "Percent of Population", y = NULL) +
      theme(axis.title.x = element_text(hjust = 0.5, size = 30)) +
      labs(x = "{closest_state}", y = NULL) +
      gganimate::transition_time(Year) +
      gganimate::transition_states(Year, 1, 0, wrap = FALSE) +
      gganimate::ease_aes("linear") +
      gganimate::enter_fade()
  }

 grkmisc::logger(msg = "Writing MP4 population bar animation")
 anim_save(
  here::here(ANIM_OUT, "pop-anim.mp4"),
  animate(ga, width = 1040*2, height = 480*2, res = 144,
          fps = 20, nframes = 161*2, renderer = av_renderer())
 )
 grkmisc::logger(msg = "Writing gif population bar animation")
 anim_save(
  here::here(ANIM_OUT, "pop-anim.gif"),
  animate(ga, width = 1040*2, height = 480*2, res = 144,
          fps = 20, nframes = 161*2,
          renderer = gganimate::gifski_renderer(loop = FALSE))
 )

 pop %>%
  bind_rows() %>%
  mutate(age_group_int = as.integer(Age) %/% 5) %>%
  select(1:3, age_group_int) %>%
  filter(age_group_int > 5) %>%
  mutate(
    old = age_group_int >= 13,
    old = c("Adult", "Older Adult")[old + 1]
  ) %>%
  group_by(Year, old) %>%
  summarize(n = sum(Total)) %>%
  spread(old, n) %>%
  {
    ggplot(.) +
      aes(x = Year) +
      geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) +
      geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) +
      geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange, size = 2)
  } %>%
  invisible()

 elder_dependency <-
  pop %>%
  bind_rows() %>%
  mutate(age_group_int = as.integer(Age) %/% 5) %>%
  select(1:3, age_group_int) %>%
  mutate(
    old = case_when(
      age_group_int < 3 ~ "Children",
      age_group_int >= 13 ~ "Older Adult",
      TRUE ~ "Adult"
    )
  ) %>%
  group_by(Year, old) %>%
  summarize(n = sum(Total)) %>%
  spread(old, n) %>%
  mutate(
    dependency_elder = `Older Adult` / Adult,
    dependency_all = (`Older Adult` + Children) / Adult
  )

 ga_elder_dep <-
  ggplot(elder_dependency) +
  aes(Year, dependency_elder) +
  geom_point(color = "#00589a") +
  geom_point(data = filter(elder_dependency, Year == 2018), size = 4, color = "#eb1455") +
  labs(y = "Dependency Ratio", x = NULL) +
  theme_minimal(base_size = 24, base_family = "Fira Sans Condensed") +
  scale_y_continuous(labels = scales::percent) +
  gganimate::transition_states(Year, 1, 0, FALSE) +
  gganimate::shadow_mark()

 grkmisc::logger(msg = "Writing mp4 elder dependency animation")
 anim_save(
  here::here(ANIM_OUT, "pop-dependency.mp4"),
  animate(ga_elder_dep, width = 1040*2, height = 480*2, res = 144,
          fps = 18, nframes = 161, renderer = av_renderer())
 )
 grkmisc::logger(msg = "Writing gif elder dependency animation")
 anim_save(
  here::here(ANIM_OUT, "pop-dependency.gif"),
  animate(ga_elder_dep, width = 1040*2, height = 480*2, res = 144,
          fps = 18, nframes = 161,
          renderer = gganimate::gifski_renderer(loop = FALSE))
 )

 elder_dep_together <-
  elder_dependency %>%
  ungroup() %>%
  gather(timeseries, ratio, starts_with("depend")) %>%
  select(-Adult:-`Older Adult`) %>%
  mutate(timeseries = ifelse(timeseries == "dependency_elder", 0, 1))

 ga_dep_together <-
  ggplot(elder_dep_together) +
  aes(Year, ratio) +
  geom_point(color = "#00589a") +
  # geom_point(color = "#82c878") +
  # geom_point(data = filter(., Year == 2018), size = 4, color = "#eb1455") +
  labs(y = "Dependency Ratio", x = NULL) +
  theme_minimal(base_size = 24, base_family = "Fira Sans Condensed") +
  scale_y_continuous(labels = scales::percent) +
  gganimate::transition_states(timeseries, 1, 0, FALSE) +
  gganimate::view_follow(fixed_x = TRUE) +
  gganimate::ease_aes("sine-in-out")+
  gganimate::shadow_mark()

 grkmisc::logger(msg = "Writing mp4 all dependency animation")
 anim_save(
  here::here(ANIM_OUT, "pop-dependency-both.mp4"),
  animate(ga_dep_together, width = 1040*2, height = 480*2, res = 144,
          fps = 18, nframes = 80,
          renderer = av_renderer())
 )
 grkmisc::logger(msg = "Writing gif all dependency animation")
 anim_save(
  here::here(ANIM_OUT, "pop-dependency-both.gif"),
  animate(ga_dep_together, width = 1040*2, height = 480*2, res = 144,
          fps = 18, nframes = 80,
          renderer = gganimate::gifski_renderer(loop = FALSE))
 )
--- a/R/2018-10-21-population-animations.R
+++ b/R/2018-10-21-population-animations.R
@@ -1,357 +0,0 @@
 ## ----setup, include=FALSE------------------------------------------------
 knitr::opts_chunk$set(echo=FALSE)

 ## ----library-------------------------------------------------------------
 library(dplyr)
 library(purrr)
 library(tidyr)
 library(stringr)
 library(readr)
 library(forcats)
 library(gganimate)

 DATA_DIR <- "data-raw"
 ANIM_OUT <- "anim"

 fs::dir_create(here::here(DATA_DIR))
 fs::dir_create(here::here(ANIM_OUT))

 DOWNLOAD_DATA <- TRUE


 download_file <- function(filename, url, basedir) {
  filename <- gsub(" ", "_", filename)
  filename <- gsub("/", "-", filename)
  fs::dir_create(basedir)
  download.file(url, destfile = fs::path(basedir, filename))
 }

 if (DOWNLOAD_DATA) {
  ## ----download-popest-tables-1900-1980, eval=FALSE------------------------
  x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/")

  xx <-
    data_frame(
      filename = rvest::html_nodes(x, "a") %>% rvest::html_text(),
      url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href")
    ) %>%
    filter(str_detect(url, "\\.csv")) %>%
    mutate(url = str_c(
      "https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/",
      url
    ))

  pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980"))

  ## ----download-popest-tables-1980-1990, eval=FALSE------------------------
  xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>%
    rvest::html_nodes(".list.section .uscb-text-link") %>%
    rvest::html_attrs() %>%
    purrr::transpose() %>%
    as_tibble() %>%
    unnest() %>%
    select(filename = name, url = href) %>%
    mutate(url = paste0("https:", url)) %>%
    pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990"))

  ## ----download-popest-tables-1990-2000, eval=FALSE------------------------
  xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>%
    rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>%
    rvest::html_attrs() %>%
    purrr::transpose() %>%
    as_tibble() %>%
    unnest() %>%
    select(filename = name, url = href) %>%
    mutate(url = paste0("https:", url),
           filename = paste0(filename, ".csv")) %>%
    pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000"))

  ## ----download-popest-tables-2000-2010----
  ## "2000-2010/us-est00int-01.xls"
  download_file(
    "us-est00int-alldata-5yr.csv",
    "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv",
    here::here(DATA_DIR, "2000-2010")
  )


  ## ----download-popest-tables-2010----
  ## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"
  ## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US

  ## ----download-popest-projections
  ## "np2017_d1.csv"
  download_file(
    "np2017_d1.csv",
    "https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv",
    here::here(DATA_DIR)
  )
 }

 ## ----import-historical---------------------------------------------------
 pop <- list()

 import_pre1980 <- function(file) {
  # cli::cat_bullet(file)
  year <- str_extract(file, "\\d{4}\\.csv")
  year <- as.integer(sub("\\.csv", "", year))
  skip <- if (year >= 1960) 8 else 9
  x <- read_csv(file, col_names = FALSE, skip = skip,
                col_types = cols_only(
                  X1 = col_guess(), X2 = col_guess(),
                  X3 = col_guess(), X4 = col_guess()

                ))
  colnames(x) <- c("Age", "Total", "Total Male", "Total Female")
  x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ]
  stopifnot(nrow(x) %in% c(76L, 86L))
  x %>%
    mutate(
      Year = year,
      Age = sub("\\+", "", Age),
      Age = as.integer(Age)
    ) %>%
    select(Year, everything())
 }

 pop[["1900-1980"]] <- fs::dir_ls(
  here::here(DATA_DIR, "1900-1980")
  ) %>%
  map_dfr(import_pre1980)

 import_1980_1990 <- function(file) {
  read_fwf(
    file,
    col_positions = fwf_positions(
      start = c(3, 5, 7, 12, 22, 32),
      end = c(4, 6, 9, 20, 30, 40),
      col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female")
    ),
    skip = 0
  ) %>%
    filter(Age < 101, Month == 4, Year == min(Year)) %>%
    mutate(Year = Year + 1900)
 }

 pop[["1980-1990"]] <- fs::dir_ls(
  here::here(DATA_DIR, "1980-1990"),
  regexp = "TXT"
 ) %>%
  map_dfr(import_1980_1990)

 import_1990_2000 <- function(file) {
  x <- read_csv(file, col_names = FALSE, skip = 3)
  colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female")

  x %>%
    filter(
      !is.na(Total),
      !str_detect(Age, "All")
    ) %>%
    mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>%
    filter(
      lubridate::month(Month) == 4
    ) %>%
    mutate(
      Year = lubridate::year(Month),
      Age = str_remove(Age, "\\+"),
      Age = as.integer(Age)
    ) %>%
    select(Year, everything(), -Month)
 }

 pop[["1990-2000"]] <- fs::dir_ls(
  here::here(DATA_DIR, "1990-2000"),
  regexp = "\\d{4}\\.csv"
 ) %>%
  map_dfr(import_1990_2000)

 # pop[["2000-2010"]] <-  read_csv(
 #   here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv")
 # ) %>%
 #   select(Month = MONTH, Year = YEAR, Age = AGE,
 #          Total = TOT_POP, `Total Male` = TOT_MALE,
 #          `Total Female` = TOT_FEMALE) %>%
 #   filter(Year > 2000, Age <= 125)

 pop[["2000-2010"]] <-  readxl::read_xls(
  here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"),
  sheet = "Sheet1"
 ) %>%
  mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>%
  select(-age_label) %>%
  gather(Year, value, -1:-2) %>%
  spread(Group, value) %>%
  mutate(Year = as.integer(Year)) %>%
  filter(Year > 2000)

 pop[["2010-2017"]] <- readr::read_csv(
  here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"),
  skip = 1
 ) %>%
  select(-1:-4) %>%
  gather(key, value, -Sex) %>%
  filter(str_detect(key, "Population Estimate")) %>%
  mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>%
  filter(!str_detect(key, "Total|Median")) %>%
  separate(key, c("Year", "Age"), sep = ";") %>%
  mutate(
    Year = as.integer(Year),
    Age = sub("+", "", Age, fixed = TRUE),
    Age = as.integer(Age),
    Sex = recode(
      Sex,
      "Both Sexes" = "Total",
      "Female" = "Total Female",
      "Male" = "Total Male"
    )
  ) %>%
  spread(Sex, value)

 # pop[["2018"]] <- readr::read_tsv(
 #   here::here(DATA_DIR, "national-population-projections_2014-2060.txt")
 # ) %>%
 #   select(-1) %>%
 #   filter(!is.na(Year)) %>%
 #   select(contains("Code"), contains("Pop")) %>%
 #   {colnames(.) <- sub(" Code", "", colnames(.)); .} %>%
 #   group_by(Year, Age) %>%
 #   mutate(Total = sum(`Projected Populations`)) %>%
 #   ungroup() %>%
 #   spread(Gender, `Projected Populations`) %>%
 #   mutate(
 #     Age = sub("+", "", Age, fixed = TRUE),
 #     Age = as.integer(Age)
 #   ) %>%
 #   filter(Year > 2017)

 pop[["2018"]] <-
  readr::read_csv(
    here::here(DATA_DIR, "np2017_d1.csv")
  ) %>%
    gather(Age, n, -SEX:-YEAR) %>%
    mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>%
    spread(SEX, n) %>%
    filter(Age != "TOTAL_POP") %>%
    mutate(
      Age = sub("POP_", "", Age, fixed = TRUE),
      Age = as.integer(Age)
    ) %>%
    select(Year = YEAR, Age, starts_with("Total"))



 ## ------------------------------------------------------------------------
 ga <-
  pop %>%
  bind_rows() %>%
  mutate(age_group_int = as.integer(Age) %/% 5) %>%
  group_by(age_group_int) %>%
  mutate(
    # age_group = paste(min(Age), max(Age), sep = " - "),
    age_group = paste(min(Age)),
    age_group = ifelse(Age >= 100, "100+", age_group)
  ) %>%
  ungroup() %>%
  arrange(age_group_int, Year) %>%
  mutate(age_group = fct_inorder(age_group)) %>%
  select(-age_group_int) %>%
  group_by(Year, age_group) %>%
  summarize(Total = sum(Total)) %>%
  group_by(Year) %>%
  mutate(GrandTotal  = sum(Total)) %>%
  ungroup() %>%
  mutate(Total = Total/GrandTotal) %>%
  arrange(Year, age_group) %>% View()
  # complete(Year, age_group, fill = list(Total = 0)) %>%
  # filter(Year > 2000) %>%
  {
    ggplot(.) +
      # aes(age_group, Total) +
      # geom_vline(xintercept = c(1980, 1990), color = "grey50") +
      # geom_line(aes(group = age_group)) +
      # geom_point(size = 0.2) +
      # geom_col(fill = "grey40") +
      # facet_wrap(~ Year) +
      # coord_flip() +
      # scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) +
      geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) +
      geom_segment(aes(yend = age_group, y = age_group, xend = Total),
                   x = 0,
                   size = 5,
                   color = "grey40",
                   alpha = 0.25,
                   data = filter(., Year == min(Year)) %>% select(-Year)
      ) +
      # annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) +
      theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") +
      theme(panel.grid.major.y = element_blank()) +
      scale_x_continuous(labels = scales::percent, expand = c(0, 0)) +
      # labs(x = "Percent of Population", y = NULL) +
      theme(axis.title.x = element_text(hjust = 0.5, size = 30)) +
      labs(x = "{closest_state}", y = NULL) +
      gganimate::transition_time(Year) +
      gganimate::transition_states(Year, 1, 0, wrap = FALSE) +
      gganimate::ease_aes("linear") +
      gganimate::enter_fade()
  }

 anim_save(
  here::here(ANIM_OUT, "pop-anim.mp4"),
  animate(ga, width = 1040*2, height = 480*2, res = 144,
          fps = 20, nframes = 161*2,
          renderer = av_renderer())
 )

 pop %>%
  bind_rows() %>%
  mutate(age_group_int = as.integer(Age) %/% 5) %>%
  select(1:3, age_group_int) %>%
  filter(age_group_int > 5) %>%
  mutate(
    old = age_group_int >= 13,
    old = c("Adult", "Older Adult")[old + 1]
  ) %>%
  group_by(Year, old) %>%
  summarize(n = sum(Total)) %>%
  spread(old, n) %>%
  {
    ggplot(.) +
      aes(x = Year) +
      geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) +
      geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) +
      geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange)
  }

 ga2 <-
  pop %>%
  bind_rows() %>%
  mutate(age_group_int = as.integer(Age) %/% 5) %>%
  select(1:3, age_group_int) %>%
  filter(age_group_int >= 3) %>%
  mutate(
    old = age_group_int >= 13,
    old = c("Adult", "Older Adult")[old + 1]
  ) %>%
  group_by(Year, old) %>%
  summarize(n = sum(Total)) %>%
  spread(old, n) %>%
  mutate(ratio = `Older Adult` / Adult) %>%
  {
    ggplot(.) +
      aes(Year, ratio) +
      geom_point() +
      geom_point(data = filter(., Year == 2018), color = "red") +
      labs(y = "Dependency Ratio", x = NULL) +
      theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") +
      scale_y_continuous(labels = scales::percent) +
      gganimate::transition_states(Year, 1, 0, FALSE) +
      gganimate::shadow_mark()
  }

 anim_save(
  here::here(ANIM_OUT, "population-projections/pop-dependency.mp4"),
  animate(ga2, width = 1040*2, height = 480*2, res = 144,
          fps = 18, nframes = 161,
          renderer = av_renderer())
 )