7 years ago · c4a0dff494
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -0,0 +1 @@
 ^README\.Rmd$
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,75 @@
 ---- Default .gitignore From grkmisc ----
 .Rproj.user
 .Rhistory
 .RData
 .DS_Store

 # Directories that start with _
 _*/

 ## https://github.com/github/gitignore/blob/master/R.gitignore
 # History files
 .Rhistory
 .Rapp.history

 # Session Data files
 .RData

 # Example code in package build process
 *-Ex.R

 # Output files from R CMD build
 /*.tar.gz

 # Output files from R CMD check
 /*.Rcheck/

 # RStudio files
 .Rproj.user/

 # produced vignettes
 vignettes/*.html
 vignettes/*.pdf

 # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
 .httr-oauth

 # knitr and R markdown default cache directories
 /*_cache/
 /cache/

 # Temporary files created by R markdown
 *.utf8.md
 *.knit.md

 # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html
 rsconnect/

 ## https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
 # General
 .DS_Store
 .AppleDouble
 .LSOverride

 # Icon must end with two \r
 Icon


 # Thumbnails
 ._*

 # Files that might appear in the root of a volume
 .DocumentRevisions-V100
 .fseventsd
 .Spotlight-V100
 .TemporaryItems
 .Trashes
 .VolumeIcon.icns
 .com.apple.timemachine.donotpresent

 # Directories potentially created on remote AFP share
 .AppleDB
 .AppleDesktop
 Network Trash Folder
 Temporary Items
 .apdisk
--- a/NEWS.md
+++ b/NEWS.md
@@ -0,0 +1,5 @@
 # News

 ## 2018-10-25

 Project Started
--- a/R/2018-10-21-population-animations.R
+++ b/R/2018-10-21-population-animations.R
@@ -0,0 +1,357 @@
 ## ----setup, include=FALSE------------------------------------------------
 knitr::opts_chunk$set(echo=FALSE)

 ## ----library-------------------------------------------------------------
 library(dplyr)
 library(purrr)
 library(tidyr)
 library(stringr)
 library(readr)
 library(forcats)
 library(gganimate)

 DATA_DIR <- "data-raw"
 ANIM_OUT <- "anim"

 fs::dir_create(here::here(DATA_DIR))
 fs::dir_create(here::here(ANIM_OUT))

 DOWNLOAD_DATA <- TRUE


 download_file <- function(filename, url, basedir) {
  filename <- gsub(" ", "_", filename)
  filename <- gsub("/", "-", filename)
  fs::dir_create(basedir)
  download.file(url, destfile = fs::path(basedir, filename))
 }

 if (DOWNLOAD_DATA) {
  ## ----download-popest-tables-1900-1980, eval=FALSE------------------------
  x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/")

  xx <-
    data_frame(
      filename = rvest::html_nodes(x, "a") %>% rvest::html_text(),
      url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href")
    ) %>%
    filter(str_detect(url, "\\.csv")) %>%
    mutate(url = str_c(
      "https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/",
      url
    ))

  pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980"))

  ## ----download-popest-tables-1980-1990, eval=FALSE------------------------
  xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>%
    rvest::html_nodes(".list.section .uscb-text-link") %>%
    rvest::html_attrs() %>%
    purrr::transpose() %>%
    as_tibble() %>%
    unnest() %>%
    select(filename = name, url = href) %>%
    mutate(url = paste0("https:", url)) %>%
    pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990"))

  ## ----download-popest-tables-1990-2000, eval=FALSE------------------------
  xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>%
    rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>%
    rvest::html_attrs() %>%
    purrr::transpose() %>%
    as_tibble() %>%
    unnest() %>%
    select(filename = name, url = href) %>%
    mutate(url = paste0("https:", url),
           filename = paste0(filename, ".csv")) %>%
    pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000"))

  ## ----download-popest-tables-2000-2010----
  ## "2000-2010/us-est00int-01.xls"
  download_file(
    "us-est00int-alldata-5yr.csv",
    "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv",
    here::here(DATA_DIR, "2000-2010")
  )


  ## ----download-popest-tables-2010----
  ## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"
  ## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US

  ## ----download-popest-projections
  ## "np2017_d1.csv"
  download_file(
    "np2017_d1.csv",
    "https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv",
    here::here(DATA_DIR)
  )
 }

 ## ----import-historical---------------------------------------------------
 pop <- list()

 import_pre1980 <- function(file) {
  # cli::cat_bullet(file)
  year <- str_extract(file, "\\d{4}\\.csv")
  year <- as.integer(sub("\\.csv", "", year))
  skip <- if (year >= 1960) 8 else 9
  x <- read_csv(file, col_names = FALSE, skip = skip,
                col_types = cols_only(
                  X1 = col_guess(), X2 = col_guess(),
                  X3 = col_guess(), X4 = col_guess()

                ))
  colnames(x) <- c("Age", "Total", "Total Male", "Total Female")
  x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ]
  stopifnot(nrow(x) %in% c(76L, 86L))
  x %>%
    mutate(
      Year = year,
      Age = sub("\\+", "", Age),
      Age = as.integer(Age)
    ) %>%
    select(Year, everything())
 }

 pop[["1900-1980"]] <- fs::dir_ls(
  here::here(DATA_DIR, "1900-1980")
  ) %>%
  map_dfr(import_pre1980)

 import_1980_1990 <- function(file) {
  read_fwf(
    file,
    col_positions = fwf_positions(
      start = c(3, 5, 7, 12, 22, 32),
      end = c(4, 6, 9, 20, 30, 40),
      col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female")
    ),
    skip = 0
  ) %>%
    filter(Age < 101, Month == 4, Year == min(Year)) %>%
    mutate(Year = Year + 1900)
 }

 pop[["1980-1990"]] <- fs::dir_ls(
  here::here(DATA_DIR, "1980-1990"),
  regexp = "TXT"
 ) %>%
  map_dfr(import_1980_1990)

 import_1990_2000 <- function(file) {
  x <- read_csv(file, col_names = FALSE, skip = 3)
  colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female")

  x %>%
    filter(
      !is.na(Total),
      !str_detect(Age, "All")
    ) %>%
    mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>%
    filter(
      lubridate::month(Month) == 4
    ) %>%
    mutate(
      Year = lubridate::year(Month),
      Age = str_remove(Age, "\\+"),
      Age = as.integer(Age)
    ) %>%
    select(Year, everything(), -Month)
 }

 pop[["1990-2000"]] <- fs::dir_ls(
  here::here(DATA_DIR, "1990-2000"),
  regexp = "\\d{4}\\.csv"
 ) %>%
  map_dfr(import_1990_2000)

 # pop[["2000-2010"]] <-  read_csv(
 #   here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv")
 # ) %>%
 #   select(Month = MONTH, Year = YEAR, Age = AGE,
 #          Total = TOT_POP, `Total Male` = TOT_MALE,
 #          `Total Female` = TOT_FEMALE) %>%
 #   filter(Year > 2000, Age <= 125)

 pop[["2000-2010"]] <-  readxl::read_xls(
  here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"),
  sheet = "Sheet1"
 ) %>%
  mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>%
  select(-age_label) %>%
  gather(Year, value, -1:-2) %>%
  spread(Group, value) %>%
  mutate(Year = as.integer(Year)) %>%
  filter(Year > 2000)

 pop[["2010-2017"]] <- readr::read_csv(
  here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"),
  skip = 1
 ) %>%
  select(-1:-4) %>%
  gather(key, value, -Sex) %>%
  filter(str_detect(key, "Population Estimate")) %>%
  mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>%
  filter(!str_detect(key, "Total|Median")) %>%
  separate(key, c("Year", "Age"), sep = ";") %>%
  mutate(
    Year = as.integer(Year),
    Age = sub("+", "", Age, fixed = TRUE),
    Age = as.integer(Age),
    Sex = recode(
      Sex,
      "Both Sexes" = "Total",
      "Female" = "Total Female",
      "Male" = "Total Male"
    )
  ) %>%
  spread(Sex, value)

 # pop[["2018"]] <- readr::read_tsv(
 #   here::here(DATA_DIR, "national-population-projections_2014-2060.txt")
 # ) %>%
 #   select(-1) %>%
 #   filter(!is.na(Year)) %>%
 #   select(contains("Code"), contains("Pop")) %>%
 #   {colnames(.) <- sub(" Code", "", colnames(.)); .} %>%
 #   group_by(Year, Age) %>%
 #   mutate(Total = sum(`Projected Populations`)) %>%
 #   ungroup() %>%
 #   spread(Gender, `Projected Populations`) %>%
 #   mutate(
 #     Age = sub("+", "", Age, fixed = TRUE),
 #     Age = as.integer(Age)
 #   ) %>%
 #   filter(Year > 2017)

 pop[["2018"]] <-
  readr::read_csv(
    here::here(DATA_DIR, "np2017_d1.csv")
  ) %>%
    gather(Age, n, -SEX:-YEAR) %>%
    mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>%
    spread(SEX, n) %>%
    filter(Age != "TOTAL_POP") %>%
    mutate(
      Age = sub("POP_", "", Age, fixed = TRUE),
      Age = as.integer(Age)
    ) %>%
    select(Year = YEAR, Age, starts_with("Total"))



 ## ------------------------------------------------------------------------
 ga <-
  pop %>%
  bind_rows() %>%
  mutate(age_group_int = as.integer(Age) %/% 5) %>%
  group_by(age_group_int) %>%
  mutate(
    # age_group = paste(min(Age), max(Age), sep = " - "),
    age_group = paste(min(Age)),
    age_group = ifelse(Age >= 100, "100+", age_group)
  ) %>%
  ungroup() %>%
  arrange(age_group_int, Year) %>%
  mutate(age_group = fct_inorder(age_group)) %>%
  select(-age_group_int) %>%
  group_by(Year, age_group) %>%
  summarize(Total = sum(Total)) %>%
  group_by(Year) %>%
  mutate(GrandTotal  = sum(Total)) %>%
  ungroup() %>%
  mutate(Total = Total/GrandTotal) %>%
  arrange(Year, age_group) %>% View()
  # complete(Year, age_group, fill = list(Total = 0)) %>%
  # filter(Year > 2000) %>%
  {
    ggplot(.) +
      # aes(age_group, Total) +
      # geom_vline(xintercept = c(1980, 1990), color = "grey50") +
      # geom_line(aes(group = age_group)) +
      # geom_point(size = 0.2) +
      # geom_col(fill = "grey40") +
      # facet_wrap(~ Year) +
      # coord_flip() +
      # scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) +
      geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) +
      geom_segment(aes(yend = age_group, y = age_group, xend = Total),
                   x = 0,
                   size = 5,
                   color = "grey40",
                   alpha = 0.25,
                   data = filter(., Year == min(Year)) %>% select(-Year)
      ) +
      # annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) +
      theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") +
      theme(panel.grid.major.y = element_blank()) +
      scale_x_continuous(labels = scales::percent, expand = c(0, 0)) +
      # labs(x = "Percent of Population", y = NULL) +
      theme(axis.title.x = element_text(hjust = 0.5, size = 30)) +
      labs(x = "{closest_state}", y = NULL) +
      gganimate::transition_time(Year) +
      gganimate::transition_states(Year, 1, 0, wrap = FALSE) +
      gganimate::ease_aes("linear") +
      gganimate::enter_fade()
  }

 anim_save(
  here::here(ANIM_OUT, "pop-anim.mp4"),
  animate(ga, width = 1040*2, height = 480*2, res = 144,
          fps = 20, nframes = 161*2,
          renderer = av_renderer())
 )

 pop %>%
  bind_rows() %>%
  mutate(age_group_int = as.integer(Age) %/% 5) %>%
  select(1:3, age_group_int) %>%
  filter(age_group_int > 5) %>%
  mutate(
    old = age_group_int >= 13,
    old = c("Adult", "Older Adult")[old + 1]
  ) %>%
  group_by(Year, old) %>%
  summarize(n = sum(Total)) %>%
  spread(old, n) %>%
  {
    ggplot(.) +
      aes(x = Year) +
      geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) +
      geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) +
      geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange)
  }

 ga2 <-
  pop %>%
  bind_rows() %>%
  mutate(age_group_int = as.integer(Age) %/% 5) %>%
  select(1:3, age_group_int) %>%
  filter(age_group_int >= 3) %>%
  mutate(
    old = age_group_int >= 13,
    old = c("Adult", "Older Adult")[old + 1]
  ) %>%
  group_by(Year, old) %>%
  summarize(n = sum(Total)) %>%
  spread(old, n) %>%
  mutate(ratio = `Older Adult` / Adult) %>%
  {
    ggplot(.) +
      aes(Year, ratio) +
      geom_point() +
      geom_point(data = filter(., Year == 2018), color = "red") +
      labs(y = "Dependency Ratio", x = NULL) +
      theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") +
      scale_y_continuous(labels = scales::percent) +
      gganimate::transition_states(Year, 1, 0, FALSE) +
      gganimate::shadow_mark()
  }

 anim_save(
  here::here(ANIM_OUT, "population-projections/pop-dependency.mp4"),
  animate(ga2, width = 1040*2, height = 480*2, res = 144,
          fps = 18, nframes = 161,
          renderer = av_renderer())
 )
--- a/README.Rmd
+++ b/README.Rmd
@@ -0,0 +1,31 @@
 ---
 output: github_document
 ---

 <!-- README.md is generated from README.Rmd. Please edit that file -->

 ```{r setup, include = FALSE}
 knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
 )
 ```
 # us-popest-animation

 The goal of us-popest-animation is to ...

 What is special about using `README.Rmd` instead of just `README.md`? You can include R chunks like so:

 ```{r cars}
 summary(cars)
 ```

 You'll still need to render `README.Rmd` regularly, to keep `README.md` up-to-date.

 You can also embed plots, for example:

 ```{r pressure, echo = FALSE}
 plot(pressure)
 ```

 In that case, don't forget to commit and push the resulting figure files, so they display on GitHub!
--- a/us-popest-animation.Rproj
+++ b/us-popest-animation.Rproj
@@ -0,0 +1,21 @@
 Version: 1.0

 RestoreWorkspace: No
 SaveWorkspace: No
 AlwaysSaveHistory: Default

 EnableCodeIndexing: Yes
 UseSpacesForTab: Yes
 NumSpacesForTab: 2
 Encoding: UTF-8

 RnwWeave: Sweave
 LaTeX: pdfLaTeX

 AutoAppendNewline: Yes
 StripTrailingWhitespace: Yes

 BuildType: Package
 PackageUseDevtools: Yes
 PackageInstallArgs: --no-multiarch --with-keep.source
 PackageRoxygenize: rd,collate,namespace