Przeglądaj źródła

Initial commit

master
Garrick Aden-Buie 7 lat temu
commit
c4a0dff494
6 zmienionych plików z 490 dodań i 0 usunięć
  1. +1
    -0
      .Rbuildignore
  2. +75
    -0
      .gitignore
  3. +5
    -0
      NEWS.md
  4. +357
    -0
      R/2018-10-21-population-animations.R
  5. +31
    -0
      README.Rmd
  6. +21
    -0
      us-popest-animation.Rproj

+ 1
- 0
.Rbuildignore Wyświetl plik

@@ -0,0 +1 @@
^README\.Rmd$

+ 75
- 0
.gitignore Wyświetl plik

@@ -0,0 +1,75 @@
---- Default .gitignore From grkmisc ----
.Rproj.user
.Rhistory
.RData
.DS_Store

# Directories that start with _
_*/

## https://github.com/github/gitignore/blob/master/R.gitignore
# History files
.Rhistory
.Rapp.history

# Session Data files
.RData

# Example code in package build process
*-Ex.R

# Output files from R CMD build
/*.tar.gz

# Output files from R CMD check
/*.Rcheck/

# RStudio files
.Rproj.user/

# produced vignettes
vignettes/*.html
vignettes/*.pdf

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth

# knitr and R markdown default cache directories
/*_cache/
/cache/

# Temporary files created by R markdown
*.utf8.md
*.knit.md

# Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html
rsconnect/

## https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon


# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

+ 5
- 0
NEWS.md Wyświetl plik

@@ -0,0 +1,5 @@
# News

## 2018-10-25

Project Started

+ 357
- 0
R/2018-10-21-population-animations.R Wyświetl plik

@@ -0,0 +1,357 @@
## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(echo=FALSE)

## ----library-------------------------------------------------------------
library(dplyr)
library(purrr)
library(tidyr)
library(stringr)
library(readr)
library(forcats)
library(gganimate)

DATA_DIR <- "data-raw"
ANIM_OUT <- "anim"

fs::dir_create(here::here(DATA_DIR))
fs::dir_create(here::here(ANIM_OUT))

DOWNLOAD_DATA <- TRUE


download_file <- function(filename, url, basedir) {
filename <- gsub(" ", "_", filename)
filename <- gsub("/", "-", filename)
fs::dir_create(basedir)
download.file(url, destfile = fs::path(basedir, filename))
}

if (DOWNLOAD_DATA) {
## ----download-popest-tables-1900-1980, eval=FALSE------------------------
x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/")

xx <-
data_frame(
filename = rvest::html_nodes(x, "a") %>% rvest::html_text(),
url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href")
) %>%
filter(str_detect(url, "\\.csv")) %>%
mutate(url = str_c(
"https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/",
url
))

pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980"))

## ----download-popest-tables-1980-1990, eval=FALSE------------------------
xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>%
rvest::html_nodes(".list.section .uscb-text-link") %>%
rvest::html_attrs() %>%
purrr::transpose() %>%
as_tibble() %>%
unnest() %>%
select(filename = name, url = href) %>%
mutate(url = paste0("https:", url)) %>%
pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990"))

## ----download-popest-tables-1990-2000, eval=FALSE------------------------
xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>%
rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>%
rvest::html_attrs() %>%
purrr::transpose() %>%
as_tibble() %>%
unnest() %>%
select(filename = name, url = href) %>%
mutate(url = paste0("https:", url),
filename = paste0(filename, ".csv")) %>%
pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000"))

## ----download-popest-tables-2000-2010----
## "2000-2010/us-est00int-01.xls"
download_file(
"us-est00int-alldata-5yr.csv",
"https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv",
here::here(DATA_DIR, "2000-2010")
)


## ----download-popest-tables-2010----
## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"
## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US

## ----download-popest-projections
## "np2017_d1.csv"
download_file(
"np2017_d1.csv",
"https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv",
here::here(DATA_DIR)
)
}

## ----import-historical---------------------------------------------------
pop <- list()

import_pre1980 <- function(file) {
# cli::cat_bullet(file)
year <- str_extract(file, "\\d{4}\\.csv")
year <- as.integer(sub("\\.csv", "", year))
skip <- if (year >= 1960) 8 else 9
x <- read_csv(file, col_names = FALSE, skip = skip,
col_types = cols_only(
X1 = col_guess(), X2 = col_guess(),
X3 = col_guess(), X4 = col_guess()

))
colnames(x) <- c("Age", "Total", "Total Male", "Total Female")
x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ]
stopifnot(nrow(x) %in% c(76L, 86L))
x %>%
mutate(
Year = year,
Age = sub("\\+", "", Age),
Age = as.integer(Age)
) %>%
select(Year, everything())
}

pop[["1900-1980"]] <- fs::dir_ls(
here::here(DATA_DIR, "1900-1980")
) %>%
map_dfr(import_pre1980)

import_1980_1990 <- function(file) {
read_fwf(
file,
col_positions = fwf_positions(
start = c(3, 5, 7, 12, 22, 32),
end = c(4, 6, 9, 20, 30, 40),
col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female")
),
skip = 0
) %>%
filter(Age < 101, Month == 4, Year == min(Year)) %>%
mutate(Year = Year + 1900)
}

pop[["1980-1990"]] <- fs::dir_ls(
here::here(DATA_DIR, "1980-1990"),
regexp = "TXT"
) %>%
map_dfr(import_1980_1990)

import_1990_2000 <- function(file) {
x <- read_csv(file, col_names = FALSE, skip = 3)
colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female")

x %>%
filter(
!is.na(Total),
!str_detect(Age, "All")
) %>%
mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>%
filter(
lubridate::month(Month) == 4
) %>%
mutate(
Year = lubridate::year(Month),
Age = str_remove(Age, "\\+"),
Age = as.integer(Age)
) %>%
select(Year, everything(), -Month)
}

pop[["1990-2000"]] <- fs::dir_ls(
here::here(DATA_DIR, "1990-2000"),
regexp = "\\d{4}\\.csv"
) %>%
map_dfr(import_1990_2000)

# pop[["2000-2010"]] <- read_csv(
# here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv")
# ) %>%
# select(Month = MONTH, Year = YEAR, Age = AGE,
# Total = TOT_POP, `Total Male` = TOT_MALE,
# `Total Female` = TOT_FEMALE) %>%
# filter(Year > 2000, Age <= 125)

pop[["2000-2010"]] <- readxl::read_xls(
here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"),
sheet = "Sheet1"
) %>%
mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>%
select(-age_label) %>%
gather(Year, value, -1:-2) %>%
spread(Group, value) %>%
mutate(Year = as.integer(Year)) %>%
filter(Year > 2000)

pop[["2010-2017"]] <- readr::read_csv(
here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"),
skip = 1
) %>%
select(-1:-4) %>%
gather(key, value, -Sex) %>%
filter(str_detect(key, "Population Estimate")) %>%
mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>%
filter(!str_detect(key, "Total|Median")) %>%
separate(key, c("Year", "Age"), sep = ";") %>%
mutate(
Year = as.integer(Year),
Age = sub("+", "", Age, fixed = TRUE),
Age = as.integer(Age),
Sex = recode(
Sex,
"Both Sexes" = "Total",
"Female" = "Total Female",
"Male" = "Total Male"
)
) %>%
spread(Sex, value)

# pop[["2018"]] <- readr::read_tsv(
# here::here(DATA_DIR, "national-population-projections_2014-2060.txt")
# ) %>%
# select(-1) %>%
# filter(!is.na(Year)) %>%
# select(contains("Code"), contains("Pop")) %>%
# {colnames(.) <- sub(" Code", "", colnames(.)); .} %>%
# group_by(Year, Age) %>%
# mutate(Total = sum(`Projected Populations`)) %>%
# ungroup() %>%
# spread(Gender, `Projected Populations`) %>%
# mutate(
# Age = sub("+", "", Age, fixed = TRUE),
# Age = as.integer(Age)
# ) %>%
# filter(Year > 2017)

pop[["2018"]] <-
readr::read_csv(
here::here(DATA_DIR, "np2017_d1.csv")
) %>%
gather(Age, n, -SEX:-YEAR) %>%
mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>%
spread(SEX, n) %>%
filter(Age != "TOTAL_POP") %>%
mutate(
Age = sub("POP_", "", Age, fixed = TRUE),
Age = as.integer(Age)
) %>%
select(Year = YEAR, Age, starts_with("Total"))



## ------------------------------------------------------------------------
ga <-
pop %>%
bind_rows() %>%
mutate(age_group_int = as.integer(Age) %/% 5) %>%
group_by(age_group_int) %>%
mutate(
# age_group = paste(min(Age), max(Age), sep = " - "),
age_group = paste(min(Age)),
age_group = ifelse(Age >= 100, "100+", age_group)
) %>%
ungroup() %>%
arrange(age_group_int, Year) %>%
mutate(age_group = fct_inorder(age_group)) %>%
select(-age_group_int) %>%
group_by(Year, age_group) %>%
summarize(Total = sum(Total)) %>%
group_by(Year) %>%
mutate(GrandTotal = sum(Total)) %>%
ungroup() %>%
mutate(Total = Total/GrandTotal) %>%
arrange(Year, age_group) %>% View()
# complete(Year, age_group, fill = list(Total = 0)) %>%
# filter(Year > 2000) %>%
{
ggplot(.) +
# aes(age_group, Total) +
# geom_vline(xintercept = c(1980, 1990), color = "grey50") +
# geom_line(aes(group = age_group)) +
# geom_point(size = 0.2) +
# geom_col(fill = "grey40") +
# facet_wrap(~ Year) +
# coord_flip() +
# scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) +
geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) +
geom_segment(aes(yend = age_group, y = age_group, xend = Total),
x = 0,
size = 5,
color = "grey40",
alpha = 0.25,
data = filter(., Year == min(Year)) %>% select(-Year)
) +
# annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) +
theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") +
theme(panel.grid.major.y = element_blank()) +
scale_x_continuous(labels = scales::percent, expand = c(0, 0)) +
# labs(x = "Percent of Population", y = NULL) +
theme(axis.title.x = element_text(hjust = 0.5, size = 30)) +
labs(x = "{closest_state}", y = NULL) +
gganimate::transition_time(Year) +
gganimate::transition_states(Year, 1, 0, wrap = FALSE) +
gganimate::ease_aes("linear") +
gganimate::enter_fade()
}

anim_save(
here::here(ANIM_OUT, "pop-anim.mp4"),
animate(ga, width = 1040*2, height = 480*2, res = 144,
fps = 20, nframes = 161*2,
renderer = av_renderer())
)

pop %>%
bind_rows() %>%
mutate(age_group_int = as.integer(Age) %/% 5) %>%
select(1:3, age_group_int) %>%
filter(age_group_int > 5) %>%
mutate(
old = age_group_int >= 13,
old = c("Adult", "Older Adult")[old + 1]
) %>%
group_by(Year, old) %>%
summarize(n = sum(Total)) %>%
spread(old, n) %>%
{
ggplot(.) +
aes(x = Year) +
geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) +
geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) +
geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange)
}

ga2 <-
pop %>%
bind_rows() %>%
mutate(age_group_int = as.integer(Age) %/% 5) %>%
select(1:3, age_group_int) %>%
filter(age_group_int >= 3) %>%
mutate(
old = age_group_int >= 13,
old = c("Adult", "Older Adult")[old + 1]
) %>%
group_by(Year, old) %>%
summarize(n = sum(Total)) %>%
spread(old, n) %>%
mutate(ratio = `Older Adult` / Adult) %>%
{
ggplot(.) +
aes(Year, ratio) +
geom_point() +
geom_point(data = filter(., Year == 2018), color = "red") +
labs(y = "Dependency Ratio", x = NULL) +
theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") +
scale_y_continuous(labels = scales::percent) +
gganimate::transition_states(Year, 1, 0, FALSE) +
gganimate::shadow_mark()
}

anim_save(
here::here(ANIM_OUT, "population-projections/pop-dependency.mp4"),
animate(ga2, width = 1040*2, height = 480*2, res = 144,
fps = 18, nframes = 161,
renderer = av_renderer())
)

+ 31
- 0
README.Rmd Wyświetl plik

@@ -0,0 +1,31 @@
---
output: github_document
---

<!-- README.md is generated from README.Rmd. Please edit that file -->

```{r setup, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```
# us-popest-animation

The goal of us-popest-animation is to ...

What is special about using `README.Rmd` instead of just `README.md`? You can include R chunks like so:

```{r cars}
summary(cars)
```

You'll still need to render `README.Rmd` regularly, to keep `README.md` up-to-date.

You can also embed plots, for example:

```{r pressure, echo = FALSE}
plot(pressure)
```

In that case, don't forget to commit and push the resulting figure files, so they display on GitHub!

+ 21
- 0
us-popest-animation.Rproj Wyświetl plik

@@ -0,0 +1,21 @@
Version: 1.0

RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace

Ładowanie…
Anuluj
Zapisz