Garrick Aden-Buie пре 7 година
родитељ
комит
15498ec765
6 измењених фајлова са 468 додато и 363 уклоњено
  1. +0
    -1
      .Rbuildignore
  2. +0
    -5
      NEWS.md
  3. +99
    -0
      R/00_download-data.R
  4. +181
    -0
      R/01_process-data.R
  5. +188
    -0
      R/02_population-animations.R
  6. +0
    -357
      R/2018-10-21-population-animations.R

+ 0
- 1
.Rbuildignore Прегледај датотеку

@@ -1 +0,0 @@
^README\.Rmd$

+ 0
- 5
NEWS.md Прегледај датотеку

@@ -1,5 +0,0 @@
# News

## 2018-10-25

Project Started

+ 99
- 0
R/00_download-data.R Прегледај датотеку

@@ -0,0 +1,99 @@
library(dplyr)
library(purrr)
library(tidyr)
library(stringr)
library(readr)
library(forcats)
library(gganimate)

DATA_DIR <- "data-raw"

fs::dir_create(here::here(DATA_DIR))

DOWNLOAD_DATA <- if (!exists(DOWNLOAD_DATA)) TRUE else DOWNLOAD_DATA

download_file <- function(filename, url, basedir) {
filename <- gsub(" ", "_", filename)
filename <- gsub("/", "-", filename)
fs::dir_create(basedir)
download.file(url, destfile = fs::path(basedir, filename))
}

if (DOWNLOAD_DATA) {
## ----download-popest-tables-1900-1980, eval=FALSE------------------------
x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/")

xx <-
data_frame(
filename = rvest::html_nodes(x, "a") %>% rvest::html_text(),
url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href")
) %>%
filter(str_detect(url, "\\.csv")) %>%
mutate(url = str_c(
"https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/",
url
))

pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980"))

## ----download-popest-tables-1980-1990, eval=FALSE------------------------
xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>%
rvest::html_nodes(".list.section .uscb-text-link") %>%
rvest::html_attrs() %>%
purrr::transpose() %>%
as_tibble() %>%
unnest() %>%
filter(str_detect(href, "rqi\\.zip")) %>%
select(filename = name, url = href) %>%
mutate(
url = paste0("https:", url),
filename = basename(url)
) %>%
pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990"))

withr::with_dir(here::here(DATA_DIR, "1980-1990"), {
fs::dir_ls(regexp = "zip") %>%
purrr::walk(unzip)
})

## ----download-popest-tables-1990-2000, eval=FALSE------------------------
xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>%
rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>%
rvest::html_attrs() %>%
purrr::transpose() %>%
as_tibble() %>%
unnest() %>%
select(filename = name, url = href) %>%
mutate(url = paste0("https:", url),
filename = paste0(filename, ".csv")) %>%
pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000"))

## ----download-popest-tables-2000-2010----
## "2000-2010/us-est00int-01.xls"
download_file(
"us-est00int-alldata-5yr.csv",
"https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv",
here::here(DATA_DIR, "2000-2010")
)


## ----download-popest-tables-2010----
## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"
## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US
fs::dir_create(here::here(DATA_DIR, "2010-2017"))
if (!fs::file_exists(here::here(DATA_DIR, "2010-2017", "PEP_2017_PEPSYASEXN_with_ann.csv"))) {
rlang::abort(paste(
"Download the Annual Estimates of Resident Population by Single Year of Age from:\n",
"https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US\n",
"and extract `PEP_2017_PEPSYASEXN_with_ann.csv` into:", here::here(DATA_DIR, "2010-2017")
))
}

## ----download-popest-projections
## "np2017_d1.csv"
download_file(
"np2017_d1.csv",
"https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv",
here::here(DATA_DIR)
)
}

+ 181
- 0
R/01_process-data.R Прегледај датотеку

@@ -0,0 +1,181 @@
library(dplyr)
library(purrr)
library(tidyr)
library(stringr)
library(readr)
library(readxl)
library(fs)
# library(here)
library(lubridate)
library(forcats)

DATA_DIR <- "data-raw"

pop <- list()

import_pre1980 <- function(file) {
# cli::cat_bullet(file)
year <- str_extract(file, "\\d{4}\\.csv")
year <- as.integer(sub("\\.csv", "", year))
skip <- if (year >= 1960) 8 else 9
x <- read_csv(file, col_names = FALSE, skip = skip,
col_types = cols_only(
X1 = col_guess(), X2 = col_guess(),
X3 = col_guess(), X4 = col_guess()

))
colnames(x) <- c("Age", "Total", "Total Male", "Total Female")
x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ]
stopifnot(nrow(x) %in% c(76L, 86L))
x %>%
mutate(
Year = year,
Age = sub("\\+", "", Age),
Age = as.integer(Age)
) %>%
select(Year, everything())
}

pop[["1900-1980"]] <- fs::dir_ls(
here::here(DATA_DIR, "1900-1980")
) %>%
map_dfr(import_pre1980)

import_1980_1990 <- function(file) {
read_fwf(
file,
col_positions = fwf_positions(
start = c(3, 5, 7, 12, 22, 32),
end = c(4, 6, 9, 20, 30, 40),
col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female")
),
skip = 0
) %>%
filter(Age < 101, Month == 4, Year == min(Year)) %>%
mutate(Year = Year + 1900)
}

pop[["1980-1990"]] <- fs::dir_ls(
here::here(DATA_DIR, "1980-1990"),
regexp = "TXT"
) %>%
map_dfr(import_1980_1990)

import_1990_2000 <- function(file) {
x <- read_csv(file, col_names = FALSE, skip = 3)
colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female")

x %>%
filter(
!is.na(Total),
!str_detect(Age, "All")
) %>%
mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>%
filter(
lubridate::month(Month) == 4
) %>%
mutate(
Year = lubridate::year(Month),
Age = str_remove(Age, "\\+"),
Age = as.integer(Age)
) %>%
select(Year, everything(), -Month)
}

pop[["1990-2000"]] <- fs::dir_ls(
here::here(DATA_DIR, "1990-2000"),
regexp = "\\d{4}\\.csv"
) %>%
map_dfr(import_1990_2000)

# pop[["2000-2010"]] <- read_csv(
# here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv")
# ) %>%
# select(Month = MONTH, Year = YEAR, Age = AGE,
# Total = TOT_POP, `Total Male` = TOT_MALE,
# `Total Female` = TOT_FEMALE) %>%
# filter(Year > 2000, Age <= 125)

# pop[["2000-2010"]] <- readxl::read_xls(
# here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"),
# sheet = "Sheet1"
# ) %>%
# mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>%
# select(-age_label) %>%
# gather(Year, value, -1:-2) %>%
# spread(Group, value) %>%
# mutate(Year = as.integer(Year)) %>%
# filter(Year > 2000)

pop[["2000-2010"]] <-
readr::read_csv(
here::here(DATA_DIR, "2000-2010", "us-est00int-alldata-5yr.csv")
) %>%
select(
Year = year,
Age = AGEGRP,
Total = TOT_POP,
`Total Male` = TOT_MALE,
`Total Female` = TOT_FEMALE
) %>%
filter(Age != 0, Year > 2000, Year < 2010) %>%
mutate(Age = (Age - 1) * 5)

pop[["2010-2017"]] <- readr::read_csv(
here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"),
skip = 1
) %>%
select(-1:-4) %>%
gather(key, value, -Sex) %>%
filter(str_detect(key, "Population Estimate")) %>%
mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>%
filter(!str_detect(key, "Total|Median")) %>%
separate(key, c("Year", "Age"), sep = ";") %>%
mutate(
Year = as.integer(Year),
Age = sub("+", "", Age, fixed = TRUE),
Age = as.integer(Age),
Sex = recode(
Sex,
"Both Sexes" = "Total",
"Female" = "Total Female",
"Male" = "Total Male"
)
) %>%
filter(Year < 2016) %>%
spread(Sex, value)

# pop[["2018"]] <- readr::read_tsv(
# here::here(DATA_DIR, "national-population-projections_2014-2060.txt")
# ) %>%
# select(-1) %>%
# filter(!is.na(Year)) %>%
# select(contains("Code"), contains("Pop")) %>%
# {colnames(.) <- sub(" Code", "", colnames(.)); .} %>%
# group_by(Year, Age) %>%
# mutate(Total = sum(`Projected Populations`)) %>%
# ungroup() %>%
# spread(Gender, `Projected Populations`) %>%
# mutate(
# Age = sub("+", "", Age, fixed = TRUE),
# Age = as.integer(Age)
# ) %>%
# filter(Year > 2017)

pop[["2018"]] <-
readr::read_csv(
here::here(DATA_DIR, "np2017_d1.csv")
) %>%
filter(ORIGIN == 0, RACE == 0) %>%
gather(Age, n, -SEX:-YEAR) %>%
mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>%
spread(SEX, n) %>%
filter(Age != "TOTAL_POP") %>%
mutate(
Age = sub("POP_", "", Age, fixed = TRUE),
Age = as.integer(Age)
) %>%
select(Year = YEAR, Age, starts_with("Total"))

fs::dir_create(here::here("data"))
saveRDS(pop, here::here("data", "pop_estimates_by_age.rds"))

+ 188
- 0
R/02_population-animations.R Прегледај датотеку

@@ -0,0 +1,188 @@
## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(echo=FALSE)

## ----library-------------------------------------------------------------
library(dplyr)
library(purrr)
library(tidyr)
library(stringr)
library(readr)
library(forcats)
library(gganimate)

DATA_DIR <- "data-raw"
ANIM_OUT <- "anim"
fs::dir_create(here::here(ANIM_OUT))

pop <- readRDS(here::here("data", "pop_estimates_by_age.rds"))

## ------------------------------------------------------------------------
ga <-
pop %>%
bind_rows() %>%
mutate(age_group_int = as.integer(Age) %/% 5) %>%
group_by(age_group_int) %>%
mutate(
# age_group = paste(min(Age), max(Age), sep = " - "),
age_group = paste(min(Age)),
age_group = ifelse(Age >= 100, "100+", age_group)
) %>%
ungroup() %>%
arrange(age_group_int, Year) %>%
mutate(age_group = fct_inorder(age_group)) %>%
select(-age_group_int) %>%
group_by(Year, age_group) %>%
summarize(Total = sum(Total)) %>%
group_by(Year) %>%
mutate(GrandTotal = sum(Total)) %>%
ungroup() %>%
mutate(Total = Total/GrandTotal) %>%
arrange(Year, age_group) %>%
# complete(Year, age_group, fill = list(Total = 0)) %>%
# filter(Year > 2000) %>%
{
ggplot(.) +
# aes(age_group, Total) +
# geom_vline(xintercept = c(1980, 1990), color = "grey50") +
# geom_line(aes(group = age_group)) +
# geom_point(size = 0.2) +
# geom_col(fill = "grey40") +
# facet_wrap(~ Year) +
# coord_flip() +
# scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) +
geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) +
geom_segment(aes(yend = age_group, y = age_group, xend = Total),
x = 0,
size = 5,
color = "grey40",
alpha = 0.25,
data = filter(., Year == min(Year)) %>% select(-Year)
) +
# annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) +
theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") +
theme(panel.grid.major.y = element_blank()) +
scale_x_continuous(labels = scales::percent, expand = c(0, 0, 0.05, 0)) +
# labs(x = "Percent of Population", y = NULL) +
theme(axis.title.x = element_text(hjust = 0.5, size = 30)) +
labs(x = "{closest_state}", y = NULL) +
gganimate::transition_time(Year) +
gganimate::transition_states(Year, 1, 0, wrap = FALSE) +
gganimate::ease_aes("linear") +
gganimate::enter_fade()
}

grkmisc::logger(msg = "Writing MP4 population bar animation")
anim_save(
here::here(ANIM_OUT, "pop-anim.mp4"),
animate(ga, width = 1040*2, height = 480*2, res = 144,
fps = 20, nframes = 161*2, renderer = av_renderer())
)
grkmisc::logger(msg = "Writing gif population bar animation")
anim_save(
here::here(ANIM_OUT, "pop-anim.gif"),
animate(ga, width = 1040*2, height = 480*2, res = 144,
fps = 20, nframes = 161*2,
renderer = gganimate::gifski_renderer(loop = FALSE))
)

pop %>%
bind_rows() %>%
mutate(age_group_int = as.integer(Age) %/% 5) %>%
select(1:3, age_group_int) %>%
filter(age_group_int > 5) %>%
mutate(
old = age_group_int >= 13,
old = c("Adult", "Older Adult")[old + 1]
) %>%
group_by(Year, old) %>%
summarize(n = sum(Total)) %>%
spread(old, n) %>%
{
ggplot(.) +
aes(x = Year) +
geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) +
geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) +
geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange, size = 2)
} %>%
invisible()

elder_dependency <-
pop %>%
bind_rows() %>%
mutate(age_group_int = as.integer(Age) %/% 5) %>%
select(1:3, age_group_int) %>%
mutate(
old = case_when(
age_group_int < 3 ~ "Children",
age_group_int >= 13 ~ "Older Adult",
TRUE ~ "Adult"
)
) %>%
group_by(Year, old) %>%
summarize(n = sum(Total)) %>%
spread(old, n) %>%
mutate(
dependency_elder = `Older Adult` / Adult,
dependency_all = (`Older Adult` + Children) / Adult
)

ga_elder_dep <-
ggplot(elder_dependency) +
aes(Year, dependency_elder) +
geom_point(color = "#00589a") +
geom_point(data = filter(elder_dependency, Year == 2018), size = 4, color = "#eb1455") +
labs(y = "Dependency Ratio", x = NULL) +
theme_minimal(base_size = 24, base_family = "Fira Sans Condensed") +
scale_y_continuous(labels = scales::percent) +
gganimate::transition_states(Year, 1, 0, FALSE) +
gganimate::shadow_mark()

grkmisc::logger(msg = "Writing mp4 elder dependency animation")
anim_save(
here::here(ANIM_OUT, "pop-dependency.mp4"),
animate(ga_elder_dep, width = 1040*2, height = 480*2, res = 144,
fps = 18, nframes = 161, renderer = av_renderer())
)
grkmisc::logger(msg = "Writing gif elder dependency animation")
anim_save(
here::here(ANIM_OUT, "pop-dependency.gif"),
animate(ga_elder_dep, width = 1040*2, height = 480*2, res = 144,
fps = 18, nframes = 161,
renderer = gganimate::gifski_renderer(loop = FALSE))
)

elder_dep_together <-
elder_dependency %>%
ungroup() %>%
gather(timeseries, ratio, starts_with("depend")) %>%
select(-Adult:-`Older Adult`) %>%
mutate(timeseries = ifelse(timeseries == "dependency_elder", 0, 1))

ga_dep_together <-
ggplot(elder_dep_together) +
aes(Year, ratio) +
geom_point(color = "#00589a") +
# geom_point(color = "#82c878") +
# geom_point(data = filter(., Year == 2018), size = 4, color = "#eb1455") +
labs(y = "Dependency Ratio", x = NULL) +
theme_minimal(base_size = 24, base_family = "Fira Sans Condensed") +
scale_y_continuous(labels = scales::percent) +
gganimate::transition_states(timeseries, 1, 0, FALSE) +
gganimate::view_follow(fixed_x = TRUE) +
gganimate::ease_aes("sine-in-out")+
gganimate::shadow_mark()

grkmisc::logger(msg = "Writing mp4 all dependency animation")
anim_save(
here::here(ANIM_OUT, "pop-dependency-both.mp4"),
animate(ga_dep_together, width = 1040*2, height = 480*2, res = 144,
fps = 18, nframes = 80,
renderer = av_renderer())
)
grkmisc::logger(msg = "Writing gif all dependency animation")
anim_save(
here::here(ANIM_OUT, "pop-dependency-both.gif"),
animate(ga_dep_together, width = 1040*2, height = 480*2, res = 144,
fps = 18, nframes = 80,
renderer = gganimate::gifski_renderer(loop = FALSE))
)

+ 0
- 357
R/2018-10-21-population-animations.R Прегледај датотеку

@@ -1,357 +0,0 @@
## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(echo=FALSE)

## ----library-------------------------------------------------------------
library(dplyr)
library(purrr)
library(tidyr)
library(stringr)
library(readr)
library(forcats)
library(gganimate)

DATA_DIR <- "data-raw"
ANIM_OUT <- "anim"

fs::dir_create(here::here(DATA_DIR))
fs::dir_create(here::here(ANIM_OUT))

DOWNLOAD_DATA <- TRUE


download_file <- function(filename, url, basedir) {
filename <- gsub(" ", "_", filename)
filename <- gsub("/", "-", filename)
fs::dir_create(basedir)
download.file(url, destfile = fs::path(basedir, filename))
}

if (DOWNLOAD_DATA) {
## ----download-popest-tables-1900-1980, eval=FALSE------------------------
x <- xml2::read_html("https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/")

xx <-
data_frame(
filename = rvest::html_nodes(x, "a") %>% rvest::html_text(),
url = rvest::html_nodes(x, "a") %>% rvest::html_attr("href")
) %>%
filter(str_detect(url, "\\.csv")) %>%
mutate(url = str_c(
"https://www2.census.gov/programs-surveys/popest/tables/1900-1980/national/asrh/",
url
))

pwalk(xx, download_file, basedir = here::here(DATA_DIR, "1900-1980"))

## ----download-popest-tables-1980-1990, eval=FALSE------------------------
xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/1980s-national.html") %>%
rvest::html_nodes(".list.section .uscb-text-link") %>%
rvest::html_attrs() %>%
purrr::transpose() %>%
as_tibble() %>%
unnest() %>%
select(filename = name, url = href) %>%
mutate(url = paste0("https:", url)) %>%
pwalk(download_file, basedir = here::here(DATA_DIR, "1980-1990"))

## ----download-popest-tables-1990-2000, eval=FALSE------------------------
xml2::read_html("https://www.census.gov/data/datasets/time-series/demo/popest/intercensal-1990-2000-national.html") %>%
rvest::html_nodes("#listArticlesContainer_list_0 .uscb-text-link") %>%
rvest::html_attrs() %>%
purrr::transpose() %>%
as_tibble() %>%
unnest() %>%
select(filename = name, url = href) %>%
mutate(url = paste0("https:", url),
filename = paste0(filename, ".csv")) %>%
pwalk(download_file, basedir = here::here(DATA_DIR, "1990-2000"))

## ----download-popest-tables-2000-2010----
## "2000-2010/us-est00int-01.xls"
download_file(
"us-est00int-alldata-5yr.csv",
"https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/national/us-est00int-alldata-5yr.csv",
here::here(DATA_DIR, "2000-2010")
)


## ----download-popest-tables-2010----
## "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"
## Download manually from https://factfinder.census.gov/bkmk/table/1.0/en/PEP/2017/PEPSYASEXN/0100000US

## ----download-popest-projections
## "np2017_d1.csv"
download_file(
"np2017_d1.csv",
"https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1.csv",
here::here(DATA_DIR)
)
}

## ----import-historical---------------------------------------------------
pop <- list()

import_pre1980 <- function(file) {
# cli::cat_bullet(file)
year <- str_extract(file, "\\d{4}\\.csv")
year <- as.integer(sub("\\.csv", "", year))
skip <- if (year >= 1960) 8 else 9
x <- read_csv(file, col_names = FALSE, skip = skip,
col_types = cols_only(
X1 = col_guess(), X2 = col_guess(),
X3 = col_guess(), X4 = col_guess()

))
colnames(x) <- c("Age", "Total", "Total Male", "Total Female")
x <- x[1:max(which(x$Age %in% c("75+", "85+"))), ]
stopifnot(nrow(x) %in% c(76L, 86L))
x %>%
mutate(
Year = year,
Age = sub("\\+", "", Age),
Age = as.integer(Age)
) %>%
select(Year, everything())
}

pop[["1900-1980"]] <- fs::dir_ls(
here::here(DATA_DIR, "1900-1980")
) %>%
map_dfr(import_pre1980)

import_1980_1990 <- function(file) {
read_fwf(
file,
col_positions = fwf_positions(
start = c(3, 5, 7, 12, 22, 32),
end = c(4, 6, 9, 20, 30, 40),
col_names = c("Month", "Year", "Age", "Total", "Total Male", "Total Female")
),
skip = 0
) %>%
filter(Age < 101, Month == 4, Year == min(Year)) %>%
mutate(Year = Year + 1900)
}

pop[["1980-1990"]] <- fs::dir_ls(
here::here(DATA_DIR, "1980-1990"),
regexp = "TXT"
) %>%
map_dfr(import_1980_1990)

import_1990_2000 <- function(file) {
x <- read_csv(file, col_names = FALSE, skip = 3)
colnames(x) <- c("Month", "Age", "Total", "Total Male", "Total Female")

x %>%
filter(
!is.na(Total),
!str_detect(Age, "All")
) %>%
mutate(Month = lubridate::parse_date_time(Month, "mdy")) %>%
filter(
lubridate::month(Month) == 4
) %>%
mutate(
Year = lubridate::year(Month),
Age = str_remove(Age, "\\+"),
Age = as.integer(Age)
) %>%
select(Year, everything(), -Month)
}

pop[["1990-2000"]] <- fs::dir_ls(
here::here(DATA_DIR, "1990-2000"),
regexp = "\\d{4}\\.csv"
) %>%
map_dfr(import_1990_2000)

# pop[["2000-2010"]] <- read_csv(
# here::here(DATA_DIR, "2000-2010/us-est00int-alldata.csv")
# ) %>%
# select(Month = MONTH, Year = YEAR, Age = AGE,
# Total = TOT_POP, `Total Male` = TOT_MALE,
# `Total Female` = TOT_FEMALE) %>%
# filter(Year > 2000, Age <= 125)

pop[["2000-2010"]] <- readxl::read_xls(
here::here(DATA_DIR, "2000-2010/us-est00int-01.xls"),
sheet = "Sheet1"
) %>%
mutate(age_label = Age, Age = str_extract(Age, "\\d{1,3}"), Age = as.integer(Age) - str_detect(age_label, "Under") * 5) %>%
select(-age_label) %>%
gather(Year, value, -1:-2) %>%
spread(Group, value) %>%
mutate(Year = as.integer(Year)) %>%
filter(Year > 2000)

pop[["2010-2017"]] <- readr::read_csv(
here::here(DATA_DIR, "2010-2017/PEP_2017_PEPSYASEXN_with_ann.csv"),
skip = 1
) %>%
select(-1:-4) %>%
gather(key, value, -Sex) %>%
filter(str_detect(key, "Population Estimate")) %>%
mutate(key = str_remove(key, "Pop.+\\) - "), key = str_remove(key, " Total - ")) %>%
filter(!str_detect(key, "Total|Median")) %>%
separate(key, c("Year", "Age"), sep = ";") %>%
mutate(
Year = as.integer(Year),
Age = sub("+", "", Age, fixed = TRUE),
Age = as.integer(Age),
Sex = recode(
Sex,
"Both Sexes" = "Total",
"Female" = "Total Female",
"Male" = "Total Male"
)
) %>%
spread(Sex, value)

# pop[["2018"]] <- readr::read_tsv(
# here::here(DATA_DIR, "national-population-projections_2014-2060.txt")
# ) %>%
# select(-1) %>%
# filter(!is.na(Year)) %>%
# select(contains("Code"), contains("Pop")) %>%
# {colnames(.) <- sub(" Code", "", colnames(.)); .} %>%
# group_by(Year, Age) %>%
# mutate(Total = sum(`Projected Populations`)) %>%
# ungroup() %>%
# spread(Gender, `Projected Populations`) %>%
# mutate(
# Age = sub("+", "", Age, fixed = TRUE),
# Age = as.integer(Age)
# ) %>%
# filter(Year > 2017)

pop[["2018"]] <-
readr::read_csv(
here::here(DATA_DIR, "np2017_d1.csv")
) %>%
gather(Age, n, -SEX:-YEAR) %>%
mutate(SEX = c("Total", "Total Male", "Total Female")[SEX + 1]) %>%
spread(SEX, n) %>%
filter(Age != "TOTAL_POP") %>%
mutate(
Age = sub("POP_", "", Age, fixed = TRUE),
Age = as.integer(Age)
) %>%
select(Year = YEAR, Age, starts_with("Total"))



## ------------------------------------------------------------------------
ga <-
pop %>%
bind_rows() %>%
mutate(age_group_int = as.integer(Age) %/% 5) %>%
group_by(age_group_int) %>%
mutate(
# age_group = paste(min(Age), max(Age), sep = " - "),
age_group = paste(min(Age)),
age_group = ifelse(Age >= 100, "100+", age_group)
) %>%
ungroup() %>%
arrange(age_group_int, Year) %>%
mutate(age_group = fct_inorder(age_group)) %>%
select(-age_group_int) %>%
group_by(Year, age_group) %>%
summarize(Total = sum(Total)) %>%
group_by(Year) %>%
mutate(GrandTotal = sum(Total)) %>%
ungroup() %>%
mutate(Total = Total/GrandTotal) %>%
arrange(Year, age_group) %>% View()
# complete(Year, age_group, fill = list(Total = 0)) %>%
# filter(Year > 2000) %>%
{
ggplot(.) +
# aes(age_group, Total) +
# geom_vline(xintercept = c(1980, 1990), color = "grey50") +
# geom_line(aes(group = age_group)) +
# geom_point(size = 0.2) +
# geom_col(fill = "grey40") +
# facet_wrap(~ Year) +
# coord_flip() +
# scale_y_continuous(labels = function(x) {grkmisc::pretty_num(x, decimal_digits = 0)}) +
geom_segment(aes(yend = age_group, y = age_group, xend = Total), x = 0, size = 5) +
geom_segment(aes(yend = age_group, y = age_group, xend = Total),
x = 0,
size = 5,
color = "grey40",
alpha = 0.25,
data = filter(., Year == min(Year)) %>% select(-Year)
) +
# annotate("text", label = "{closest_state}", x = max(.$Total) * 0.8, y = 20, size = 12) +
theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") +
theme(panel.grid.major.y = element_blank()) +
scale_x_continuous(labels = scales::percent, expand = c(0, 0)) +
# labs(x = "Percent of Population", y = NULL) +
theme(axis.title.x = element_text(hjust = 0.5, size = 30)) +
labs(x = "{closest_state}", y = NULL) +
gganimate::transition_time(Year) +
gganimate::transition_states(Year, 1, 0, wrap = FALSE) +
gganimate::ease_aes("linear") +
gganimate::enter_fade()
}

anim_save(
here::here(ANIM_OUT, "pop-anim.mp4"),
animate(ga, width = 1040*2, height = 480*2, res = 144,
fps = 20, nframes = 161*2,
renderer = av_renderer())
)

pop %>%
bind_rows() %>%
mutate(age_group_int = as.integer(Age) %/% 5) %>%
select(1:3, age_group_int) %>%
filter(age_group_int > 5) %>%
mutate(
old = age_group_int >= 13,
old = c("Adult", "Older Adult")[old + 1]
) %>%
group_by(Year, old) %>%
summarize(n = sum(Total)) %>%
spread(old, n) %>%
{
ggplot(.) +
aes(x = Year) +
geom_segment(aes(xend = Year, y = Adult, yend = `Older Adult`)) +
geom_point(aes(y = Adult), color = grkmisc::moffitt_colors$blue) +
geom_point(aes(y = `Older Adult`), color = grkmisc::moffitt_colors$orange)
}

ga2 <-
pop %>%
bind_rows() %>%
mutate(age_group_int = as.integer(Age) %/% 5) %>%
select(1:3, age_group_int) %>%
filter(age_group_int >= 3) %>%
mutate(
old = age_group_int >= 13,
old = c("Adult", "Older Adult")[old + 1]
) %>%
group_by(Year, old) %>%
summarize(n = sum(Total)) %>%
spread(old, n) %>%
mutate(ratio = `Older Adult` / Adult) %>%
{
ggplot(.) +
aes(Year, ratio) +
geom_point() +
geom_point(data = filter(., Year == 2018), color = "red") +
labs(y = "Dependency Ratio", x = NULL) +
theme_minimal(base_size = 16, base_family = "Fira Sans Condensed") +
scale_y_continuous(labels = scales::percent) +
gganimate::transition_states(Year, 1, 0, FALSE) +
gganimate::shadow_mark()
}

anim_save(
here::here(ANIM_OUT, "population-projections/pop-dependency.mp4"),
animate(ga2, width = 1040*2, height = 480*2, res = 144,
fps = 18, nframes = 161,
renderer = av_renderer())
)

Loading…
Откажи
Сачувај