Add tidyr::spread and tidyr::gathermaster
| # Animated dplyr joins with gganimate | |||||
| # * Garrick Aden-Buie | |||||
| # * garrickadenbuie.com | |||||
| # * MIT License: https://opensource.org/licenses/MIT | |||||
| library(tidyverse) | |||||
| library(gganimate) | |||||
| if (!getOption("tidy_verb_anim.font_registered", FALSE)) { | |||||
| source(here::here("R", "01_register-fonts.R")) | |||||
| } | |||||
| if (!getOption("tidy_verb_anim.functions_loaded", FALSE)) { | |||||
| source(here::here("R", "02_functions.R")) | |||||
| } | |||||
| source(here::here("R", "03_check-folders.R")) | |||||
| # Data ---- | |||||
| set.seed(42) | |||||
| wide <- data_frame( | |||||
| id = rep(1:2), | |||||
| x = letters[1:2], | |||||
| y = letters[3:4], | |||||
| z = letters[5:6] | |||||
| ) | |||||
| long <- tidyr::gather(wide, key, val, x:z) |
| df | df | ||||
| } | } | ||||
| colorize_wide_tidyr <- function(df, n_colors, key_col = "id") { | |||||
| n_colors <- n_colors + length(setdiff(unique(df$label), key_col)) | |||||
| colors <- scales::brewer_pal(type = "qual", "Set1")(n_colors) | |||||
| df$value_int <- as.integer(gsub("[a-zA-Z]", "0", df$value)) | |||||
| max_id_color <- max(df$value_int) | |||||
| df %>% | |||||
| bind_rows( | |||||
| filter(df, .y == "-1") %>% mutate(.y = 0) | |||||
| ) %>% | |||||
| mutate( | |||||
| idcp = max_id_color - 1L, | |||||
| idc = case_when( | |||||
| label == "id" ~ value_int, | |||||
| TRUE ~ map_int(label, ~which(. == unique(label))) + idcp | |||||
| ) | |||||
| ) %>% | |||||
| select(-idcp, -value_int) %>% | |||||
| mutate( | |||||
| idc = ifelse(.y == 0 & label == "id", 100, idc), | |||||
| value = ifelse(.y == 0, label, value), | |||||
| .id = ifelse(.y == 0, "n", .id), | |||||
| color = colors[idc], | |||||
| ) %>% | |||||
| filter(!is.na(color)) %>% | |||||
| mutate(alpha = ifelse(label != "id" & .y < 0, 0.6, 1.0)) %>% | |||||
| select(-idc) | |||||
| } | |||||
| plot_data <- function(x, title = "") { | plot_data <- function(x, title = "") { | ||||
| if (!"alpha" %in% colnames(x)) x$alpha <- 1 | if (!"alpha" %in% colnames(x)) x$alpha <- 1 | ||||
| if (!".width" %in% colnames(x)) x$`.width` <- 1 | if (!".width" %in% colnames(x)) x$`.width` <- 1 | ||||
| if (!".text_color" %in% colnames(x)) x$`.text_color` <- "white" | |||||
| if (!".text_size" %in% colnames(x)) x$`.text_size` <- 12 | |||||
| ggplot(x) + | ggplot(x) + | ||||
| aes(.x, .y, fill = color, label = value) + | aes(.x, .y, fill = color, label = value) + | ||||
| geom_tile(aes(width = .width, alpha = alpha), color = "white", size = 3) + | geom_tile(aes(width = .width, alpha = alpha), color = "white", size = 3) + | ||||
| geom_text(aes(x = .x), hjust = 0.5, size = 12, family = "Fira Sans", color = "white") + | |||||
| geom_text(aes(x = .x, color = .text_color, size = .text_size), hjust = 0.5, family = "Fira Sans") + | |||||
| scale_fill_identity() + | scale_fill_identity() + | ||||
| scale_alpha_identity() + | scale_alpha_identity() + | ||||
| scale_color_identity() + | |||||
| scale_size_identity() + | |||||
| coord_equal() + | coord_equal() + | ||||
| ggtitle(title) + | ggtitle(title) + | ||||
| theme_void() + | theme_void() + |
| source(here::here("R", "00_base_tidyr.R")) | |||||
| sg_wide <- wide %>% | |||||
| proc_data("0-wide", colorize_wide_tidyr) %>% | |||||
| mutate(frame = 1, .id = "0-wide") | |||||
| sg_long <- wide %>% | |||||
| tidyr::gather("key", "val", -id) %>% | |||||
| proc_data("3-tall", color_fun = function(x, y) x) %>% | |||||
| split(.$label) | |||||
| sg_long$id <- | |||||
| sg_wide %>% | |||||
| filter(label == "id") %>% | |||||
| select(value, color) %>% | |||||
| left_join(sg_long$id, ., by = "value") %>% | |||||
| mutate(alpha = 1) | |||||
| sg_long$key <- | |||||
| sg_wide %>% | |||||
| filter(label != "id") %>% | |||||
| select(label, color) %>% | |||||
| left_join(sg_long$key, ., by = c("value" = "label")) %>% | |||||
| distinct() %>% | |||||
| mutate(alpha = 1) | |||||
| sg_long$val <- | |||||
| sg_wide %>% | |||||
| filter(label != "id", .y < 0) %>% | |||||
| select(value, color) %>% | |||||
| left_join(sg_long$val, ., by = "value") %>% | |||||
| mutate(alpha = 0.6) | |||||
| sg_long <- bind_rows(sg_long) %>% mutate(frame = 2) | |||||
| sg_long_labels <- data_frame(id = 1, a = "id", x = "key", y = "val") %>% | |||||
| proc_data("4-label") %>% | |||||
| filter(label != "id") %>% | |||||
| mutate(color = "#FFFFFF", .y = 0, .x = .x -1, frame = 2, alpha = 1, label = recode(label, "a" = "id")) | |||||
| sg_wide_labels <- data_frame(id = 1, a = "id") %>% | |||||
| proc_data("2-label") %>% | |||||
| filter(label != "id") %>% | |||||
| mutate(color = "#FFFFFF", .y = 0, .x = .x -1, frame = 1, alpha = 1, label = recode(label, "a" = "id")) | |||||
| sg_long_extra_keys <- map_dfr( | |||||
| seq_len(nrow(wide) - 1), | |||||
| ~ filter(sg_wide, .y > -1) # Extra key blocks in long column | |||||
| ) | |||||
| n_key_cols <- length(setdiff(colnames(wide), "id")) | |||||
| sg_long_extra_id <- map_dfr( | |||||
| seq_len(n_key_cols - 1), | |||||
| ~ filter(sg_wide, .x == 1) # Extra id column blocks for long column | |||||
| ) | |||||
| sg_data <- bind_rows( | |||||
| sg_wide, | |||||
| sg_wide_labels, | |||||
| sg_long, | |||||
| sg_long_labels, | |||||
| sg_long_extra_keys, | |||||
| sg_long_extra_id | |||||
| ) %>% | |||||
| mutate( | |||||
| label = ifelse(value %in% setdiff(colnames(wide), "id"), "key", label), | |||||
| label = ifelse(value %in% c("key", "val"), "zzz", label), | |||||
| .text_color = ifelse(grepl("label", .id), "black", "white"), | |||||
| .text_size = ifelse(grepl("label", .id), 8, 12) | |||||
| ) %>% | |||||
| arrange(label, .id, value) %>% | |||||
| mutate(frame = factor(frame, labels = c('spread(long, key, val)', 'gather(wide, key, val, x:z)'))) %>% | |||||
| select(.x, .y, everything()) | |||||
| sg_static <- | |||||
| sg_data %>% | |||||
| split(.$frame) %>% | |||||
| imap(~ plot_data(.x, .y) + | |||||
| ylim(-6.5, 0.5) + | |||||
| labs(subtitle = "returns") + | |||||
| theme(plot.subtitle = element_text(family = "Fira Sans", size = 14, color = "grey50", hjust = 0.5, margin = margin(25))) | |||||
| ) | |||||
| save_static_plot(cowplot::plot_grid(plotlist = sg_static, axis = 't'), "tidyr-spread-gather") | |||||
| save_static_plot(sg_static[[1]], "tidyr-spread") | |||||
| save_static_plot(sg_static[[2]], "tidyr-gather") | |||||
| sg_anim <- | |||||
| sg_data %>% | |||||
| plot_data() %>% | |||||
| animate_plot() + | |||||
| view_follow() + | |||||
| labs(title = "{ifelse(transitioning, next_state, ifelse(grepl('gather', next_state), 'long', 'wide'))}") + | |||||
| ease_aes("sine-in-out", x = "exponential-out") | |||||
| sg_anim <- animate(sg_anim) | |||||
| anim_save(here::here("images", "tidyr-spread-gather.gif"), sg_anim) |
| - Filtering Joins: [`semi_join()`](#semi-join), [`anti_join()`](#anti-join) | - Filtering Joins: [`semi_join()`](#semi-join), [`anti_join()`](#anti-join) | ||||
| - Set Operations: [`union()`](#union), [`union_all()`](#union-all), [`intersect()`](#intersect), [`setdiff()`](#setdiff) | - Set Operations: [`union()`](#union), [`union_all()`](#union-all), [`intersect()`](#intersect), [`setdiff()`](#setdiff) | ||||
| - Tidy Data: [`spread()` and `gather()`](#spread-and-gather) | |||||
| - Learn more about | - Learn more about | ||||
| - [Relational Data](#relational-data) | - [Relational Data](#relational-data) | ||||
| - [gganimate](#gganimate) | - [gganimate](#gganimate) | ||||
| setdiff(y, x) | setdiff(y, x) | ||||
| ``` | ``` | ||||
| ## Tidy Data | |||||
| ```{r tidyr-wide-long} | |||||
| source("R/tidyr_spread_gather.R") | |||||
| tidy_plots <- list() | |||||
| tidy_plots$wide <- bind_rows(sg_wide, sg_wide_labels) | |||||
| tidy_plots$long <- bind_rows(sg_long, sg_long_labels) | |||||
| tidy_plots <- map(tidy_plots, ~ mutate(., | |||||
| .text_color = ifelse(grepl("id|key|val", value), "black", "white"), | |||||
| .text_size = ifelse(grepl("id|key|val", value), 6, 10) | |||||
| )) %>% | |||||
| imap(~ plot_data(.x, .y)) | |||||
| tidy_plots$wide <- tidy_plots$wide + ylim(-6.5, 0.5) | |||||
| save_static_plot(cowplot::plot_grid(plotlist = tidy_plots, axis = "t"), "original-dfs-tidy") | |||||
| ``` | |||||
|  | |||||
| ### Spread and Gather | |||||
| `spread(data, key, value)` | |||||
| > Spread a key-value pair across multiple columns. | |||||
| `gather(data, key = "key", value = "value", ...)` | |||||
| > Gather takes multiple columns and collapses into key-value pairs, duplicating all other columns as needed. You use `gather()` when you notice that you have columns that are not variables. | |||||
|  | |||||
| ```{r echo=TRUE} | |||||
| gather(wide, key, val, x:z) | |||||
| spread(long, key, val) | |||||
| ``` | |||||
| ## Learn More | ## Learn More | ||||
| ### Relational Data | ### Relational Data |
| - Set Operations: [`union()`](#union), [`union_all()`](#union-all), | - Set Operations: [`union()`](#union), [`union_all()`](#union-all), | ||||
| [`intersect()`](#intersect), [`setdiff()`](#setdiff) | [`intersect()`](#intersect), [`setdiff()`](#setdiff) | ||||
| - Tidy Data: [`spread()` and `gather()`](#spread-and-gather) | |||||
| - Learn more about | - Learn more about | ||||
| - [Relational Data](#relational-data) | - [Relational Data](#relational-data) | ||||
| #> 1 2 b | #> 1 2 b | ||||
| ``` | ``` | ||||
| ## Tidy Data | |||||
|  | |||||
| ### Spread and Gather | |||||
| `spread(data, key, value)` | |||||
| > Spread a key-value pair across multiple columns. | |||||
| `gather(data, key = "key", value = "value", ...)` | |||||
| > Gather takes multiple columns and collapses into key-value pairs, | |||||
| > duplicating all other columns as needed. You use `gather()` when you | |||||
| > notice that you have columns that are not variables. | |||||
|  | |||||
| ``` r | |||||
| gather(wide, key, val, x:z) | |||||
| #> # A tibble: 6 x 3 | |||||
| #> id key val | |||||
| #> <int> <chr> <chr> | |||||
| #> 1 1 x a | |||||
| #> 2 2 x b | |||||
| #> 3 1 y c | |||||
| #> 4 2 y d | |||||
| #> 5 1 z e | |||||
| #> 6 2 z f | |||||
| spread(long, key, val) | |||||
| #> # A tibble: 2 x 4 | |||||
| #> id x y z | |||||
| #> <int> <chr> <chr> <chr> | |||||
| #> 1 1 a c e | |||||
| #> 2 2 b d f | |||||
| ``` | |||||
| ## Learn More | ## Learn More | ||||
| ### Relational Data | ### Relational Data |
| install.packages("rmarkdown") | install.packages("rmarkdown") | ||||
| install.packages("here") | install.packages("here") | ||||
| install.packages(c("sysfonts", "jsonlite", "curl", "showtext")) | install.packages(c("sysfonts", "jsonlite", "curl", "showtext")) | ||||
| install.packages("cowplot") | |||||
| install.packages("devtools") | install.packages("devtools") | ||||
| devtools::install_github("thomasp85/gganimate") | devtools::install_github("thomasp85/gganimate") |