Add tidyr::spread and tidyr::gathermaster
| @@ -0,0 +1,28 @@ | |||
| # Animated dplyr joins with gganimate | |||
| # * Garrick Aden-Buie | |||
| # * garrickadenbuie.com | |||
| # * MIT License: https://opensource.org/licenses/MIT | |||
| library(tidyverse) | |||
| library(gganimate) | |||
| if (!getOption("tidy_verb_anim.font_registered", FALSE)) { | |||
| source(here::here("R", "01_register-fonts.R")) | |||
| } | |||
| if (!getOption("tidy_verb_anim.functions_loaded", FALSE)) { | |||
| source(here::here("R", "02_functions.R")) | |||
| } | |||
| source(here::here("R", "03_check-folders.R")) | |||
| # Data ---- | |||
| set.seed(42) | |||
| wide <- data_frame( | |||
| id = rep(1:2), | |||
| x = letters[1:2], | |||
| y = letters[3:4], | |||
| z = letters[5:6] | |||
| ) | |||
| long <- tidyr::gather(wide, key, val, x:z) | |||
| @@ -39,15 +39,49 @@ colorize_row_id <- function(df, n_colors, key_col = "id") { | |||
| df | |||
| } | |||
| colorize_wide_tidyr <- function(df, n_colors, key_col = "id") { | |||
| n_colors <- n_colors + length(setdiff(unique(df$label), key_col)) | |||
| colors <- scales::brewer_pal(type = "qual", "Set1")(n_colors) | |||
| df$value_int <- as.integer(gsub("[a-zA-Z]", "0", df$value)) | |||
| max_id_color <- max(df$value_int) | |||
| df %>% | |||
| bind_rows( | |||
| filter(df, .y == "-1") %>% mutate(.y = 0) | |||
| ) %>% | |||
| mutate( | |||
| idcp = max_id_color - 1L, | |||
| idc = case_when( | |||
| label == "id" ~ value_int, | |||
| TRUE ~ map_int(label, ~which(. == unique(label))) + idcp | |||
| ) | |||
| ) %>% | |||
| select(-idcp, -value_int) %>% | |||
| mutate( | |||
| idc = ifelse(.y == 0 & label == "id", 100, idc), | |||
| value = ifelse(.y == 0, label, value), | |||
| .id = ifelse(.y == 0, "n", .id), | |||
| color = colors[idc], | |||
| ) %>% | |||
| filter(!is.na(color)) %>% | |||
| mutate(alpha = ifelse(label != "id" & .y < 0, 0.6, 1.0)) %>% | |||
| select(-idc) | |||
| } | |||
| plot_data <- function(x, title = "") { | |||
| if (!"alpha" %in% colnames(x)) x$alpha <- 1 | |||
| if (!".width" %in% colnames(x)) x$`.width` <- 1 | |||
| if (!".text_color" %in% colnames(x)) x$`.text_color` <- "white" | |||
| if (!".text_size" %in% colnames(x)) x$`.text_size` <- 12 | |||
| ggplot(x) + | |||
| aes(.x, .y, fill = color, label = value) + | |||
| geom_tile(aes(width = .width, alpha = alpha), color = "white", size = 3) + | |||
| geom_text(aes(x = .x), hjust = 0.5, size = 12, family = "Fira Sans", color = "white") + | |||
| geom_text(aes(x = .x, color = .text_color, size = .text_size), hjust = 0.5, family = "Fira Sans") + | |||
| scale_fill_identity() + | |||
| scale_alpha_identity() + | |||
| scale_color_identity() + | |||
| scale_size_identity() + | |||
| coord_equal() + | |||
| ggtitle(title) + | |||
| theme_void() + | |||
| @@ -0,0 +1,98 @@ | |||
| source(here::here("R", "00_base_tidyr.R")) | |||
| sg_wide <- wide %>% | |||
| proc_data("0-wide", colorize_wide_tidyr) %>% | |||
| mutate(frame = 1, .id = "0-wide") | |||
| sg_long <- wide %>% | |||
| tidyr::gather("key", "val", -id) %>% | |||
| proc_data("3-tall", color_fun = function(x, y) x) %>% | |||
| split(.$label) | |||
| sg_long$id <- | |||
| sg_wide %>% | |||
| filter(label == "id") %>% | |||
| select(value, color) %>% | |||
| left_join(sg_long$id, ., by = "value") %>% | |||
| mutate(alpha = 1) | |||
| sg_long$key <- | |||
| sg_wide %>% | |||
| filter(label != "id") %>% | |||
| select(label, color) %>% | |||
| left_join(sg_long$key, ., by = c("value" = "label")) %>% | |||
| distinct() %>% | |||
| mutate(alpha = 1) | |||
| sg_long$val <- | |||
| sg_wide %>% | |||
| filter(label != "id", .y < 0) %>% | |||
| select(value, color) %>% | |||
| left_join(sg_long$val, ., by = "value") %>% | |||
| mutate(alpha = 0.6) | |||
| sg_long <- bind_rows(sg_long) %>% mutate(frame = 2) | |||
| sg_long_labels <- data_frame(id = 1, a = "id", x = "key", y = "val") %>% | |||
| proc_data("4-label") %>% | |||
| filter(label != "id") %>% | |||
| mutate(color = "#FFFFFF", .y = 0, .x = .x -1, frame = 2, alpha = 1, label = recode(label, "a" = "id")) | |||
| sg_wide_labels <- data_frame(id = 1, a = "id") %>% | |||
| proc_data("2-label") %>% | |||
| filter(label != "id") %>% | |||
| mutate(color = "#FFFFFF", .y = 0, .x = .x -1, frame = 1, alpha = 1, label = recode(label, "a" = "id")) | |||
| sg_long_extra_keys <- map_dfr( | |||
| seq_len(nrow(wide) - 1), | |||
| ~ filter(sg_wide, .y > -1) # Extra key blocks in long column | |||
| ) | |||
| n_key_cols <- length(setdiff(colnames(wide), "id")) | |||
| sg_long_extra_id <- map_dfr( | |||
| seq_len(n_key_cols - 1), | |||
| ~ filter(sg_wide, .x == 1) # Extra id column blocks for long column | |||
| ) | |||
| sg_data <- bind_rows( | |||
| sg_wide, | |||
| sg_wide_labels, | |||
| sg_long, | |||
| sg_long_labels, | |||
| sg_long_extra_keys, | |||
| sg_long_extra_id | |||
| ) %>% | |||
| mutate( | |||
| label = ifelse(value %in% setdiff(colnames(wide), "id"), "key", label), | |||
| label = ifelse(value %in% c("key", "val"), "zzz", label), | |||
| .text_color = ifelse(grepl("label", .id), "black", "white"), | |||
| .text_size = ifelse(grepl("label", .id), 8, 12) | |||
| ) %>% | |||
| arrange(label, .id, value) %>% | |||
| mutate(frame = factor(frame, labels = c('spread(long, key, val)', 'gather(wide, key, val, x:z)'))) %>% | |||
| select(.x, .y, everything()) | |||
| sg_static <- | |||
| sg_data %>% | |||
| split(.$frame) %>% | |||
| imap(~ plot_data(.x, .y) + | |||
| ylim(-6.5, 0.5) + | |||
| labs(subtitle = "returns") + | |||
| theme(plot.subtitle = element_text(family = "Fira Sans", size = 14, color = "grey50", hjust = 0.5, margin = margin(25))) | |||
| ) | |||
| save_static_plot(cowplot::plot_grid(plotlist = sg_static, axis = 't'), "tidyr-spread-gather") | |||
| save_static_plot(sg_static[[1]], "tidyr-spread") | |||
| save_static_plot(sg_static[[2]], "tidyr-gather") | |||
| sg_anim <- | |||
| sg_data %>% | |||
| plot_data() %>% | |||
| animate_plot() + | |||
| view_follow() + | |||
| labs(title = "{ifelse(transitioning, next_state, ifelse(grepl('gather', next_state), 'long', 'wide'))}") + | |||
| ease_aes("sine-in-out", x = "exponential-out") | |||
| sg_anim <- animate(sg_anim) | |||
| anim_save(here::here("images", "tidyr-spread-gather.gif"), sg_anim) | |||
| @@ -33,6 +33,9 @@ Garrick Aden-Buie -- [@grrrck](https://twitter.com/grrrck) -- [garrickade | |||
| - Filtering Joins: [`semi_join()`](#semi-join), [`anti_join()`](#anti-join) | |||
| - Set Operations: [`union()`](#union), [`union_all()`](#union-all), [`intersect()`](#intersect), [`setdiff()`](#setdiff) | |||
| - Tidy Data: [`spread()` and `gather()`](#spread-and-gather) | |||
| - Learn more about | |||
| - [Relational Data](#relational-data) | |||
| - [gganimate](#gganimate) | |||
| @@ -272,6 +275,45 @@ setdiff(x, y) | |||
| setdiff(y, x) | |||
| ``` | |||
| ## Tidy Data | |||
| ```{r tidyr-wide-long} | |||
| source("R/tidyr_spread_gather.R") | |||
| tidy_plots <- list() | |||
| tidy_plots$wide <- bind_rows(sg_wide, sg_wide_labels) | |||
| tidy_plots$long <- bind_rows(sg_long, sg_long_labels) | |||
| tidy_plots <- map(tidy_plots, ~ mutate(., | |||
| .text_color = ifelse(grepl("id|key|val", value), "black", "white"), | |||
| .text_size = ifelse(grepl("id|key|val", value), 6, 10) | |||
| )) %>% | |||
| imap(~ plot_data(.x, .y)) | |||
| tidy_plots$wide <- tidy_plots$wide + ylim(-6.5, 0.5) | |||
| save_static_plot(cowplot::plot_grid(plotlist = tidy_plots, axis = "t"), "original-dfs-tidy") | |||
| ``` | |||
|  | |||
| ### Spread and Gather | |||
| `spread(data, key, value)` | |||
| > Spread a key-value pair across multiple columns. | |||
| `gather(data, key = "key", value = "value", ...)` | |||
| > Gather takes multiple columns and collapses into key-value pairs, duplicating all other columns as needed. You use `gather()` when you notice that you have columns that are not variables. | |||
|  | |||
| ```{r echo=TRUE} | |||
| gather(wide, key, val, x:z) | |||
| spread(long, key, val) | |||
| ``` | |||
| ## Learn More | |||
| ### Relational Data | |||
| @@ -22,6 +22,8 @@ Smith](https://github.com/TylerGrantSmith). | |||
| - Set Operations: [`union()`](#union), [`union_all()`](#union-all), | |||
| [`intersect()`](#intersect), [`setdiff()`](#setdiff) | |||
| - Tidy Data: [`spread()` and `gather()`](#spread-and-gather) | |||
| - Learn more about | |||
| - [Relational Data](#relational-data) | |||
| @@ -295,6 +297,43 @@ setdiff(y, x) | |||
| #> 1 2 b | |||
| ``` | |||
| ## Tidy Data | |||
|  | |||
| ### Spread and Gather | |||
| `spread(data, key, value)` | |||
| > Spread a key-value pair across multiple columns. | |||
| `gather(data, key = "key", value = "value", ...)` | |||
| > Gather takes multiple columns and collapses into key-value pairs, | |||
| > duplicating all other columns as needed. You use `gather()` when you | |||
| > notice that you have columns that are not variables. | |||
|  | |||
| ``` r | |||
| gather(wide, key, val, x:z) | |||
| #> # A tibble: 6 x 3 | |||
| #> id key val | |||
| #> <int> <chr> <chr> | |||
| #> 1 1 x a | |||
| #> 2 2 x b | |||
| #> 3 1 y c | |||
| #> 4 2 y d | |||
| #> 5 1 z e | |||
| #> 6 2 z f | |||
| spread(long, key, val) | |||
| #> # A tibble: 2 x 4 | |||
| #> id x y z | |||
| #> <int> <chr> <chr> <chr> | |||
| #> 1 1 a c e | |||
| #> 2 2 b d f | |||
| ``` | |||
| ## Learn More | |||
| ### Relational Data | |||
| @@ -2,5 +2,6 @@ install.packages("tidyverse") | |||
| install.packages("rmarkdown") | |||
| install.packages("here") | |||
| install.packages(c("sysfonts", "jsonlite", "curl", "showtext")) | |||
| install.packages("cowplot") | |||
| install.packages("devtools") | |||
| devtools::install_github("thomasp85/gganimate") | |||