Bladeren bron

Close #1 with PR #7 add tidyr spread and gather

Add tidyr::spread and tidyr::gather
master
Garrick Aden-Buie 7 jaren geleden
bovenliggende
commit
01f945064a
No account linked to committer's email address
15 gewijzigde bestanden met toevoegingen van 731 en 1 verwijderingen
  1. +28
    -0
      R/00_base_tidyr.R
  2. +35
    -1
      R/02_functions.R
  3. +98
    -0
      R/tidyr_spread_gather.R
  4. +42
    -0
      README.Rmd
  5. +39
    -0
      README.md
  6. BIN
      images/static/png/original-dfs-tidy.png
  7. BIN
      images/static/png/tidyr-gather.png
  8. BIN
      images/static/png/tidyr-spread-gather.png
  9. BIN
      images/static/png/tidyr-spread.png
  10. +115
    -0
      images/static/svg/original-dfs-tidy.svg
  11. +103
    -0
      images/static/svg/tidyr-gather.svg
  12. +179
    -0
      images/static/svg/tidyr-spread-gather.svg
  13. +91
    -0
      images/static/svg/tidyr-spread.svg
  14. BIN
      images/tidyr-spread-gather.gif
  15. +1
    -0
      install.R

+ 28
- 0
R/00_base_tidyr.R Bestand weergeven

@@ -0,0 +1,28 @@
# Animated dplyr joins with gganimate
# * Garrick Aden-Buie
# * garrickadenbuie.com
# * MIT License: https://opensource.org/licenses/MIT

library(tidyverse)
library(gganimate)

if (!getOption("tidy_verb_anim.font_registered", FALSE)) {
source(here::here("R", "01_register-fonts.R"))
}

if (!getOption("tidy_verb_anim.functions_loaded", FALSE)) {
source(here::here("R", "02_functions.R"))
}

source(here::here("R", "03_check-folders.R"))

# Data ----
set.seed(42)
wide <- data_frame(
id = rep(1:2),
x = letters[1:2],
y = letters[3:4],
z = letters[5:6]
)

long <- tidyr::gather(wide, key, val, x:z)

+ 35
- 1
R/02_functions.R Bestand weergeven

@@ -39,15 +39,49 @@ colorize_row_id <- function(df, n_colors, key_col = "id") {
df
}

colorize_wide_tidyr <- function(df, n_colors, key_col = "id") {
n_colors <- n_colors + length(setdiff(unique(df$label), key_col))
colors <- scales::brewer_pal(type = "qual", "Set1")(n_colors)

df$value_int <- as.integer(gsub("[a-zA-Z]", "0", df$value))
max_id_color <- max(df$value_int)

df %>%
bind_rows(
filter(df, .y == "-1") %>% mutate(.y = 0)
) %>%
mutate(
idcp = max_id_color - 1L,
idc = case_when(
label == "id" ~ value_int,
TRUE ~ map_int(label, ~which(. == unique(label))) + idcp
)
) %>%
select(-idcp, -value_int) %>%
mutate(
idc = ifelse(.y == 0 & label == "id", 100, idc),
value = ifelse(.y == 0, label, value),
.id = ifelse(.y == 0, "n", .id),
color = colors[idc],
) %>%
filter(!is.na(color)) %>%
mutate(alpha = ifelse(label != "id" & .y < 0, 0.6, 1.0)) %>%
select(-idc)
}

plot_data <- function(x, title = "") {
if (!"alpha" %in% colnames(x)) x$alpha <- 1
if (!".width" %in% colnames(x)) x$`.width` <- 1
if (!".text_color" %in% colnames(x)) x$`.text_color` <- "white"
if (!".text_size" %in% colnames(x)) x$`.text_size` <- 12
ggplot(x) +
aes(.x, .y, fill = color, label = value) +
geom_tile(aes(width = .width, alpha = alpha), color = "white", size = 3) +
geom_text(aes(x = .x), hjust = 0.5, size = 12, family = "Fira Sans", color = "white") +
geom_text(aes(x = .x, color = .text_color, size = .text_size), hjust = 0.5, family = "Fira Sans") +
scale_fill_identity() +
scale_alpha_identity() +
scale_color_identity() +
scale_size_identity() +
coord_equal() +
ggtitle(title) +
theme_void() +

+ 98
- 0
R/tidyr_spread_gather.R Bestand weergeven

@@ -0,0 +1,98 @@
source(here::here("R", "00_base_tidyr.R"))

sg_wide <- wide %>%
proc_data("0-wide", colorize_wide_tidyr) %>%
mutate(frame = 1, .id = "0-wide")

sg_long <- wide %>%
tidyr::gather("key", "val", -id) %>%
proc_data("3-tall", color_fun = function(x, y) x) %>%
split(.$label)

sg_long$id <-
sg_wide %>%
filter(label == "id") %>%
select(value, color) %>%
left_join(sg_long$id, ., by = "value") %>%
mutate(alpha = 1)

sg_long$key <-
sg_wide %>%
filter(label != "id") %>%
select(label, color) %>%
left_join(sg_long$key, ., by = c("value" = "label")) %>%
distinct() %>%
mutate(alpha = 1)

sg_long$val <-
sg_wide %>%
filter(label != "id", .y < 0) %>%
select(value, color) %>%
left_join(sg_long$val, ., by = "value") %>%
mutate(alpha = 0.6)

sg_long <- bind_rows(sg_long) %>% mutate(frame = 2)

sg_long_labels <- data_frame(id = 1, a = "id", x = "key", y = "val") %>%
proc_data("4-label") %>%
filter(label != "id") %>%
mutate(color = "#FFFFFF", .y = 0, .x = .x -1, frame = 2, alpha = 1, label = recode(label, "a" = "id"))

sg_wide_labels <- data_frame(id = 1, a = "id") %>%
proc_data("2-label") %>%
filter(label != "id") %>%
mutate(color = "#FFFFFF", .y = 0, .x = .x -1, frame = 1, alpha = 1, label = recode(label, "a" = "id"))

sg_long_extra_keys <- map_dfr(
seq_len(nrow(wide) - 1),
~ filter(sg_wide, .y > -1) # Extra key blocks in long column
)

n_key_cols <- length(setdiff(colnames(wide), "id"))

sg_long_extra_id <- map_dfr(
seq_len(n_key_cols - 1),
~ filter(sg_wide, .x == 1) # Extra id column blocks for long column
)

sg_data <- bind_rows(
sg_wide,
sg_wide_labels,
sg_long,
sg_long_labels,
sg_long_extra_keys,
sg_long_extra_id
) %>%
mutate(
label = ifelse(value %in% setdiff(colnames(wide), "id"), "key", label),
label = ifelse(value %in% c("key", "val"), "zzz", label),
.text_color = ifelse(grepl("label", .id), "black", "white"),
.text_size = ifelse(grepl("label", .id), 8, 12)
) %>%
arrange(label, .id, value) %>%
mutate(frame = factor(frame, labels = c('spread(long, key, val)', 'gather(wide, key, val, x:z)'))) %>%
select(.x, .y, everything())

sg_static <-
sg_data %>%
split(.$frame) %>%
imap(~ plot_data(.x, .y) +
ylim(-6.5, 0.5) +
labs(subtitle = "returns") +
theme(plot.subtitle = element_text(family = "Fira Sans", size = 14, color = "grey50", hjust = 0.5, margin = margin(25)))
)

save_static_plot(cowplot::plot_grid(plotlist = sg_static, axis = 't'), "tidyr-spread-gather")
save_static_plot(sg_static[[1]], "tidyr-spread")
save_static_plot(sg_static[[2]], "tidyr-gather")

sg_anim <-
sg_data %>%
plot_data() %>%
animate_plot() +
view_follow() +
labs(title = "{ifelse(transitioning, next_state, ifelse(grepl('gather', next_state), 'long', 'wide'))}") +
ease_aes("sine-in-out", x = "exponential-out")

sg_anim <- animate(sg_anim)
anim_save(here::here("images", "tidyr-spread-gather.gif"), sg_anim)

+ 42
- 0
README.Rmd Bestand weergeven

@@ -33,6 +33,9 @@ Garrick Aden-Buie -- [&commat;grrrck](https://twitter.com/grrrck) -- [garrickade
- Filtering Joins: [`semi_join()`](#semi-join), [`anti_join()`](#anti-join)

- Set Operations: [`union()`](#union), [`union_all()`](#union-all), [`intersect()`](#intersect), [`setdiff()`](#setdiff)

- Tidy Data: [`spread()` and `gather()`](#spread-and-gather)

- Learn more about
- [Relational Data](#relational-data)
- [gganimate](#gganimate)
@@ -272,6 +275,45 @@ setdiff(x, y)
setdiff(y, x)
```

## Tidy Data

```{r tidyr-wide-long}
source("R/tidyr_spread_gather.R")

tidy_plots <- list()
tidy_plots$wide <- bind_rows(sg_wide, sg_wide_labels)
tidy_plots$long <- bind_rows(sg_long, sg_long_labels)

tidy_plots <- map(tidy_plots, ~ mutate(.,
.text_color = ifelse(grepl("id|key|val", value), "black", "white"),
.text_size = ifelse(grepl("id|key|val", value), 6, 10)
)) %>%
imap(~ plot_data(.x, .y))

tidy_plots$wide <- tidy_plots$wide + ylim(-6.5, 0.5)

save_static_plot(cowplot::plot_grid(plotlist = tidy_plots, axis = "t"), "original-dfs-tidy")
```

![](images/static/png/original-dfs-tidy.png)

### Spread and Gather

`spread(data, key, value)`

> Spread a key-value pair across multiple columns.

`gather(data, key = "key", value = "value", ...)`

> Gather takes multiple columns and collapses into key-value pairs, duplicating all other columns as needed. You use `gather()` when you notice that you have columns that are not variables.

![](images/tidyr-spread-gather.gif)

```{r echo=TRUE}
gather(wide, key, val, x:z)
spread(long, key, val)
```

## Learn More

### Relational Data

+ 39
- 0
README.md Bestand weergeven

@@ -22,6 +22,8 @@ Smith](https://github.com/TylerGrantSmith).
- Set Operations: [`union()`](#union), [`union_all()`](#union-all),
[`intersect()`](#intersect), [`setdiff()`](#setdiff)

- Tidy Data: [`spread()` and `gather()`](#spread-and-gather)

- Learn more about
- [Relational Data](#relational-data)
@@ -295,6 +297,43 @@ setdiff(y, x)
#> 1 2 b
```

## Tidy Data

![](images/static/png/original-dfs-tidy.png)

### Spread and Gather

`spread(data, key, value)`

> Spread a key-value pair across multiple columns.

`gather(data, key = "key", value = "value", ...)`

> Gather takes multiple columns and collapses into key-value pairs,
> duplicating all other columns as needed. You use `gather()` when you
> notice that you have columns that are not variables.

![](images/tidyr-spread-gather.gif)

``` r
gather(wide, key, val, x:z)
#> # A tibble: 6 x 3
#> id key val
#> <int> <chr> <chr>
#> 1 1 x a
#> 2 2 x b
#> 3 1 y c
#> 4 2 y d
#> 5 1 z e
#> 6 2 z f
spread(long, key, val)
#> # A tibble: 2 x 4
#> id x y z
#> <int> <chr> <chr> <chr>
#> 1 1 a c e
#> 2 2 b d f
```

## Learn More

### Relational Data

BIN
images/static/png/original-dfs-tidy.png Bestand weergeven

Before After
Width: 2099  |  Height: 1499  |  Size: 135KB

BIN
images/static/png/tidyr-gather.png Bestand weergeven

Before After
Width: 2099  |  Height: 1499  |  Size: 138KB

BIN
images/static/png/tidyr-spread-gather.png Bestand weergeven

Before After
Width: 2099  |  Height: 1499  |  Size: 165KB

BIN
images/static/png/tidyr-spread.png Bestand weergeven

Before After
Width: 2099  |  Height: 1499  |  Size: 114KB

+ 115
- 0
images/static/svg/original-dfs-tidy.svg
Diff onderdrukt omdat het te groot bestand
Bestand weergeven


+ 103
- 0
images/static/svg/tidyr-gather.svg
Diff onderdrukt omdat het te groot bestand
Bestand weergeven


+ 179
- 0
images/static/svg/tidyr-spread-gather.svg
Diff onderdrukt omdat het te groot bestand
Bestand weergeven


+ 91
- 0
images/static/svg/tidyr-spread.svg
Diff onderdrukt omdat het te groot bestand
Bestand weergeven


BIN
images/tidyr-spread-gather.gif Bestand weergeven

Before After
Width: 480  |  Height: 480  |  Size: 978KB

+ 1
- 0
install.R Bestand weergeven

@@ -2,5 +2,6 @@ install.packages("tidyverse")
install.packages("rmarkdown")
install.packages("here")
install.packages(c("sysfonts", "jsonlite", "curl", "showtext"))
install.packages("cowplot")
install.packages("devtools")
devtools::install_github("thomasp85/gganimate")

Laden…
Annuleren
Opslaan