| @@ -5,8 +5,7 @@ Version: 0.1 | |||
| Date: 2018-08-19 | |||
| Authors@R: c( | |||
| person("Aden-Buie", "Garrick", , "g.adenbuie@gmail.com", c("aut", "cre")), | |||
| person("Zimmermann", "David", , "david_j_zimmermann@hotmail.com", "aut") | |||
| ) | |||
| person("Zimmermann", "David", , "david_j_zimmermann@hotmail.com", "aut")) | |||
| Maintainer: Name <email@email.com> | |||
| Description: This package allows you to visualise the verbs of dplyr and tidyr | |||
| License: CC0-1.0 | |||
| @@ -2,11 +2,13 @@ | |||
| export(animate_anti_join) | |||
| export(animate_full_join) | |||
| export(animate_gather) | |||
| export(animate_inner_join) | |||
| export(animate_intersect) | |||
| export(animate_left_join) | |||
| export(animate_right_join) | |||
| export(animate_semi_join) | |||
| export(animate_setdiff) | |||
| export(animate_spread) | |||
| export(animate_union) | |||
| export(animate_union_all) | |||
| @@ -70,6 +70,7 @@ animate_set <- function(x, y, type, export = "gif", ...) { | |||
| #' @return either a gif or a ggplot | |||
| #' | |||
| #' @name animate_join_function | |||
| #' | |||
| #' @examples | |||
| #' NULL | |||
| animate_join <- function(x, y, by, type, export = "gif", ...) { | |||
| @@ -14,6 +14,7 @@ | |||
| #' @seealso \code{\link[dplyr]{setops}} | |||
| #' | |||
| #' @name animate_set | |||
| #' | |||
| #' @examples | |||
| #' x <- data_frame( | |||
| #' x = c(1, 1, 2), | |||
| @@ -0,0 +1,128 @@ | |||
| #' Animates the gather function | |||
| #' | |||
| #' Gathers `w` with `tidyr::gather()`, converts the wide input and the long | |||
| #' result into plot data with `process_wide()` / `process_long()` and hands | |||
| #' both to `gather_spread()`. | |||
| #' | |||
| #' @param w a data_frame in the wide format | |||
| #' @param key name of the key column to create, given as a string | |||
| #' @param value name of the value column to create, given as a string | |||
| #' @param ... column selection passed to `tidyr::gather()` (e.g. `-year`); it | |||
| #' is also used to build the plot title | |||
| #' @param export the export type, either gif, first or last. The latter two | |||
| #' export ggplots of the first/last state of the gather function | |||
| #' @param detailed boolean value if the animation should show one step for each | |||
| #' key value | |||
| #' | |||
| #' @return a gif or a ggplot | |||
| #' @export | |||
| #' | |||
| #' @examples | |||
| #' wide <- data_frame( | |||
| #' year = 2010:2011, | |||
| #' Alice = c(105, 110), | |||
| #' Bob = c(100, 97), | |||
| #' Charlie = c(90, 95) | |||
| #' ) | |||
| #' animate_gather(wide, "key", "value", -year, export = "first") | |||
| #' animate_gather(wide, "person", "sales", -year, export = "last") | |||
| #' | |||
| #' \donttest{ | |||
| #' animate_gather(wide, "person", "sales", -year, export = "gif") | |||
| #' # if you want to have a less detailed animation, you can also use | |||
| #' animate_gather(wide, "person", "sales", -year, export = "gif", detailed = FALSE) | |||
| #' } | |||
| animate_gather <- function(w, key, value, ..., export = "gif", detailed = TRUE) { | |||
|   lhs <- w | |||
|   rhs <- tidyr::gather(w, !!key, !!value, ...) | |||
|   # construct the title sequence | |||
|   # deparse() can split a long expression over several strings; collapse them | |||
|   # so that the title stays a single string | |||
|   lname <- paste(deparse(substitute(w)), collapse = "") | |||
|   ids <- get_quos_names(...) | |||
|   # open question: what happens if ids := -year or ids := x:y | |||
|   ids <- ids[!ids %in% c(key, value)] | |||
|   # a selection such as -year also yields the "-" operator itself; drop it | |||
|   ids <- ids[ids != "-"] | |||
|   # without any id there is nothing to append to the gather() call in the title | |||
|   id_string <- if (length(ids) > 0) { | |||
|     paste0(", ", paste(sprintf("-%s", ids), collapse = ", ")) | |||
|   } else { | |||
|     "" | |||
|   } | |||
|   sequence <- c( | |||
|     current_state = "Wide", | |||
|     final_state = "Long", | |||
|     operation = sprintf("gather(%s, %s, %s%s)", | |||
|                         lname, | |||
|                         dput_parser(key), | |||
|                         dput_parser(value), | |||
|                         id_string), | |||
|     reverse_operation = sprintf("spread(%s, %s, %s)", | |||
|                                 "long_df", | |||
|                                 dput_parser(key), | |||
|                                 dput_parser(value)) | |||
|   ) | |||
|   # look the key column up by its name; a bare `key` inside pull() would be | |||
|   # ambiguous if the data had a column that is literally called "key" | |||
|   key_values <- unique(rhs[[key]]) | |||
|   # process_wide() only takes x, ids and key; further positional arguments | |||
|   # would end up in its unrelated color_id parameter | |||
|   lhs_proc <- process_wide(lhs, ids, key) | |||
|   rhs_proc <- process_long(rhs, ids, key, value, ...) | |||
|   gather_spread(lhs_proc, rhs_proc, sequence = sequence, key_values = key_values, | |||
|                 export = export, detailed = detailed, ...) | |||
| } | |||
| #' Animates the spread function | |||
| #' | |||
| #' Spreads `l` with `tidyr::spread()`, converts the long input and the wide | |||
| #' result into plot data with `process_long()` / `process_wide()` and hands | |||
| #' both to `gather_spread()`. | |||
| #' | |||
| #' @param l a data_frame in the long/tidy format | |||
| #' @param key name of the key column, given as a string | |||
| #' @param value name of the value column, given as a string | |||
| #' @param export the export type, either gif, first or last. The latter two | |||
| #' export ggplots of the first/last state of the spread function | |||
| #' @param detailed boolean value if the animation should show one step for each | |||
| #' key value | |||
| #' @param ... further arguments, forwarded to `process_long()`, | |||
| #' `process_wide()` and `gather_spread()` | |||
| #' | |||
| #' @return a ggplot or a gif | |||
| #' @export | |||
| #' | |||
| #' @examples | |||
| #' long <- data_frame( | |||
| #' year = c(2010L, 2011L, 2010L, 2011L, 2010L, 2011L), | |||
| #' person = c("Alice", "Alice", "Bob", "Bob", "Charlie", "Charlie"), | |||
| #' sales = c(105, 110, 100, 97, 90, 95) | |||
| #' ) | |||
| #' animate_spread(long, key = "person", value = "sales", export = "first") | |||
| #' animate_spread(long, key = "person", value = "sales", export = "last") | |||
| #' | |||
| #' \donttest{ | |||
| #' animate_spread(long, key = "person", value = "sales", export = "gif") | |||
| #' # if you want to have a less detailed animation, you can also use | |||
| #' animate_spread(long, key = "person", value = "sales", export = "gif", detailed = FALSE) | |||
| #' } | |||
| animate_spread <- function(l, key, value, export = "gif", detailed = TRUE, ...) { | |||
|   lhs <- l | |||
|   # unquote the strings (as animate_gather does) so that they are always read | |||
|   # as column names, even if the data has columns called "key" or "value" | |||
|   rhs <- tidyr::spread(l, key = !!key, value = !!value) | |||
|   # construct the title sequence | |||
|   # deparse() can split a long expression over several strings; collapse them | |||
|   lname <- paste(deparse(substitute(l)), collapse = "") | |||
|   ids <- names(lhs) | |||
|   ids <- ids[!ids %in% c(key, value)] | |||
|   # without any id there is nothing to append to the gather() call in the title | |||
|   id_string <- if (length(ids) > 0) { | |||
|     paste0(", ", paste(sprintf("-%s", ids), collapse = ", ")) | |||
|   } else { | |||
|     "" | |||
|   } | |||
|   # the forward operation acts on the user's long data (lname); the reverse | |||
|   # gather() acts on the spread result, which has no name of its own | |||
|   sequence <- c( | |||
|     current_state = "Long", | |||
|     final_state = "Wide", | |||
|     operation = sprintf("spread(%s, %s, %s)", | |||
|                         lname, | |||
|                         dput_parser(key), | |||
|                         dput_parser(value)), | |||
|     reverse_operation = sprintf("gather(%s, %s, %s%s)", | |||
|                                 "wide_df", | |||
|                                 dput_parser(key), | |||
|                                 dput_parser(value), | |||
|                                 id_string) | |||
|   ) | |||
|   lhs_proc <- process_long(lhs, ids, key, value, ...) | |||
|   # process_wide() only takes x, ids and key; passing `value` positionally | |||
|   # would bind it to the unrelated color_id parameter | |||
|   rhs_proc <- process_wide(rhs, ids, key, ...) | |||
|   key_values <- unique(lhs[[key]]) | |||
|   gather_spread(lhs_proc, rhs_proc, sequence, key_values, export, detailed, ...) | |||
| } | |||
| @@ -1,17 +1,22 @@ | |||
| # Picks a readable text color ("black" or "white") for every background color | |||
| # in `a`, based on the mean of its red, green and blue channels. | |||
| set_text_color <- function(a) { | |||
|   channel_mean <- colMeans(col2rgb(a)) | |||
|   ifelse(channel_mean > 127, "black", "white") | |||
| } | |||
| #' Animates a plot | |||
| #' | |||
| #' @param d a preprocessed dataset | |||
| #' @param title the plot title | |||
| #' @param transition_length see transition_states | |||
| #' @param state_length see transition_states | |||
| #' @param ... further arguments passed to static_plot | |||
| #' | |||
| #' @return a gif | |||
| #' | |||
| #' @examples | |||
| #' NULL | |||
| animate_plot <- function(d, title = "", ...) { | |||
| animate_plot <- function(d, title = "", transition_length = 2, state_length = 1, ...) { | |||
| static_plot(d, title, ...) + | |||
| transition_states(.frame, 2, 1) + | |||
| transition_states(.frame, transition_length, state_length) + | |||
| enter_fade() + | |||
| exit_fade() + | |||
| ease_aes("sine-in-out") | |||
| @@ -33,18 +38,18 @@ animate_plot <- function(d, title = "", ...) { | |||
| #' @examples | |||
| #' NULL | |||
| static_plot <- function(d, title = "", | |||
| text_family = "Fira Sans", title_family = "Fira Mono", | |||
| text_size = 7, title_size = 25, ...) { | |||
| text_family = "Fira Sans", title_family = "Fira Mono", | |||
| text_size = 7, title_size = 25, ...) { | |||
| if (!".alpha" %in% names(d)) d <- d %>% mutate(.alpha = 1) | |||
| if (!".textcolor" %in% names(d)) | |||
| d <- d %>% mutate(.textcolor = set_text_color(.color)) | |||
| if (".col" %in% names(d)) { | |||
| if (".id_long" %in% names(d)) { | |||
| d <- d %>% mutate(.item_id = paste(.id_long, .col, sep = "-")) | |||
| } else { | |||
| # tidyr | |||
| d <- d %>% mutate(.item_id = .id_long) | |||
| d <- d %>% mutate(.item_id = .id) | |||
| } | |||
| ggplot(d, aes(x = .x, group = .item_id, y = .y, fill = .color, alpha = .alpha)) + | |||
| @@ -59,3 +64,4 @@ static_plot <- function(d, title = "", | |||
| theme_void() + | |||
| theme(plot.title = element_text(family = title_family, hjust = 0.5, size = title_size)) | |||
| } | |||
| @@ -146,4 +146,3 @@ add_color_join <- function(x, ids, by, | |||
| return(res) | |||
| } | |||
| # Picks a readable text color ("black" or "white") for every background color | |||
| # in `a`. The mean is taken per color (one column of col2rgb() each), so a | |||
| # vector of colors gets one answer per element instead of a single answer for | |||
| # the average of all colors. | |||
| set_text_color <- function(a) { | |||
|   ifelse(colMeans(col2rgb(a)) > 127, "black", "white") | |||
| } | |||
| @@ -0,0 +1,298 @@ | |||
| #' Gets the ... names | |||
| #' | |||
| #' Captures the arguments with `quos()` and converts the expression of each | |||
| #' one to character. Used to get the `year` out of a selection such as `-year`. | |||
| #' | |||
| #' @param ... arguments; they are captured, not evaluated | |||
| #' | |||
| #' @return the character form of every captured expression, combined by | |||
| #' `sapply()`. Operators are part of the result, which is why the caller | |||
| #' removes the `"-"` entries afterwards. | |||
| #' | |||
| #' @examples | |||
| #' x <- 1:10 | |||
| #' y <- 1 | |||
| #' get_quos_names(-x) | |||
| #' get_quos_names(x:y) | |||
| get_quos_names <- function(...) { | |||
| q <- quos(...) | |||
| # NOTE(review): i[[2]] presumably picks the expression out of the quosure by | |||
| # position; confirm that the installed rlang still allows subsetting quosures | |||
| sapply(q, function(i) as.character(i[[2]])) | |||
| } | |||
| #' Parses a simple vector so that it looks like its input | |||
| #' | |||
| #' Every element is wrapped in single quotes. A vector of length one is | |||
| #' returned as the quoted element; any other length is wrapped in `c(...)`. | |||
| #' | |||
| #' @param x a vector | |||
| #' | |||
| #' @return a single string | |||
| #' | |||
| #' @examples | |||
| #' dput_parser("x") | |||
| #' dput_parser(c("x", "y")) | |||
| dput_parser <- function(x) { | |||
|   quoted <- sprintf("'%s'", x) | |||
|   # the condition is a scalar, so a plain if/else is used instead of ifelse() | |||
|   if (length(x) == 1) { | |||
|     quoted | |||
|   } else { | |||
|     paste0("c(", paste(quoted, collapse = ", "), ")") | |||
|   } | |||
| } | |||
| #' Adds color to processed tidy data | |||
| #' | |||
| #' Header cells whose value is not one of `key_values` get `color_header`; | |||
| #' every other cell is colored by its `.type` (id, key or value). | |||
| #' | |||
| #' @param x a processed data-frame as outputted by process_long or process_wide | |||
| #' @param key_values the unique key-values | |||
| #' @param color_fun the color function; it is called with 3 and its result is | |||
| #' used for the types id, key and value (in that order) | |||
| #' @param color_header the color for the header | |||
| #' @param color_id currently not used by the function body | |||
| #' | |||
| #' @return `x` with an additional `.color` column | |||
| #' | |||
| #' @examples | |||
| #' NULL | |||
| add_color_tidyr <- function(x, key_values, | |||
|                             color_fun = scales::brewer_pal(type = "qual", "Set1"), | |||
|                             color_header = "#737373", | |||
|                             color_id = "#d0d0d0") { | |||
|   # one color per cell type, looked up by name below | |||
|   type_colors <- color_fun(3) | |||
|   names(type_colors) <- c("id", "key", "value") | |||
|   mutate( | |||
|     x, | |||
|     .color = ifelse( | |||
|       !(.id_map != ".header" | .val %in% key_values), | |||
|       color_header, | |||
|       type_colors[.type] | |||
|     ) | |||
|   ) | |||
| } | |||
| #' Processes a wide dataframe and converts it into a dataset that can be plotted | |||
| #' | |||
| #' Every column of `x` that is not listed in `ids` is treated as a key value. | |||
| #' | |||
| #' @param x a wide data frame | |||
| #' @param ids a vector of id-variables that are already in the tidy-format | |||
| #' @param key a vector of key-variables (currently not used by the function body) | |||
| #' @param color_id the color for the id-body (currently not used by the | |||
| #' function body) | |||
| #' @param ... not used | |||
| #' | |||
| #' @return a data frame with one row per cell to plot (header cells included) | |||
| #' and the helper columns `.x`, `.y`, `.val`, `.type`, `.id`, `.color` and | |||
| #' `.alpha`, among others | |||
| #' | |||
| #' @examples | |||
| #' wide <- data_frame( | |||
| #' year = 2010:2011, | |||
| #' Alice = c(105, 110), | |||
| #' Bob = c(100, 97), | |||
| #' Charlie = c(90, 95) | |||
| #' ) | |||
| #' process_wide(wide, ids = "year", key = "person") | |||
| #' process_wide(wide, ids = "year", key = "person") %>% static_plot | |||
| process_wide <- function(x, ids, key, color_id = "lightgray", ...) { | |||
|   if (!all(ids %in% names(x))) | |||
|     stop("all ids must be in x") | |||
|   nr <- nrow(x) | |||
|   nc <- ncol(x) | |||
|   # every non-id column name is a key value | |||
|   key_values <- names(x) | |||
|   key_values <- key_values[!key_values %in% ids] | |||
|   id_values <- x %>% select(one_of(ids)) | |||
|   id_values <- id_values %>% gather(key = ".key_map", value = ".id_map") | |||
|   x <- x %>% mutate(.r = row_number()) %>% | |||
|     unite(one_of(ids), col = ".id_map", remove = FALSE) | |||
|   # one row per cell; only the original columns (no leading dot) are gathered | |||
|   x <- x %>% | |||
|     gather(key = ".col", value = ".val", names(x) %>% str_subset("^[^\\.]")) %>% | |||
|     mutate(.key_map = .col, | |||
|            .type = ifelse(.col %in% ids, "id", "value"), | |||
|            .val = as.character(.val), | |||
|            .x = rep(seq_len(nc), each = nr), | |||
|            .y = -rep(seq_len(nr), nc)) | |||
|   # the .key_map == ids need to be redirected to the key-values and multiplied | |||
|   ids_key_map <- tidyr::crossing(.key_map = key_values, .col = ids) | |||
|   x <- bind_rows( | |||
|     x %>% filter(!.key_map %in% ids), | |||
|     x %>% filter(.key_map %in% ids) %>% select(-.key_map) %>% left_join(ids_key_map, by = ".col") | |||
|   ) | |||
|   # due to the untidiness of the wide data, we need to treat the keys in the | |||
|   # header specially | |||
|   key_mapper <- tidyr::crossing(id_values %>% select(.id_map), | |||
|                                 .key_map = key_values) %>% | |||
|     mutate(.id_map = as.character(.id_map)) | |||
|   key_frame <- data_frame(.r = 0, .col = key_values, | |||
|                           .val = key_values, .x = seq_along(key_values) + length(ids), | |||
|                           .y = 0, .type = "key", .key_map = key_values) %>% | |||
|     left_join(key_mapper, by = ".key_map") | |||
|   # add headers; the `.` puts the body rows after the header rows | |||
|   # NOTE(review): `.col = ids` and `.key_map = key_values` are recycled by | |||
|   # data_frame(), so this presumably only works for a single id column - confirm | |||
|   x <- x %>% bind_rows( | |||
|     data_frame(.id_map = ".header", .r = 0, .col = ids, .val = ids, | |||
|                .x = seq_along(ids), .y = 0, .type = "id", .key_map = key_values), | |||
|     key_frame, | |||
|     . | |||
|   ) %>% | |||
|     unite(.id_map, .key_map, .val, col = ".id", remove = FALSE) | |||
|   # header cells are fully opaque, body cells slightly transparent | |||
|   x %>% add_color_tidyr(key_values = key_values) %>% | |||
|     mutate(.alpha = ifelse(.id_map == ".header", 1, 0.6)) | |||
| } | |||
| #' Processes a long dataframe and converts it into a dataset that can be plotted | |||
| #' | |||
| #' The header positions assume the column order ids, key, value. | |||
| #' | |||
| #' @param x a long data frame | |||
| #' @param ids a vector of id-variables that are already in the tidy-format | |||
| #' @param key the name of the key column | |||
| #' @param value the name of the value column | |||
| #' @param ... not used | |||
| #' | |||
| #' @return a data frame with one row per cell to plot (header cells included) | |||
| #' and the helper columns `.x`, `.y`, `.val`, `.type`, `.id`, `.color` and | |||
| #' `.alpha`, among others | |||
| #' | |||
| #' @examples | |||
| #' long <- data_frame( | |||
| #' year = c(2010L, 2011L, 2010L, 2011L, 2010L, 2011L), | |||
| #' person = c("Alice", "Alice", "Bob", "Bob", "Charlie", "Charlie"), | |||
| #' sales = c(105, 110, 100, 97, 90, 95) | |||
| #' ) | |||
| #' process_long(long, ids = "year", key = "person", value = "sales") | |||
| #' process_long(long, ids = "year", key = "person", value = "sales") %>% static_plot | |||
| process_long <- function(x, ids, key, value, ...) { | |||
|   if (!all(c(ids, key, value) %in% names(x))) | |||
|     stop("all ids, key, and value must be names of x") | |||
|   nr <- nrow(x) | |||
|   nc <- ncol(x) | |||
|   # one_of() (as in process_wide) makes sure the strings are read as column | |||
|   # names, even if x has columns that are literally called "ids" or "key" | |||
|   x <- x %>% mutate(.r = row_number()) %>% | |||
|     unite(one_of(ids), col = ".id_map", remove = FALSE) %>% | |||
|     unite(one_of(key), col = ".key_map", remove = FALSE) | |||
|   key_values <- unique(x[[key]]) | |||
|   # lookup from column name to cell type; exactly one name per entry | |||
|   type_dict <- c(rep("id", length(ids)), rep("key", length(key)), rep("value", length(value))) | |||
|   names(type_dict) <- c(ids, key, value) | |||
|   # id headers: one row per id column and key value; every id column keeps its | |||
|   # own name and x position instead of being crossed with all the others | |||
|   id_headers <- tidyr::crossing( | |||
|     data_frame(.col = ids, .val = ids, .x = seq_along(ids)), | |||
|     .key_map = key_values | |||
|   ) %>% | |||
|     mutate(.id_map = ".header", .r = 0, .y = 0, .type = "id") %>% | |||
|     select(.id_map, .r, .col, .val, .x, .y, .type, .key_map) | |||
|   # one row per cell; only the original columns (no leading dot) are gathered. | |||
|   # The `.` in bind_rows() puts the body rows after the header rows. | |||
|   x <- x %>% | |||
|     gather(key = ".col", value = ".val", names(x) %>% str_subset("^[^\\.]")) %>% | |||
|     mutate( | |||
|       .x = rep(seq_len(nc), each = nr), | |||
|       .y = -rep(seq_len(nr), nc), | |||
|       .type = type_dict[.col], | |||
|       .val = as.character(.val) | |||
|     ) %>% | |||
|     bind_rows( | |||
|       id_headers, | |||
|       # the key and value headers sit to the right of all id columns | |||
|       data_frame(.id_map = ".header", .r = 0, .col = key, .val = key, | |||
|                  .x = length(ids) + seq_along(key), .y = 0, .type = "key", | |||
|                  .key_map = key_values), | |||
|       data_frame(.id_map = ".header", .r = 0, .col = value, .val = value, | |||
|                  .x = length(ids) + length(key) + seq_along(value), .y = 0, .type = "value", | |||
|                  .key_map = "value"), | |||
|       . | |||
|     ) %>% | |||
|     unite(.id_map, .key_map, .val, col = ".id", remove = FALSE) | |||
|   # header cells are fully opaque, body cells slightly transparent | |||
|   x %>% add_color_tidyr(key_values = key_values) %>% | |||
|     mutate(.alpha = ifelse(.id_map == ".header", 1, 0.6)) | |||
| } | |||
| #' Animates a gather or spread function | |||
| #' | |||
| #' internally used by animate_spread and animate_gather | |||
| #' | |||
| #' @param lhs the (processed) dataset on the left-side | |||
| #' @param rhs the (processed) dataset on the right-side | |||
| #' @param sequence a named vector of the sequence titles | |||
| #' (current_state, final_state, operation, and reverse_operation) | |||
| #' @param key_values the unique key-values | |||
| #' @param export the export type, either gif, first or last. The latter two | |||
| #' export ggplots of the first/last state of the join. Any other value is an | |||
| #' error. | |||
| #' @param detailed boolean value if the animation should show one step for each | |||
| #' key value | |||
| #' @param ... currently not forwarded to the plot functions (see the open | |||
| #' issue noted at the end of the function body) | |||
| #' | |||
| #' @return the plot or the gif | |||
| #' | |||
| #' @examples | |||
| #' NULL | |||
| gather_spread <- function(lhs, rhs, sequence, key_values, export, detailed, ...) { | |||
|   # fail early on an unknown export type instead of silently returning NULL | |||
|   export <- match.arg(export, c("gif", "first", "last")) | |||
|   # lhs is the current state of the df, rhs is the target state; both have to | |||
|   # be preprocessed already (ids, .x, .y etc.) | |||
|   # animate the four steps: initial with sequence[["current_state"]], | |||
|   # transformations by the unique key-values with sequence[["operation"]], | |||
|   # final with sequence[["final_state"]] | |||
|   # and back transformation with sequence[["reverse_operation"]] | |||
|   # transformations: for each key-variable the respective ids, keys and values | |||
|   # "fly in" (all in one step for one key, i.e., Alice) | |||
|   # horizontal gap between lhs and the shifted rhs | |||
|   xshift <- 2 | |||
|   state_start <- lhs %>% mutate(.frame = 0) | |||
|   step_0 <- lhs %>% mutate(.frame = 1) | |||
|   # the target state is the last frame and is drawn next to lhs | |||
|   state_end <- rhs %>% mutate(.frame = length(key_values) + 2, .x = .x + max(lhs$.x) + xshift) | |||
|   if (detailed) { | |||
|     # take one instance of the first headers | |||
|     start_headers <- lhs %>% filter(.id_map == ".header" & !.val %in% key_values) %>% | |||
|       group_by(.col, .val) %>% slice(1) %>% ungroup() | |||
|     end_headers <- state_end %>% filter(.id_map == ".header") | |||
|     # for each unique key-value move the respective entries | |||
|     keys_to_shift <- lhs %>% filter(.key_map %in% key_values) | |||
|     keys_shifted <- lhs[0, ] | |||
|     key_steps <- lhs[0, ] | |||
|     # frames 0 and 1 are taken by lhs, so the first key step is frame 2 | |||
|     i <- 1 | |||
|     for (keyval in key_values) { | |||
|       i <- i + 1 | |||
|       keys_shifted <- bind_rows(keys_shifted, filter(state_end, .key_map == keyval)) | |||
|       keys_to_shift <- keys_to_shift %>% filter(.key_map != keyval) | |||
|       if (keyval == key_values[length(key_values)]) { | |||
|         # in the last round, we dont want to save the start headers | |||
|         start_headers <- NULL | |||
|       } | |||
|       round_n <- bind_rows(end_headers, start_headers, | |||
|                            keys_shifted, keys_to_shift) %>% mutate(.frame = i) | |||
|       key_steps <- bind_rows(key_steps, round_n) | |||
|     } | |||
|     anim_df <- bind_rows(state_start, step_0, key_steps, state_end) | |||
|     # form the .frame as proper factors: one label per frame, in frame order | |||
|     frame_labels <- c( | |||
|       sequence[["current_state"]], | |||
|       paste(sequence[["operation"]], key_values), | |||
|       sequence[["final_state"]], | |||
|       sequence[["reverse_operation"]] | |||
|     ) | |||
|     # the title drops the key value that was appended to the operation label | |||
|     title_string <- "{gsub('\\\\) [a-zA-Z]+$', ')', previous_state)}" | |||
|   } else { | |||
|     anim_df <- bind_rows(state_start, state_end) | |||
|     frame_labels <- c( | |||
|       sequence[["operation"]], | |||
|       sequence[["reverse_operation"]] | |||
|     ) | |||
|     title_string <- "{ifelse(transitioning, previous_state, ifelse(grepl('gather', next_state), 'Wide', 'Long'))}" | |||
|   } | |||
|   frame_levels <- anim_df$.frame %>% unique() | |||
|   anim_df <- anim_df %>% | |||
|     mutate(.frame = factor(.frame, | |||
|                            levels = frame_levels, | |||
|                            labels = frame_labels)) | |||
|   # open issue: ... is not forwarded to the plot functions because it doesnt | |||
|   # work properly, especially if the id-arguments are passed in the | |||
|   # gather-style, i.e., -year, or year:var | |||
|   switch(export, | |||
|     gif = animate_plot(anim_df, title = title_string), | |||
|     first = static_plot(state_start), | |||
|     last = static_plot(state_end) | |||
|   ) | |||
| } | |||
| @@ -8,7 +8,7 @@ output: github_document | |||
| knitr::opts_chunk$set( | |||
| collapse = TRUE, | |||
| comment = "#>", | |||
| echo = FALSE, | |||
| echo = TRUE, | |||
| warning = FALSE, | |||
| message = FALSE, | |||
| cache = TRUE | |||
| @@ -29,15 +29,18 @@ Garrick Aden-Buie -- [@grrrck](https://twitter.com/grrrck) -- [garrickade | |||
| [_-MIT-green.svg)](https://opensource.org/licenses/MIT) | |||
| - Mutating Joins: [`inner_join()`](#inner-join), [`left_join()`](#left-join), | |||
| [`right_join()`](#right-join), [`full_join()`](#full-join) | |||
| [`right_join()`](#right-join), [`full_join()`](#full-join) | |||
| - Filtering Joins: [`semi_join()`](#semi-join), [`anti_join()`](#anti-join) | |||
| - Set Operations: [`union()`](#union), [`union_all()`](#union-all), [`intersect()`](#intersect), [`setdiff()`](#setdiff) | |||
| - Tidyr Operations: [`gather()`](#gather), [`spread()`](#spread) | |||
| - Learn more about | |||
| - [Relational Data](#relational-data) | |||
| - [gganimate](#gganimate) | |||
| - [Relational Data](#relational-data) | |||
| - [gganimate](#gganimate) | |||
| Please feel free to use these images for teaching or learning about action verbs from the [tidyverse](https://tidyverse.org). | |||
| You can directly download the [original animations](images/) or static images in [svg](images/static/svg/) or [png](images/static/png/) formats, or you can use the [scripts](R/) to recreate the images locally. | |||
| @@ -174,18 +177,13 @@ anti_join(x, y, by = "id") | |||
| ## Set Operations | |||
| ```{r intial-dfs-so, echo=T} | |||
| x <- tibble::tribble( | |||
| ~x, ~y, | |||
| "1", "a", | |||
| "1", "b", | |||
| "2", "a" | |||
| x <- data_frame( | |||
| x = c(1, 1, 2), | |||
| y = c("a", "b", "a") | |||
| ) | |||
| y <- tibble::tribble( | |||
| ~x, ~y, | |||
| "1", "a", | |||
| "2", "b" | |||
| y <- data_frame( | |||
| x = c(1, 2), | |||
| y = c("a", "b") | |||
| ) | |||
| animate_union(x, y, export = "first") | |||
| @@ -266,6 +264,65 @@ animate_setdiff(y, x) | |||
| setdiff(y, x) | |||
| ``` | |||
| ## Tidy Data and `gather()`, `spread()` functionality | |||
| [Tidy data](http://r4ds.had.co.nz/tidy-data.html#tidy-data-1) follows | |||
| the following three rules: | |||
| 1. Each variable has its own column. | |||
| 2. Each observation has its own row. | |||
| 3. Each value has its own cell. | |||
| Many of the tools in the [tidyverse](https://tidyverse.org) expect data | |||
| to be formatted as a tidy dataset and the | |||
| [tidyr](https://tidyr.tidyverse.org) package provides functions to help | |||
| you organize your data into tidy data. | |||
| ```{r} | |||
| long <- data_frame( | |||
| year = c(2010, 2011, 2010, 2011, 2010, 2011), | |||
| person = c("Alice", "Alice", "Bob", "Bob", "Charlie", "Charlie"), | |||
| sales = c(105, 110, 100, 97, 90, 95) | |||
| ) | |||
| wide <- data_frame( | |||
| year = 2010:2011, | |||
| Alice = c(105, 110), | |||
| Bob = c(100, 97), | |||
| Charlie = c(90, 95) | |||
| ) | |||
| ``` | |||
| ### Gather | |||
| > Gather takes multiple columns and collapses into key-value pairs, duplicating all other columns as needed. You use gather() when you notice that your column names are not names of variables, but values of a variable. | |||
| ```{r} | |||
| animate_gather(wide, key = "person", value = "sales", -year) | |||
| ``` | |||
| ```{r} | |||
| gather(wide, key = "person", value = "sales", -year) | |||
| ``` | |||
| ### Spread | |||
| > Spread a key-value pair across multiple columns. Use it when a column contains observations from multiple variables. | |||
| ```{r} | |||
| animate_spread(long, key = "person", value = "sales") | |||
| ``` | |||
| ```{r} | |||
| spread(long, key = "person", value = "sales") | |||
| ``` | |||
| ## Learn More | |||
| ### Relational Data | |||
| @@ -22,10 +22,13 @@ Smith](https://github.com/TylerGrantSmith). | |||
| - Set Operations: [`union()`](#union), [`union_all()`](#union-all), | |||
| [`intersect()`](#intersect), [`setdiff()`](#setdiff) | |||
| - Tidyr Operations: [`gather()`](#gather), [`spread()`](#spread) | |||
| - Learn more about | |||
| - [Relational Data](#relational-data) | |||
| - [gganimate](#gganimate) | |||
| - [Relational Data](#relational-data) | |||
| - [gganimate](#gganimate) | |||
| Please feel free to use these images for teaching or learning about | |||
| action verbs from the [tidyverse](https://tidyverse.org). You can | |||
| @@ -393,6 +396,68 @@ setdiff(y, x) | |||
| #> 1 2 b | |||
| ``` | |||
| ## Tidy Data and `gather()`, `spread()` functionality | |||
| ``` r | |||
| long <- data_frame( | |||
| year = c(2010, 2011, 2010, 2011, 2010, 2011), | |||
| person = c("Alice", "Alice", "Bob", "Bob", "Charlie", "Charlie"), | |||
| sales = c(105, 110, 100, 97, 90, 95) | |||
| ) | |||
| wide <- data_frame( | |||
| year = 2010:2011, | |||
| Alice = c(105, 110), | |||
| Bob = c(100, 97), | |||
| Charlie = c(90, 95) | |||
| ) | |||
| ``` | |||
| ### Gather | |||
| > Gather takes multiple columns and collapses into key-value pairs, | |||
| > duplicating all other columns as needed. You use gather() when you | |||
| > notice that your column names are not names of variables, but values | |||
| > of a variable. | |||
| ``` r | |||
| animate_gather(wide, key = "person", value = "sales", -year) | |||
| ``` | |||
| <!-- --> | |||
| ``` r | |||
| gather(wide, key = "person", value = "sales", -year) | |||
| #> # A tibble: 6 x 3 | |||
| #> year person sales | |||
| #> <int> <chr> <dbl> | |||
| #> 1 2010 Alice 105 | |||
| #> 2 2011 Alice 110 | |||
| #> 3 2010 Bob 100 | |||
| #> 4 2011 Bob 97 | |||
| #> 5 2010 Charlie 90 | |||
| #> 6 2011 Charlie 95 | |||
| ``` | |||
| ### Spread | |||
| > Spread a key-value pair across multiple columns. Use it when a | |||
| > column contains observations from multiple variables. | |||
| ``` r | |||
| animate_spread(long, key = "person", value = "sales") | |||
| ``` | |||
| <!-- --> | |||
| ``` r | |||
| spread(long, key = "person", value = "sales") | |||
| #> # A tibble: 2 x 4 | |||
| #> year Alice Bob Charlie | |||
| #> <dbl> <dbl> <dbl> <dbl> | |||
| #> 1 2010 105 100 90 | |||
| #> 2 2011 110 97 95 | |||
| ``` | |||
| ## Learn More | |||
| ### Relational Data | |||
| @@ -0,0 +1,29 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/tidyr_helpers.R | |||
| \name{add_color_tidyr} | |||
| \alias{add_color_tidyr} | |||
| \title{Adds color to processed tidy data} | |||
| \usage{ | |||
| add_color_tidyr(x, key_values, color_fun = scales::brewer_pal(type = | |||
| "qual", "Set1"), color_header = "darkgray") | |||
| } | |||
| \arguments{ | |||
| \item{x}{a processed data-frame as outputted by process_long or process_wide} | |||
| \item{key_values}{the unique key-values} | |||
| \item{color_fun}{the color function} | |||
| \item{color_header}{the color for the header} | |||
| \item{...}{not used} | |||
| } | |||
| \value{ | |||
| a data-frame with the colors | |||
| } | |||
| \description{ | |||
| Adds color to processed tidy data | |||
| } | |||
| \examples{ | |||
| NULL | |||
| } | |||
| @@ -0,0 +1,45 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/animate_tidyr.R | |||
| \name{animate_gather} | |||
| \alias{animate_gather} | |||
| \title{Animates the gather function} | |||
| \usage{ | |||
| animate_gather(w, key, value, ..., export = "gif", detailed = TRUE) | |||
| } | |||
| \arguments{ | |||
| \item{w}{a data_frame in the wide format} | |||
| \item{key}{the key} | |||
| \item{value}{the value} | |||
| \item{...}{further arguments passed to gather, static_plot, or animate_plot} | |||
| \item{export}{the export type, either gif, first or last. The latter two | |||
| export ggplots of the first/last state of the gather function} | |||
| \item{detailed}{boolean value if the animation should show one step for each | |||
| key value} | |||
| } | |||
| \value{ | |||
| a gif or a ggplot | |||
| } | |||
| \description{ | |||
| Animates the gather function | |||
| } | |||
| \examples{ | |||
| wide <- data_frame( | |||
| year = 2010:2011, | |||
| Alice = c(105, 110), | |||
| Bob = c(100, 97), | |||
| Charlie = c(90, 95) | |||
| ) | |||
| animate_gather(wide, "key", "value", -year, export = "first") | |||
| animate_gather(wide, "person", "sales", -year, export = "last") | |||
| \donttest{ | |||
| animate_gather(wide, "person", "sales", -year, export = "gif") | |||
| # if you want to have a less detailed animation, you can also use | |||
| animate_gather(wide, "person", "sales", -year, export = "gif", detailed = FALSE) | |||
| } | |||
| } | |||
| @@ -4,13 +4,18 @@ | |||
| \alias{animate_plot} | |||
| \title{Animates a plot} | |||
| \usage{ | |||
| animate_plot(d, title = "", ...) | |||
| animate_plot(d, title = "", transition_length = 2, state_length = 1, | |||
| ...) | |||
| } | |||
| \arguments{ | |||
| \item{d}{a preprocessed dataset} | |||
| \item{title}{the plot title} | |||
| \item{transition_length}{see transition_states} | |||
| \item{state_length}{see transition_states} | |||
| \item{...}{further arguments passed to static_plot} | |||
| } | |||
| \value{ | |||
| @@ -0,0 +1,44 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/animate_tidyr.R | |||
| \name{animate_spread} | |||
| \alias{animate_spread} | |||
| \title{Animates the spread function} | |||
| \usage{ | |||
| animate_spread(l, key, value, export = "gif", detailed = TRUE, ...) | |||
| } | |||
| \arguments{ | |||
| \item{l}{a data_frame in the long/tidy format} | |||
| \item{key}{the key} | |||
| \item{value}{the values} | |||
| \item{export}{the export type, either gif, first or last. The latter two | |||
| export ggplots of the first/last state of the spread function} | |||
| \item{detailed}{boolean value if the animation should show one step for each | |||
| key value} | |||
| \item{...}{further arguments passed to static_plot} | |||
| } | |||
| \value{ | |||
| a ggplot or a gif | |||
| } | |||
| \description{ | |||
| Animates the spread function | |||
| } | |||
| \examples{ | |||
| long <- data_frame( | |||
| year = c(2010L, 2011L, 2010L, 2011L, 2010L, 2011L), | |||
| person = c("Alice", "Alice", "Bob", "Bob", "Charlie", "Charlie"), | |||
| sales = c(105, 110, 100, 97, 90, 95) | |||
| ) | |||
| animate_spread(long, key = "person", value = "sales", export = "first") | |||
| animate_spread(long, key = "person", value = "sales", export = "last") | |||
| \donttest{ | |||
| animate_spread(long, key = "person", value = "sales", export = "gif") | |||
| # if you want to have a less detailed animation, you can also use | |||
| animate_spread(long, key = "person", value = "sales", export = "gif", detailed = FALSE) | |||
| } | |||
| } | |||
| @@ -0,0 +1,21 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/tidyr_helpers.R | |||
| \name{dput_parser} | |||
| \alias{dput_parser} | |||
| \title{Parses a simple vector so that it looks like its input} | |||
| \usage{ | |||
| dput_parser(x) | |||
| } | |||
| \arguments{ | |||
| \item{x}{a vector} | |||
| } | |||
| \value{ | |||
| a string | |||
| } | |||
| \description{ | |||
| Parses a simple vector so that it looks like its input | |||
| } | |||
| \examples{ | |||
| dput_parser("x") | |||
| dput_parser(c("x", "y")) | |||
| } | |||
| @@ -0,0 +1,35 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/tidyr_helpers.R | |||
| \name{gather_spread} | |||
| \alias{gather_spread} | |||
| \title{Animates a gather or spread function} | |||
| \usage{ | |||
| gather_spread(lhs, rhs, sequence, key_values, export, detailed, ...) | |||
| } | |||
| \arguments{ | |||
| \item{lhs}{the (processed) dataset on the left-side} | |||
| \item{rhs}{the (processed) dataset on the right-side} | |||
| \item{sequence}{a named vector of the sequence titles | |||
| (current_state, final_state, operation, and reverse_operation)} | |||
| \item{key_values}{the unique key-values} | |||
| \item{export}{the export type, either gif, first or last. The latter two | |||
| export ggplots of the first/last state of the join} | |||
| \item{detailed}{boolean value if the animation should show one step for each | |||
| key value} | |||
| \item{...}{further arguments passed to animate_plot} | |||
| } | |||
| \value{ | |||
| the plot or the gif | |||
| } | |||
| \description{ | |||
| internally used by animate_spread and animate_gather | |||
| } | |||
| \examples{ | |||
| NULL | |||
| } | |||
| @@ -0,0 +1,23 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/tidyr_helpers.R | |||
| \name{get_quos_names} | |||
| \alias{get_quos_names} | |||
| \title{Gets the ... names} | |||
| \usage{ | |||
| get_quos_names(...) | |||
| } | |||
| \arguments{ | |||
| \item{...}{arguments} | |||
| } | |||
| \value{ | |||
| a vector of the names of ... | |||
| } | |||
| \description{ | |||
| Used to get the -year | |||
| } | |||
| \examples{ | |||
| x <- 1:10 | |||
| y <- 1 | |||
| get_quos_names(-x) | |||
| get_quos_names(x:y) | |||
| } | |||
| @@ -0,0 +1,34 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/tidyr_helpers.R | |||
| \name{process_long} | |||
| \alias{process_long} | |||
| \title{Processes a long dataframe and converts it into a dataset that can be plotted} | |||
| \usage{ | |||
| process_long(x, ids, key, value, color_id = "lightgray", ...) | |||
| } | |||
| \arguments{ | |||
| \item{x}{a long data frame} | |||
| \item{ids}{a vector of id-variables that are already in the tidy-format} | |||
| \item{key}{a vector of key-variables} | |||
| \item{color_id}{the color for the id-body} | |||
| \item{...}{} | |||
| } | |||
| \value{ | |||
| } | |||
| \description{ | |||
| Processes a long dataframe and converts it into a dataset that can be plotted | |||
| } | |||
| \examples{ | |||
| long <- data_frame( | |||
| year = c(2010L, 2011L, 2010L, 2011L, 2010L, 2011L), | |||
| person = c("Alice", "Alice", "Bob", "Bob", "Charlie", "Charlie"), | |||
| sales = c(105, 110, 100, 97, 90, 95) | |||
| ) | |||
| process_long(long, ids = "year", key = "person", value = "sales") | |||
| process_long(long, ids = "year", key = "person", value = "sales") \%>\% static_plot | |||
| } | |||
| @@ -0,0 +1,35 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/tidyr_helpers.R | |||
| \name{process_wide} | |||
| \alias{process_wide} | |||
| \title{Processes a wide dataframe and converts it into a dataset that can be plotted} | |||
| \usage{ | |||
| process_wide(x, ids, key, color_id = "lightgray", ...) | |||
| } | |||
| \arguments{ | |||
| \item{x}{a wide data frame} | |||
| \item{ids}{a vector of id-variables that are already in the tidy-format} | |||
| \item{key}{a vector of key-variables} | |||
| \item{color_id}{the color for the id-body} | |||
| \item{...}{} | |||
| } | |||
| \value{ | |||
| } | |||
| \description{ | |||
| Processes a wide dataframe and converts it into a dataset that can be plotted | |||
| } | |||
| \examples{ | |||
| wide <- data_frame( | |||
| year = 2010:2011, | |||
| Alice = c(105, 110), | |||
| Bob = c(100, 97), | |||
| Charlie = c(90, 95) | |||
| ) | |||
| process_wide(wide, ids = "year", key = "person") | |||
| process_wide(wide, ids = "year", key = "person") \%>\% static_plot | |||
| } | |||