#' Preprocess data #' #' @param x a left dataset #' @param y a right dataset #' @param by a by argument for joins / set operations #' @param fill if missing ids should be filled #' @param ... further arguments passed to add_color #' @param ao anim_options() #' #' @return a preprocessed dataset #' #' @examples #' NULL process_join <- function(x, y, by, fill = TRUE, ..., ao = anim_options(...)) { #' test for #' a <- c("unique", "mult", "mult", "also unique") #' add_duplicate_number(a) add_duplicate_number <- function(a) { data_frame(v = a) %>% group_by(v) %>% mutate(id = paste(v, 1:dplyr::n(), sep = "-")) %>% pull(id) } x <- x %>% tidyr::unite(dplyr::one_of(by), col = ".id", remove = FALSE) %>% mutate(.id_long = add_duplicate_number(.id)) y <- y %>% tidyr::unite(dplyr::one_of(by), col = ".id", remove = FALSE) %>% mutate(.id_long = add_duplicate_number(.id)) ids <- dplyr::union(x %>% dplyr::select(.id, .id_long), y %>% dplyr::select(.id, .id_long)) x_ <- process_data_join(x, ids, by, fill = fill, ao = ao) y_ <- process_data_join(y, ids, by, fill = fill, ao = ao) %>% mutate(.x = .x + ncol(x) - 1) list(x = x_, y = y_) } #' Processes the data #' #' @param x a preprocessed dataset #' @param ids a data_frame of ids (.id and .id_long) #' @param by a vector of by-arguments #' @param width the width of the tiles #' @param side the side (x or y, lhs or rhs, etc) #' @param fill if missing ids should be filled #' @param ... further arguments passed to add_color #' @param ao anim_options #' #' @return a data_frame including all necessary information #' #' @examples #' NULL process_data_join <- function(x, ids, by, width = 1, side = NA, fill = TRUE, ..., ao = anim_options(...)) { if (is.na(side)) side <- deparse(substitute(x)) x_names <- names(x)[grepl("^[^\\.]", names(x))] x_keys <- 1:length(x_names) names(x_keys) <- x_names special_vars <- names(x)[grepl("^\\.", names(x))] x <- x %>% mutate(.r = row_number()) %>% tidyr::gather_(key = ".col", value = ".val", names(x)[grepl("^[^.]", names(x))]) %>% mutate(.x = x_keys[.col], .y = -.r) %>% bind_rows(data_frame(.id = ".header", .id_long = paste(".header", x_names, sep = "_"), .r = 0, .col = x_names, .val = x_names, .x = x_keys, .y = 0), .) %>% mutate(.width = width, .side = side) # if there are multiple values in the ids (-2, -3 etc) but they are not present # in x, because it is in the second/other dataset, add these values here id_long <- ids$.id_long mis_ids <- id_long[!id_long %in% x$.id_long] # if the missing value is a -1, that means the missing value comes not from # missing dublicate ids mis_ids <- mis_ids[grepl("[^-1]$", mis_ids)] if (length(mis_ids) > 0 && fill) { mis_ids_short <- gsub("-[0-9]+$", "", mis_ids) # insert the missing ids at the right place for (i in mis_ids_short) { irow <- (1:nrow(x))[x$.id == i] irow <- irow[1] x <- bind_rows( x %>% slice(1:irow), x %>% filter(.id %in% mis_ids_short) %>% mutate(.id_long = mis_ids), x %>% slice((irow + 1):nrow(x)) ) } } add_color_join(x, rev(ids$.id), by, ao) } #' Adds Color to a processed data_frame #' #' @param x a processed data_frame #' @param ids a vector of ids for the color-matching #' @param by a vector of column names that constitute the by-argument of joins/sets #' @param color_header color for the header #' @param color_other color for "inactive" values #' @param color_missing color for missing values #' @param color_fun the function to generate the colors #' @param text_color the color for the text inside the tiles, #' defaults to white/black depending on tile color #' @param ... #' #' @return the processed data_frame with a new column .color #' #' @examples #' NULL add_color_join <- function(x, ids, by, ao, ...) { color_header <- ao$color_header %||% get_anim_opt("color_header") color_other <- ao$color_other %||% get_anim_opt("color_other") color_missing <- ao$color_missing %||% get_anim_opt("color_missing") color_fun <- ao$color_fun %||% get_anim_opt("color_fun") text_color <- ao$text_color %||% get_anim_opt("text_color") colors <- c(color_header, color_fun(length(ids))) names(colors) <- c(".header", ids) res <- x %>% mutate( .color = ifelse(is.na(.val), color_missing, ifelse(.col %in% by, colors[.id], color_other)), .color = ifelse(.id == ".header", color_header, .color), .textcolor = text_color) if (is.na(text_color)) res <- res %>% mutate(.textcolor = choose_text_color(.color)) return(res) }