8 лет назад · a29a10f3ae
--- a/R/animate_helpers.R
+++ b/R/animate_helpers.R
                   deparse(substitute(x)),
                   deparse(substitute(y)))
  ll <- preprocess_data(x, y, by = names(x))
  if (type %in% c("union", "intersect", "setdiff")) {
    x <- dplyr::distinct(x)
    y <- dplyr::distinct(y)
  }
  if (type == "union_all") {
    ll <- preprocess_data(x, y, by = names(x), fill = FALSE)
    ll <- lapply(ll, function(a)
      a %>% mutate(.id_long = paste(.id_long, .side, sep = "-"))
    )
  } else {
    ll <- preprocess_data(x, y, by = names(x))
  }
  step0 <- bind_rows(ll$x, ll$y) %>% mutate(.frame = 0, .alpha = 1)
  if (!export %in% c("gif", "first", "last"))
    stop("export must be either gif, first, or last")
  title <- sprintf(paste0(type, "(%s, %s, by = c(\"%s\"))"),
  by_args <- ifelse(length(by) == 1,
                    sprintf("\"%s\"", by),
                    sprintf("c(\"%s\")", paste(by, collapse = "\", \""))
                    )
  title <- sprintf(paste0(type, "(%s, %s, by = %s)"),
                   deparse(substitute(x)),
                   deparse(substitute(y)),
                   paste(by, collapse = "\", \""))
                   by_args)
  if (type %in% c("semi_join", "anti_join")) {
    # for semi and anti_joins, there is no adding of multiple rows
    y <- dplyr::distinct(y)
  }
  ll <- preprocess_data(x, y, by)
--- a/R/move_together.R
+++ b/R/move_together.R
 #' NULL
 move_together <- function(lhs, rhs, type) {
  all_ids <- bind_rows(lhs, rhs) %>% distinct(.id)
  all <- bind_rows(lhs, rhs)
  x_cols <- lhs %>% distinct(col)
  y_cols <- rhs %>% distinct(col)
  # separate column and row-filter (ids)
  x_cols <- lhs %>% distinct(.col)
  y_cols <- rhs %>% distinct(.col)
  # separate header columns from ids and treat them as columns
  x_ids <- lhs %>% distinct(.id, .id_long)
  y_ids <- rhs %>% distinct(.id, .id_long)
  x_ids <- lhs %>% distinct(.id)
  y_ids <- rhs %>% distinct(.id)
  x_headers <- x_ids %>% filter(str_detect(.id_long, "^\\.header"))
  y_headers <- y_ids %>% filter(str_detect(.id_long, "^\\.header"))
  x_ids <- x_ids %>% filter(!str_detect(.id_long, "^\\.header"))
  y_ids <- y_ids %>% filter(!str_detect(.id_long, "^\\.header"))
  if (type == "full_join") {
    col_combiner <- dplyr::full_join
    col_combiner <- dplyr::full_join
    row_combiner <- dplyr::right_join
  } else if (type == "semi_join") {
    col_combiner <- dplyr::semi_join
    col_combiner <- dplyr::left_join
    row_combiner <- dplyr::semi_join
  } else if (type == "anti_join") {
    col_combiner <- dplyr::semi_join
  } else if (type == "union_all") {
    col_combiner <- dplyr::full_join
    row_combiner <- dplyr::union_all
    x_ids <- lhs %>% distinct(.id = .id_long)
    y_ids <- rhs %>% distinct(.id = .id_long)
    all <- all %>% rename(id_old = .id, .id = .id_long)
    # all <- all %>% rename(.id = .id_long)
  } else if (type == "intersect") {
    col_combiner <- dplyr::full_join
    row_combiner <- dplyr::intersect
    stop("Unknown func")
  }
  take_cols <- col_combiner(x_cols, y_cols, by = "col")
  take_ids  <- row_combiner(x_ids, y_ids, by = ".id")
  # make sure .header is always the first
  id_number <- which(str_detect(take_ids$.id, "^.header"))
  if (length(id_number) != 0)
    take_ids <- take_ids[c(id_number, (1:nrow(take_ids))[-id_number]), ]
  if (!any(str_detect(take_ids$.id, "^.header")))
    take_ids <- bind_rows(data_frame(.id = ".header"), take_ids)
  take_cols <- col_combiner(x_cols, y_cols, by = ".col")
  take_ids  <- row_combiner(x_ids, y_ids, by = c(".id", ".id_long"))
  take_headers <- col_combiner(x_headers, y_headers, by = c(".id", ".id_long"))
  take_ids <- bind_rows(take_headers, take_ids)
  take <- tidyr::crossing(take_ids, take_cols)
  mid <- (2 + length(unique(lhs$col)) + length(unique(rhs$col))) / 2
  mid <- (2 + length(unique(lhs$.col)) + length(unique(rhs$.col))) / 2
  xvals <- 1:nrow(take_cols)
  xvals <- xvals - mean(xvals) + mid
  names(xvals) <- take_cols %>% pull(col)
  names(xvals) <- take_cols %>% pull(.col)
  n_non_header <- sum(str_detect(take_ids$.id, "^[^\\.header]"))
  yvals <- cumsum(ifelse(str_detect(take_ids$.id, "^\\.header"), 0, -1))
  names(yvals) <- take_ids %>% pull(.id)
  yvals <- cumsum(ifelse(str_detect(take_ids$.id_long, "^\\.header"), 0, -1))
  names(yvals) <- take_ids %>% pull(.id_long)
  take_vals <- semi_join(all, take, by = c(".id", "col")) %>%
  take_vals <- semi_join(all, take %>% select(".id", ".col"),
                         by = c(".id", ".col")) %>%
    mutate(.alpha = 1,
           .x = xvals[col],
           .y = yvals[.id])
  if (type ==  "union_all") {
    take_vals <- take_vals %>% rename(.id_long = .id, .id = id_old)
  }
           .x = xvals[.col],
           .y = yvals[.id_long])
  res <- bind_rows(
    # take,
    take_vals,
    # fade in place:
    all %>% filter(!.id %in% take_ids$.id) %>% mutate(.alpha = 0),
    all %>% filter(!.id_long %in% take_ids$.id_long) %>% mutate(.alpha = 0),
    # moving fade or fade in place as well:
    all %>% filter(.id %in% take_ids$.id & !col %in% take_cols$col) %>%
    all %>% filter(.id_long %in% take_ids$.id_long & !.col %in% take_cols$.col) %>%
      mutate(.alpha = 0)
  )
  return(res)
--- a/R/plot_helpers.R
+++ b/R/plot_helpers.R
  }
  if (!".alpha" %in% names(d)) d <- d %>% mutate(.alpha = 1)
  ggplot(d, aes(x = .x, group = .id_long, y = .y, fill = .color, alpha = .alpha)) +
  d <- d %>% mutate(.item_id = paste(.id_long, .col, sep = "-"))
  ggplot(d, aes(x = .x, group = .item_id, y = .y, fill = .color, alpha = .alpha)) +
    geom_tile(width = 0.9, height = 0.9) +
    coord_equal() +
    geom_text(data = d %>% filter(!is.na(val)), aes(label = val), color = "white",
    geom_text(data = d %>% filter(!is.na(.val)), aes(label = .val), color = "white",
              family = text_family, size = text_size) +
    scale_fill_identity() +
    scale_alpha_identity() +
--- a/R/process_data_helpers.R
+++ b/R/process_data_helpers.R
 #' @param x a left dataset
 #' @param y a right dataset
 #' @param by a by argument for joins / set operations
 #' @param fill if missing ids should be filled
 #'
 #' @return a preprocessed dataset
 #'
 #' @examples
 #' NULL
 preprocess_data <- function(x, y, by) {
 preprocess_data <- function(x, y, by, fill = TRUE) {
  xvars <- names(x) %>% str_subset("^[^\\.]")
  yvars <- names(y) %>% str_subset("^[^\\.]")
  #' test for
  #' a <- c("unique", "mult", "mult", "also unique")
  #' add_duplicate_number(a)
  add_duplicate_number <- function(a) {
    data_frame(v = a) %>%
      group_by(v) %>%
      mutate(id = paste(v, 1:n(), sep = "-")) %>%
      pull(id)
  }
  x <- x %>%
    unite(one_of(by), col = ".id", remove = FALSE) %>%
    unite(one_of(xvars), col = ".id_long", remove = FALSE)
    mutate(.id_long = add_duplicate_number(.id))
  y <- y %>%
    unite(one_of(by), col = ".id", remove = FALSE) %>%
    unite(one_of(yvars), col = ".id_long", remove = FALSE)
    unite(one_of(by), col = ".id", remove = FALSE)  %>%
    mutate(.id_long = add_duplicate_number(.id))
  ids <- unique(c(x$.id, y$.id))
  ids <- dplyr::union(x %>% dplyr::select(.id, .id_long),
                      y %>% dplyr::select(.id, .id_long))
  x_ <- process_data(x, ids, by) %>%
    mutate(.id_long = paste(.id_long, .side, .r, sep = "_"))
  y_ <- process_data(y, ids, by) %>%
    mutate(.x = .x + ncol(x),
           .id_long = paste(.id_long, .side, .r, sep = "_"))
  x_ <- process_data(x, ids, by, fill = fill)
  y_ <- process_data(y, ids, by, fill = fill) %>%
    mutate(.x = .x + ncol(x) - 1)
  return(list(x = x_, y = y_))
 }
 #' Processes the data
 #'
 #' @param x a preprocessed dataset
 #' @param ids a vector of ids
 #' @param ids a data_frame of ids (.id and .id_long)
 #' @param by a vector of by-arguments
 #' @param width the width of the tiles
 #' @param side the side (x or y, lhs or rhs, etc)
 #' @param fill if missing ids should be filled
 #'
 #' @return a data_frame including all necessary information
 #'
 #' @examples
 #' NULL
 process_data <- function(x, ids, by, width = 1, side = NA) {
 process_data <- function(x, ids, by, width = 1, side = NA, fill = TRUE) {
  if (is.na(side)) side <- deparse(substitute(x))
  x_names <- names(x) %>% str_subset("^[^\\.]")
  x <- x %>%
    mutate(.r = row_number()) %>%
    gather_(key = "col", value = "val", names(x) %>% str_subset("^[^.]")) %>%
    mutate(.x = x_keys[col],
    gather_(key = ".col", value = ".val", names(x) %>% str_subset("^[^.]")) %>%
    mutate(.x = x_keys[.col],
           .y = -.r) %>%
    bind_rows(data_frame(.id = ".header",
                         .id_long = paste(".header", x_names, sep = "_"),
                         .r = 0, col = x_names, val = x_names,
                         .r = 0,
                         .col = x_names,
                         .val = x_names,
                         .x = x_keys, .y = 0), .) %>%
    mutate(.width = width,
           .side = side)
  add_color(x, ids, by)
  # if there are multiple values in the ids (-2, -3 etc) but they are not present
  # in x, because it is in the second/other dataset, add these values here
  id_long <- ids$.id_long
  mis_ids <- id_long[!id_long %in% x$.id_long]
  # if the missing value is a -1, that means the missing value comes not from
  # missing dublicate ids
  mis_ids <- str_subset(mis_ids, "[^-1]$")
  if (length(mis_ids) > 0 && fill) {
    mis_ids_short <- str_replace(mis_ids, "-[0-9]+$", "")
    # insert the missing ids at the right place
    for (i in mis_ids_short) {
      irow <- (1:nrow(x))[x$.id == i]
      irow <- irow[1]
      x <- bind_rows(
        x %>% slice(1:irow),
        x %>% filter(.id %in% mis_ids_short) %>% mutate(.id_long = mis_ids),
        x %>% slice((irow + 1):nrow(x))
      )
    }
  }
  res <- add_color(x, ids$.id, by)
  return(res)
 }
 #' Adds Color to a processed data_frame
  colors <- c(color_header, scales::brewer_pal(type = "qual", "Set1")(length(ids)))
  names(colors) <- c(".header", ids)
  x %>%
    mutate(.color = ifelse(is.na(val), color_missing, colors[.id]),
           .color = ifelse(col %in% by, .color, color_other))
  res <- x %>%
    mutate(.color = ifelse(is.na(.val), color_missing, colors[.id]),
           .color = ifelse(.col %in% by, .color, color_other))
  return(res)
 }
--- a/README.Rmd
+++ b/README.Rmd
 > All rows from `x`, and all columns from `x` and `y`. Rows in `x` with no match in `y` will have `NA` values in the new columns.
 ```{r left-join, echo=T}
 animate_inner_join(x, y, by = "id")
 animate_left_join(x, y, by = "id")
 ```
 > ... If there are multiple matches between `x` and `y`, all combinations of the matches are returned.
 ```{r left-join-extra}
 # Not yet working
 # source("R/left_join_extra.R")
 ```{r left-join-extra, echo=T}
 y_extra <- bind_rows(y, data_frame(id = 2, y = "y5"))
 ```
 y_extra # has multiple rows with the key from `x`
 ![](images/left-join-extra.gif)
 animate_left_join(x, y_extra, by = "id")
 ```
 ```{r echo=TRUE}
 y_extra # has multiple rows with the key from `x`
 left_join(x, y_extra, by = "id")
 ```
 > All rows from `x` where there are not matching values in `y`, keeping just columns from `x`.
 ```{r anti-join}
 ```{r anti-join, echo=T}
 animate_anti_join(x, y, by = "id")
 ```
 > All unique rows from `x` and `y`.
 ```{r union, export=T}
 ```{r union, echo=T}
 animate_union(x, y)
 ```
--- a/README.md
+++ b/README.md
 > no match in `y` will have `NA` values in the new columns.
 ``` r
 animate_inner_join(x, y, by = "id")
 animate_left_join(x, y, by = "id")
 ```
 ![](README_files/figure-gfm/left-join-1.gif)<!-- -->
 > … If there are multiple matches between `x` and `y`, all combinations
 > of the matches are returned.
 ![](images/left-join-extra.gif)
 ``` r
 y_extra <- bind_rows(y, data_frame(id = 2, y = "y5"))
 y_extra # has multiple rows with the key from `x`
 #> # A tibble: 4 x 2
 #>      id y    
 #> 2     2 y2   
 #> 3     4 y4   
 #> 4     2 y5
 animate_left_join(x, y_extra, by = "id")
 ```
 ![](README_files/figure-gfm/left-join-extra-1.gif)<!-- -->
 ``` r
 left_join(x, y_extra, by = "id")
 #> # A tibble: 4 x 3
 #>      id x     y    
 > All rows from `x` where there are not matching values in `y`, keeping
 > just columns from `x`.
 ``` r
 animate_anti_join(x, y, by = "id")
 ```
 ![](README_files/figure-gfm/anti-join-1.gif)<!-- -->
 ``` r
 > All unique rows from `x` and `y`.
 ``` r
 animate_union(x, y)
 ```
 ![](README_files/figure-gfm/union-1.gif)<!-- -->
 ``` r
--- a/man/preprocess_data.Rd
+++ b/man/preprocess_data.Rd
 \alias{preprocess_data}
 \title{Preprocess data}
 \usage{
 preprocess_data(x, y, by)
 preprocess_data(x, y, by, fill = FALSE)
 }
 \arguments{
 \item{x}{a left dataset}
 \item{y}{a right dataset}
 \item{by}{a by argument for joins / set operations}
 \item{fill}{if missing ids should be filled}
 }
 \value{
 a preprocessed dataset
 }
 \examples{
 NULL
 test for
 a <- c("unique", "mult", "mult", "also unique")
 add_duplicate_number(a)
 }
--- a/man/process_data.Rd
+++ b/man/process_data.Rd
 \alias{process_data}
 \title{Processes the data}
 \usage{
 process_data(x, ids, by, width = 1, side = NA)
 process_data(x, ids, by, width = 1, side = NA, fill = TRUE)
 }
 \arguments{
 \item{x}{a preprocessed dataset}
 \item{ids}{a vector of ids}
 \item{ids}{a data_frame of ids (.id and .id_long)}
 \item{by}{a vector of by-arguments}
 \item{width}{the width of the tiles}
 \item{side}{the side (x or y, lhs or rhs, etc)}
 \item{fill}{if missing ids should be filled}
 }
 \value{
 a data_frame including all necessary information