garrick
/
tidyexplain


			
				
					
						
						
							
							
#' Combines two processed datasets and combines them for a given method
#'
#' @param lhs the left-hand side dataset
#' @param rhs the righ-hand side dataset
#' @param type a string of the desired combination method, allowed are all dplyr
#'             joins or sets
#'
#' @return  processed dataset of the combined values
#' @export
#'
#' @examples
#' NULL
combine <- function(lhs, rhs, type) {

  all_ids <- bind_rows(lhs, rhs) %>% distinct(.id)

  all <- bind_rows(lhs, rhs)

  x_cols <- lhs %>% distinct(col)
  y_cols <- rhs %>% distinct(col)

  x_ids <- lhs %>% distinct(.id)
  y_ids <- rhs %>% distinct(.id)

  if (type == "full_join") {
    col_combiner <- dplyr::full_join
    row_combiner <- dplyr::full_join
  } else if (type == "inner_join") {
    col_combiner <- dplyr::inner_join
    row_combiner <- dplyr::inner_join
  } else if (type == "left_join") {
    col_combiner <- dplyr::full_join
    row_combiner <- dplyr::left_join
  } else if (type == "right_join") {
    col_combiner <- dplyr::full_join
    row_combiner <- dplyr::right_join
  } else if (type == "semi_join") {
    col_combiner <- dplyr::semi_join
    row_combiner <- dplyr::semi_join
  } else if (type == "anti_join") {
    col_combiner <- dplyr::semi_join
    row_combiner <- dplyr::anti_join
  } else if (type == "union") {
    col_combiner <- dplyr::full_join
    row_combiner <- dplyr::union
  } else if (type == "union_all") {
    col_combiner <- dplyr::full_join
    row_combiner <- dplyr::union_all

    x_ids <- lhs %>% distinct(.id = .id_long)
    y_ids <- rhs %>% distinct(.id = .id_long)
    all <- all %>% rename(id_old = .id, .id = .id_long)
    # all <- all %>% rename(.id = .id_long)
  } else if (type == "intersect") {
    col_combiner <- dplyr::full_join
    row_combiner <- dplyr::intersect
  } else if (type == "setdiff") {
    col_combiner <- dplyr::full_join
    row_combiner <- dplyr::anti_join
  } else {
    stop("Unknown func")
  }

  take_cols <- col_combiner(x_cols, y_cols, by = "col")
  take_ids  <- row_combiner(x_ids, y_ids, by = ".id")
  # make sure .header is always the first
  id_number <- which(str_detect(take_ids$.id, "^.header"))
  if (length(id_number) != 0)
    take_ids <- take_ids[c(id_number, (1:nrow(take_ids))[-id_number]), ]
  if (!any(str_detect(take_ids$.id, "^.header")))
    take_ids <- bind_rows(data_frame(.id = ".header"), take_ids)

  take <- tidyr::crossing(take_ids, take_cols)

  mid <- (2 + length(unique(lhs$col)) + length(unique(rhs$col))) / 2
  xvals <- 1:nrow(take_cols)
  xvals <- xvals - mean(xvals) + mid
  names(xvals) <- take_cols %>% pull(col)

  n_non_header <- sum(str_detect(take_ids$.id, "^[^\\.header]"))
  yvals <- cumsum(ifelse(str_detect(take_ids$.id, "^\\.header"), 0, -1))
  names(yvals) <- take_ids %>% pull(.id)

  take_vals <- semi_join(all, take, by = c(".id", "col")) %>%
    mutate(.alpha = 1,
           .x = xvals[col],
           .y = yvals[.id])

  if (type ==  "union_all") {
    take_vals <- take_vals %>% rename(.id_long = .id, .id = id_old)
  }

  res <- bind_rows(
    # take,
    take_vals,
    # fade in place:
    all %>% filter(!.id %in% take_ids$.id) %>% mutate(.alpha = 0),
    # moving fade or fade in place as well:
    all %>% filter(.id %in% take_ids$.id & !col %in% take_cols$col) %>%
      mutate(.alpha = 0)
  )
  return(res)
}