|
-
- #' Combines two processed datasets and combines them for a given method
- #'
- #' @param lhs the left-hand side dataset
- #' @param rhs the righ-hand side dataset
- #' @param type a string of the desired combination method, allowed are all dplyr
- #' joins or sets
- #'
- #' @return processed dataset of the combined values
- #' @export
- #'
- #' @examples
- #' NULL
- combine <- function(lhs, rhs, type) {
-
- all_ids <- bind_rows(lhs, rhs) %>% distinct(.id)
-
- all <- bind_rows(lhs, rhs)
-
- x_cols <- lhs %>% distinct(col)
- y_cols <- rhs %>% distinct(col)
-
- x_ids <- lhs %>% distinct(.id)
- y_ids <- rhs %>% distinct(.id)
-
- if (type == "full_join") {
- col_combiner <- dplyr::full_join
- row_combiner <- dplyr::full_join
- } else if (type == "inner_join") {
- col_combiner <- dplyr::inner_join
- row_combiner <- dplyr::inner_join
- } else if (type == "left_join") {
- col_combiner <- dplyr::full_join
- row_combiner <- dplyr::left_join
- } else if (type == "right_join") {
- col_combiner <- dplyr::full_join
- row_combiner <- dplyr::right_join
- } else if (type == "semi_join") {
- col_combiner <- dplyr::semi_join
- row_combiner <- dplyr::semi_join
- } else if (type == "anti_join") {
- col_combiner <- dplyr::semi_join
- row_combiner <- dplyr::anti_join
- } else if (type == "union") {
- col_combiner <- dplyr::full_join
- row_combiner <- dplyr::union
- } else if (type == "union_all") {
- col_combiner <- dplyr::full_join
- row_combiner <- dplyr::union_all
-
- x_ids <- lhs %>% distinct(.id = .id_long)
- y_ids <- rhs %>% distinct(.id = .id_long)
- all <- all %>% rename(id_old = .id, .id = .id_long)
- # all <- all %>% rename(.id = .id_long)
- } else if (type == "intersect") {
- col_combiner <- dplyr::full_join
- row_combiner <- dplyr::intersect
- } else if (type == "setdiff") {
- col_combiner <- dplyr::full_join
- row_combiner <- dplyr::anti_join
- } else {
- stop("Unknown func")
- }
-
- take_cols <- col_combiner(x_cols, y_cols, by = "col")
- take_ids <- row_combiner(x_ids, y_ids, by = ".id")
- # make sure .header is always the first
- id_number <- which(str_detect(take_ids$.id, "^.header"))
- if (length(id_number) != 0)
- take_ids <- take_ids[c(id_number, (1:nrow(take_ids))[-id_number]), ]
- if (!any(str_detect(take_ids$.id, "^.header")))
- take_ids <- bind_rows(data_frame(.id = ".header"), take_ids)
-
- take <- tidyr::crossing(take_ids, take_cols)
-
- mid <- (2 + length(unique(lhs$col)) + length(unique(rhs$col))) / 2
- xvals <- 1:nrow(take_cols)
- xvals <- xvals - mean(xvals) + mid
- names(xvals) <- take_cols %>% pull(col)
-
- n_non_header <- sum(str_detect(take_ids$.id, "^[^\\.header]"))
- yvals <- cumsum(ifelse(str_detect(take_ids$.id, "^\\.header"), 0, -1))
- names(yvals) <- take_ids %>% pull(.id)
-
- take_vals <- semi_join(all, take, by = c(".id", "col")) %>%
- mutate(.alpha = 1,
- .x = xvals[col],
- .y = yvals[.id])
-
- if (type == "union_all") {
- take_vals <- take_vals %>% rename(.id_long = .id, .id = id_old)
- }
-
- res <- bind_rows(
- # take,
- take_vals,
- # fade in place:
- all %>% filter(!.id %in% take_ids$.id) %>% mutate(.alpha = 0),
- # moving fade or fade in place as well:
- all %>% filter(.id %in% take_ids$.id & !col %in% take_cols$col) %>%
- mutate(.alpha = 0)
- )
- return(res)
- }
|