Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

105 lines
3.2KB

  1. #' Combines two processed datasets and combines them for a given method
  2. #'
  3. #' @param lhs the left-hand side dataset
  4. #' @param rhs the righ-hand side dataset
  5. #' @param type a string of the desired combination method, allowed are all dplyr
  6. #' joins or sets
  7. #'
  8. #' @return processed dataset of the combined values
  9. #' @export
  10. #'
  11. #' @examples
  12. #' NULL
  13. combine <- function(lhs, rhs, type) {
  14. all_ids <- bind_rows(lhs, rhs) %>% distinct(.id)
  15. all <- bind_rows(lhs, rhs)
  16. x_cols <- lhs %>% distinct(col)
  17. y_cols <- rhs %>% distinct(col)
  18. x_ids <- lhs %>% distinct(.id)
  19. y_ids <- rhs %>% distinct(.id)
  20. if (type == "full_join") {
  21. col_combiner <- dplyr::full_join
  22. row_combiner <- dplyr::full_join
  23. } else if (type == "inner_join") {
  24. col_combiner <- dplyr::inner_join
  25. row_combiner <- dplyr::inner_join
  26. } else if (type == "left_join") {
  27. col_combiner <- dplyr::full_join
  28. row_combiner <- dplyr::left_join
  29. } else if (type == "right_join") {
  30. col_combiner <- dplyr::full_join
  31. row_combiner <- dplyr::right_join
  32. } else if (type == "semi_join") {
  33. col_combiner <- dplyr::semi_join
  34. row_combiner <- dplyr::semi_join
  35. } else if (type == "anti_join") {
  36. col_combiner <- dplyr::semi_join
  37. row_combiner <- dplyr::anti_join
  38. } else if (type == "union") {
  39. col_combiner <- dplyr::full_join
  40. row_combiner <- dplyr::union
  41. } else if (type == "union_all") {
  42. col_combiner <- dplyr::full_join
  43. row_combiner <- dplyr::union_all
  44. x_ids <- lhs %>% distinct(.id = .id_long)
  45. y_ids <- rhs %>% distinct(.id = .id_long)
  46. all <- all %>% rename(id_old = .id, .id = .id_long)
  47. # all <- all %>% rename(.id = .id_long)
  48. } else if (type == "intersect") {
  49. col_combiner <- dplyr::full_join
  50. row_combiner <- dplyr::intersect
  51. } else if (type == "setdiff") {
  52. col_combiner <- dplyr::full_join
  53. row_combiner <- dplyr::anti_join
  54. } else {
  55. stop("Unknown func")
  56. }
  57. take_cols <- col_combiner(x_cols, y_cols, by = "col")
  58. take_ids <- row_combiner(x_ids, y_ids, by = ".id")
  59. # make sure .header is always the first
  60. id_number <- which(str_detect(take_ids$.id, "^.header"))
  61. if (length(id_number) != 0)
  62. take_ids <- take_ids[c(id_number, (1:nrow(take_ids))[-id_number]), ]
  63. if (!any(str_detect(take_ids$.id, "^.header")))
  64. take_ids <- bind_rows(data_frame(.id = ".header"), take_ids)
  65. take <- tidyr::crossing(take_ids, take_cols)
  66. mid <- (2 + length(unique(lhs$col)) + length(unique(rhs$col))) / 2
  67. xvals <- 1:nrow(take_cols)
  68. xvals <- xvals - mean(xvals) + mid
  69. names(xvals) <- take_cols %>% pull(col)
  70. n_non_header <- sum(str_detect(take_ids$.id, "^[^\\.header]"))
  71. yvals <- cumsum(ifelse(str_detect(take_ids$.id, "^\\.header"), 0, -1))
  72. names(yvals) <- take_ids %>% pull(.id)
  73. take_vals <- semi_join(all, take, by = c(".id", "col")) %>%
  74. mutate(.alpha = 1,
  75. .x = xvals[col],
  76. .y = yvals[.id])
  77. if (type == "union_all") {
  78. take_vals <- take_vals %>% rename(.id_long = .id, .id = id_old)
  79. }
  80. res <- bind_rows(
  81. # take,
  82. take_vals,
  83. # fade in place:
  84. all %>% filter(!.id %in% take_ids$.id) %>% mutate(.alpha = 0),
  85. # moving fade or fade in place as well:
  86. all %>% filter(.id %in% take_ids$.id & !col %in% take_cols$col) %>%
  87. mutate(.alpha = 0)
  88. )
  89. return(res)
  90. }