Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

149 lines
4.6KB

  1. #' Preprocess data
  2. #'
  3. #' @param x a left dataset
  4. #' @param y a right dataset
  5. #' @param by a by argument for joins / set operations
  6. #' @param fill if missing ids should be filled
  7. #' @param ... further arguments passed to add_color
  8. #'
  9. #' @return a preprocessed dataset
  10. #'
  11. #' @examples
  12. #' NULL
  13. process_join <- function(x, y, by, fill = TRUE, ...) {
  14. #' test for
  15. #' a <- c("unique", "mult", "mult", "also unique")
  16. #' add_duplicate_number(a)
  17. add_duplicate_number <- function(a) {
  18. data_frame(v = a) %>%
  19. group_by(v) %>%
  20. mutate(id = paste(v, 1:n(), sep = "-")) %>%
  21. pull(id)
  22. }
  23. x <- x %>%
  24. unite(one_of(by), col = ".id", remove = FALSE) %>%
  25. mutate(.id_long = add_duplicate_number(.id))
  26. y <- y %>%
  27. unite(one_of(by), col = ".id", remove = FALSE) %>%
  28. mutate(.id_long = add_duplicate_number(.id))
  29. ids <- dplyr::union(x %>% dplyr::select(.id, .id_long),
  30. y %>% dplyr::select(.id, .id_long))
  31. x_ <- process_data_join(x, ids, by, fill = fill, ...)
  32. y_ <- process_data_join(y, ids, by, fill = fill, ...) %>%
  33. mutate(.x = .x + ncol(x) - 1)
  34. return(list(x = x_, y = y_))
  35. }
  36. #' Processes the data
  37. #'
  38. #' @param x a preprocessed dataset
  39. #' @param ids a data_frame of ids (.id and .id_long)
  40. #' @param by a vector of by-arguments
  41. #' @param width the width of the tiles
  42. #' @param side the side (x or y, lhs or rhs, etc)
  43. #' @param fill if missing ids should be filled
  44. #' @param ... further arguments passed to add_color
  45. #'
  46. #' @return a data_frame including all necessary information
  47. #'
  48. #' @examples
  49. #' NULL
  50. process_data_join <- function(x, ids, by, width = 1, side = NA, fill = TRUE, ...) {
  51. if (is.na(side)) side <- deparse(substitute(x))
  52. x_names <- names(x) %>% str_subset("^[^\\.]")
  53. x_keys <- 1:length(x_names)
  54. names(x_keys) <- x_names
  55. special_vars <- names(x) %>% str_subset("^\\.")
  56. x <- x %>%
  57. mutate(.r = row_number()) %>%
  58. gather_(key = ".col", value = ".val", names(x) %>% str_subset("^[^.]")) %>%
  59. mutate(.x = x_keys[.col],
  60. .y = -.r) %>%
  61. bind_rows(data_frame(.id = ".header",
  62. .id_long = paste(".header", x_names, sep = "_"),
  63. .r = 0,
  64. .col = x_names,
  65. .val = x_names,
  66. .x = x_keys, .y = 0), .) %>%
  67. mutate(.width = width,
  68. .side = side)
  69. # if there are multiple values in the ids (-2, -3 etc) but they are not present
  70. # in x, because it is in the second/other dataset, add these values here
  71. id_long <- ids$.id_long
  72. mis_ids <- id_long[!id_long %in% x$.id_long]
  73. # if the missing value is a -1, that means the missing value comes not from
  74. # missing dublicate ids
  75. mis_ids <- str_subset(mis_ids, "[^-1]$")
  76. if (length(mis_ids) > 0 && fill) {
  77. mis_ids_short <- str_replace(mis_ids, "-[0-9]+$", "")
  78. # insert the missing ids at the right place
  79. for (i in mis_ids_short) {
  80. irow <- (1:nrow(x))[x$.id == i]
  81. irow <- irow[1]
  82. x <- bind_rows(
  83. x %>% slice(1:irow),
  84. x %>% filter(.id %in% mis_ids_short) %>% mutate(.id_long = mis_ids),
  85. x %>% slice((irow + 1):nrow(x))
  86. )
  87. }
  88. }
  89. res <- add_color_join(x, rev(ids$.id), by, ...)
  90. return(res)
  91. }
  92. #' Adds Color to a processed data_frame
  93. #'
  94. #' @param x a processed data_frame
  95. #' @param ids a vector of ids for the color-matching
  96. #' @param by a vector of column names that constitute the by-argument of joins/sets
  97. #' @param color_header color for the header
  98. #' @param color_other color for "inactive" values
  99. #' @param color_missing color for missing values
  100. #' @param color_fun the function to generate the colors
  101. #' @param text_color the color for the text inside the tiles,
  102. #' defaults to white/black depending on tile color
  103. #' @param ...
  104. #'
  105. #' @return the processed data_frame with a new column .color
  106. #'
  107. #' @examples
  108. #' NULL
  109. add_color_join <- function(x, ids, by,
  110. color_header = "#737373", color_other = "#d0d0d0",
  111. color_missing = "#ffffff",
  112. color_fun = scales::brewer_pal(type = "qual", "Set1"),
  113. text_color = NA, ...) {
  114. colors <- c(color_header, color_fun(length(ids)))
  115. names(colors) <- c(".header", ids)
  116. res <- x %>%
  117. mutate(
  118. .color = ifelse(is.na(.val),
  119. color_missing,
  120. ifelse(.col %in% by,
  121. colors[.id],
  122. color_other)),
  123. .color = ifelse(.id == ".header", color_header, .color),
  124. .textcolor = text_color)
  125. if (is.na(text_color))
  126. res <- res %>% mutate(.textcolor = set_text_color(.color))
  127. return(res)
  128. }