Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

128 lines
3.8KB

  1. #' Preprocess data
  2. #'
  3. #' @param x a left dataset
  4. #' @param y a right dataset
  5. #' @param by a by argument for joins / set operations
  6. #' @param fill if missing ids should be filled
  7. #'
  8. #' @return a preprocessed dataset
  9. #'
  10. #' @examples
  11. #' NULL
  12. preprocess_data <- function(x, y, by, fill = TRUE) {
  13. #' test for
  14. #' a <- c("unique", "mult", "mult", "also unique")
  15. #' add_duplicate_number(a)
  16. add_duplicate_number <- function(a) {
  17. data_frame(v = a) %>%
  18. group_by(v) %>%
  19. mutate(id = paste(v, 1:n(), sep = "-")) %>%
  20. pull(id)
  21. }
  22. x <- x %>%
  23. unite(one_of(by), col = ".id", remove = FALSE) %>%
  24. mutate(.id_long = add_duplicate_number(.id))
  25. y <- y %>%
  26. unite(one_of(by), col = ".id", remove = FALSE) %>%
  27. mutate(.id_long = add_duplicate_number(.id))
  28. ids <- dplyr::union(x %>% dplyr::select(.id, .id_long),
  29. y %>% dplyr::select(.id, .id_long))
  30. x_ <- process_data(x, ids, by, fill = fill)
  31. y_ <- process_data(y, ids, by, fill = fill) %>%
  32. mutate(.x = .x + ncol(x) - 1)
  33. return(list(x = x_, y = y_))
  34. }
  35. #' Processes the data
  36. #'
  37. #' @param x a preprocessed dataset
  38. #' @param ids a data_frame of ids (.id and .id_long)
  39. #' @param by a vector of by-arguments
  40. #' @param width the width of the tiles
  41. #' @param side the side (x or y, lhs or rhs, etc)
  42. #' @param fill if missing ids should be filled
  43. #'
  44. #' @return a data_frame including all necessary information
  45. #'
  46. #' @examples
  47. #' NULL
  48. process_data <- function(x, ids, by, width = 1, side = NA, fill = TRUE) {
  49. if (is.na(side)) side <- deparse(substitute(x))
  50. x_names <- names(x) %>% str_subset("^[^\\.]")
  51. x_keys <- 1:length(x_names)
  52. names(x_keys) <- x_names
  53. special_vars <- names(x) %>% str_subset("^\\.")
  54. x <- x %>%
  55. mutate(.r = row_number()) %>%
  56. gather_(key = ".col", value = ".val", names(x) %>% str_subset("^[^.]")) %>%
  57. mutate(.x = x_keys[.col],
  58. .y = -.r) %>%
  59. bind_rows(data_frame(.id = ".header",
  60. .id_long = paste(".header", x_names, sep = "_"),
  61. .r = 0,
  62. .col = x_names,
  63. .val = x_names,
  64. .x = x_keys, .y = 0), .) %>%
  65. mutate(.width = width,
  66. .side = side)
  67. # if there are multiple values in the ids (-2, -3 etc) but they are not present
  68. # in x, because it is in the second/other dataset, add these values here
  69. id_long <- ids$.id_long
  70. mis_ids <- id_long[!id_long %in% x$.id_long]
  71. # if the missing value is a -1, that means the missing value comes not from
  72. # missing dublicate ids
  73. mis_ids <- str_subset(mis_ids, "[^-1]$")
  74. if (length(mis_ids) > 0 && fill) {
  75. mis_ids_short <- str_replace(mis_ids, "-[0-9]+$", "")
  76. # insert the missing ids at the right place
  77. for (i in mis_ids_short) {
  78. irow <- (1:nrow(x))[x$.id == i]
  79. irow <- irow[1]
  80. x <- bind_rows(
  81. x %>% slice(1:irow),
  82. x %>% filter(.id %in% mis_ids_short) %>% mutate(.id_long = mis_ids),
  83. x %>% slice((irow + 1):nrow(x))
  84. )
  85. }
  86. }
  87. res <- add_color(x, ids$.id, by)
  88. return(res)
  89. }
  90. #' Adds Color to a processed data_frame
  91. #'
  92. #' @param x a processed data_frame
  93. #' @param ids a vector of ids for the color-matching
  94. #' @param by a vector of column names that constitute the by-argument of joins/sets
  95. #' @param color_header color for the header
  96. #' @param color_other color for "inactive" values
  97. #' @param color_missing color for missing values
  98. #'
  99. #' @return the processed data_frame with a new column .color
  100. #'
  101. #' @examples
  102. #' NULL
  103. add_color <- function(x, ids, by, color_header = "#bdbdbd", color_other = "#d0d0d0", color_missing = "#ffffff") {
  104. colors <- c(color_header, scales::brewer_pal(type = "qual", "Set1")(length(ids)))
  105. names(colors) <- c(".header", ids)
  106. res <- x %>%
  107. mutate(.color = ifelse(is.na(.val), color_missing, colors[.id]),
  108. .color = ifelse(.col %in% by, .color, color_other))
  109. return(res)
  110. }