Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

140 linhas
4.3KB

  1. #' Preprocess data
  2. #'
  3. #' @param x a left dataset
  4. #' @param y a right dataset
  5. #' @param by a by argument for joins / set operations
  6. #' @param fill if missing ids should be filled
  7. #' @param ... further arguments passed to add_color
  8. #'
  9. #' @return a preprocessed dataset
  10. #'
  11. #' @examples
  12. #' NULL
  13. preprocess_data <- function(x, y, by, fill = TRUE, ...) {
  14. #' test for
  15. #' a <- c("unique", "mult", "mult", "also unique")
  16. #' add_duplicate_number(a)
  17. add_duplicate_number <- function(a) {
  18. data_frame(v = a) %>%
  19. group_by(v) %>%
  20. mutate(id = paste(v, 1:n(), sep = "-")) %>%
  21. pull(id)
  22. }
  23. x <- x %>%
  24. unite(one_of(by), col = ".id", remove = FALSE) %>%
  25. mutate(.id_long = add_duplicate_number(.id))
  26. y <- y %>%
  27. unite(one_of(by), col = ".id", remove = FALSE) %>%
  28. mutate(.id_long = add_duplicate_number(.id))
  29. ids <- dplyr::union(x %>% dplyr::select(.id, .id_long),
  30. y %>% dplyr::select(.id, .id_long))
  31. x_ <- process_data(x, ids, by, fill = fill, ...)
  32. y_ <- process_data(y, ids, by, fill = fill, ...) %>%
  33. mutate(.x = .x + ncol(x) - 1)
  34. return(list(x = x_, y = y_))
  35. }
  36. #' Processes the data
  37. #'
  38. #' @param x a preprocessed dataset
  39. #' @param ids a data_frame of ids (.id and .id_long)
  40. #' @param by a vector of by-arguments
  41. #' @param width the width of the tiles
  42. #' @param side the side (x or y, lhs or rhs, etc)
  43. #' @param fill if missing ids should be filled
  44. #' @param ... further arguments passed to add_color
  45. #'
  46. #' @return a data_frame including all necessary information
  47. #'
  48. #' @examples
  49. #' NULL
  50. process_data <- function(x, ids, by, width = 1, side = NA, fill = TRUE, ...) {
  51. if (is.na(side)) side <- deparse(substitute(x))
  52. x_names <- names(x) %>% str_subset("^[^\\.]")
  53. x_keys <- 1:length(x_names)
  54. names(x_keys) <- x_names
  55. special_vars <- names(x) %>% str_subset("^\\.")
  56. x <- x %>%
  57. mutate(.r = row_number()) %>%
  58. gather_(key = ".col", value = ".val", names(x) %>% str_subset("^[^.]")) %>%
  59. mutate(.x = x_keys[.col],
  60. .y = -.r) %>%
  61. bind_rows(data_frame(.id = ".header",
  62. .id_long = paste(".header", x_names, sep = "_"),
  63. .r = 0,
  64. .col = x_names,
  65. .val = x_names,
  66. .x = x_keys, .y = 0), .) %>%
  67. mutate(.width = width,
  68. .side = side)
  69. # if there are multiple values in the ids (-2, -3 etc) but they are not present
  70. # in x, because it is in the second/other dataset, add these values here
  71. id_long <- ids$.id_long
  72. mis_ids <- id_long[!id_long %in% x$.id_long]
  73. # if the missing value is a -1, that means the missing value comes not from
  74. # missing dublicate ids
  75. mis_ids <- str_subset(mis_ids, "[^-1]$")
  76. if (length(mis_ids) > 0 && fill) {
  77. mis_ids_short <- str_replace(mis_ids, "-[0-9]+$", "")
  78. # insert the missing ids at the right place
  79. for (i in mis_ids_short) {
  80. irow <- (1:nrow(x))[x$.id == i]
  81. irow <- irow[1]
  82. x <- bind_rows(
  83. x %>% slice(1:irow),
  84. x %>% filter(.id %in% mis_ids_short) %>% mutate(.id_long = mis_ids),
  85. x %>% slice((irow + 1):nrow(x))
  86. )
  87. }
  88. }
  89. res <- add_color(x, rev(ids$.id), by, ...)
  90. return(res)
  91. }
  92. #' Adds Color to a processed data_frame
  93. #'
  94. #' @param x a processed data_frame
  95. #' @param ids a vector of ids for the color-matching
  96. #' @param by a vector of column names that constitute the by-argument of joins/sets
  97. #' @param color_header color for the header
  98. #' @param color_other color for "inactive" values
  99. #' @param color_missing color for missing values
  100. #' @param color_fun the function to generate the colors
  101. #' @param ...
  102. #'
  103. #' @return the processed data_frame with a new column .color
  104. #'
  105. #' @examples
  106. #' NULL
  107. add_color <- function(x, ids, by,
  108. color_header = "#737373", color_other = "#d0d0d0",
  109. color_missing = "#ffffff",
  110. color_fun = scales::brewer_pal(type = "qual", "Set1"), ...) {
  111. colors <- c(color_header, color_fun(length(ids)))
  112. names(colors) <- c(".header", ids)
  113. res <- x %>%
  114. mutate(
  115. .color = ifelse(is.na(.val),
  116. color_missing,
  117. ifelse(.col %in% by,
  118. colors[.id],
  119. color_other)),
  120. .color = ifelse(.id == ".header", color_header, .color))
  121. return(res)
  122. }