Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

94 linhas
2.7KB

  1. #' Preprocess data
  2. #'
  3. #' @param x a left dataset
  4. #' @param y a right dataset
  5. #' @param by a by argument for joins / set operations
  6. #'
  7. #' @return a preprocessed dataset
  8. #'
  9. #' @examples
  10. #' NULL
  11. preprocess_data <- function(x, y, by) {
  12. xvars <- names(x) %>% str_subset("^[^\\.]")
  13. yvars <- names(y) %>% str_subset("^[^\\.]")
  14. x <- x %>%
  15. unite(one_of(by), col = ".id", remove = FALSE) %>%
  16. unite(one_of(xvars), col = ".id_long", remove = FALSE)
  17. y <- y %>%
  18. unite(one_of(by), col = ".id", remove = FALSE) %>%
  19. unite(one_of(yvars), col = ".id_long", remove = FALSE)
  20. ids <- unique(c(x$.id, y$.id))
  21. x_ <- process_data(x, ids, by) %>%
  22. mutate(.id_long = paste(.id_long, .side, .r, sep = "_"))
  23. y_ <- process_data(y, ids, by) %>%
  24. mutate(.x = .x + ncol(x),
  25. .id_long = paste(.id_long, .side, .r, sep = "_"))
  26. return(list(x = x_, y = y_))
  27. }
  28. #' Processes the data
  29. #'
  30. #' @param x a preprocessed dataset
  31. #' @param ids a vector of ids
  32. #' @param by a vector of by-arguments
  33. #' @param width the width of the tiles
  34. #' @param side the side (x or y, lhs or rhs, etc)
  35. #'
  36. #' @return a data_frame including all necessary information
  37. #'
  38. #' @examples
  39. #' NULL
  40. process_data <- function(x, ids, by, width = 1, side = NA) {
  41. if (is.na(side)) side <- deparse(substitute(x))
  42. x_names <- names(x) %>% str_subset("^[^\\.]")
  43. x_keys <- 1:length(x_names)
  44. names(x_keys) <- x_names
  45. special_vars <- names(x) %>% str_subset("^\\.")
  46. x <- x %>%
  47. mutate(.r = row_number()) %>%
  48. gather_(key = "col", value = "val", names(x) %>% str_subset("^[^.]")) %>%
  49. mutate(.x = x_keys[col],
  50. .y = -.r) %>%
  51. bind_rows(data_frame(.id = ".header",
  52. .id_long = paste(".header", x_names, sep = "_"),
  53. .r = 0, col = x_names, val = x_names,
  54. .x = x_keys, .y = 0), .) %>%
  55. mutate(.width = width,
  56. .side = side)
  57. add_color(x, ids, by)
  58. }
  59. #' Adds Color to a processed data_frame
  60. #'
  61. #' @param x a processed data_frame
  62. #' @param ids a vector of ids for the color-matching
  63. #' @param by a vector of column names that constitute the by-argument of joins/sets
  64. #' @param color_header color for the header
  65. #' @param color_other color for "inactive" values
  66. #' @param color_missing color for missing values
  67. #'
  68. #' @return the processed data_frame with a new column .color
  69. #'
  70. #' @examples
  71. #' NULL
  72. add_color <- function(x, ids, by, color_header = "#bdbdbd", color_other = "#d0d0d0", color_missing = "#ffffff") {
  73. colors <- c(color_header, scales::brewer_pal(type = "qual", "Set1")(length(ids)))
  74. names(colors) <- c(".header", ids)
  75. x %>%
  76. mutate(.color = ifelse(is.na(val), color_missing, colors[.id]),
  77. .color = ifelse(col %in% by, .color, color_other))
  78. }