🔍 An RStudio addin slash regex utility belt
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

103 satır
3.2KB

  1. #' @export
  2. run_regex <- function(
  3. text,
  4. pattern,
  5. ignore.case = FALSE,
  6. perl = FALSE,
  7. fixed = FALSE,
  8. useBytes = FALSE,
  9. invert = FALSE
  10. ) {
  11. m <- regexec(pattern, text, ignore.case, perl, fixed, useBytes)
  12. x <- purrr::map(purrr::set_names(m, text), function(mi) list('idx' = purrr::map2(mi, attr(mi, "match.length"), ~ if(.x[1] != -1) c(.x, .x + .y - 1L))))
  13. y <- purrr::map(purrr::set_names(text), ~ list(text = .))
  14. z <- purrr::map(purrr::set_names(regmatches(text, m), text), ~ list(m = .))
  15. x <- utils::modifyList(y, x, TRUE)
  16. utils::modifyList(z, x, TRUE)
  17. }
  18. wrap_span <- function(x, escape = FALSE) {
  19. if (is.null(x$idx[[1]])) return(x$text)
  20. text <- x$text
  21. idx <- x$idx
  22. len_idx <- length(idx)
  23. inserts <- data.frame(
  24. i = 1:len_idx - 1,
  25. start = purrr::map_int(idx, ~ .[1]),
  26. end = purrr::map_int(idx, ~ .[2]) + 1
  27. ) %>%
  28. mutate(
  29. class = sprintf("group g%02d", i),
  30. pad = 0
  31. )
  32. for (j in seq_len(nrow(inserts))) {
  33. if (inserts$i[j] == 0) next
  34. overlap <- filter(
  35. inserts,
  36. i != 0,
  37. start <= !!inserts$start[j] & end > !!inserts$end[j])
  38. inserts[j, 'pad'] <- inserts$pad[j] + nrow(overlap)
  39. }
  40. inserts <- inserts %>%
  41. tidyr::gather(type, loc, start:end) %>%
  42. mutate(
  43. class = ifelse(pad > 0, sprintf("%s pad%02d", class, pad), class),
  44. insert = ifelse(type == 'start', sprintf('<span class="%s">', class), "</span>")
  45. ) %>%
  46. group_by(loc, type) %>%
  47. summarize(insert = paste(insert, collapse = ''))
  48. idx_split <- c(0 - (inserts$loc[1] == 0), inserts$loc)
  49. if (!(nchar(text) + 1) %in% idx_split) idx_split <- c(idx_split, nchar(text) + 1)
  50. text_split <- c()
  51. for (k in seq_along(idx_split[-1])) {
  52. text_split <- c(text_split, substr(text, idx_split[k], idx_split[k+1] - 1))
  53. }
  54. out <- c()
  55. for (j in seq_along(text_split)) {
  56. out <- c(
  57. out,
  58. ifelse(escape, escape_html(text_split[j]), text_split[j]),
  59. if (!is.na(inserts$insert[j])) inserts$insert[j]
  60. )
  61. }
  62. paste(out, collapse = '')
  63. }
  64. #' View grouped regex results
  65. #'
  66. #' @param text Text to search
  67. #' @param pattern Regex pattern to look for
  68. #' @param render Render results to an HTML doc and open in RStudio viewer?
  69. #' @param escape Escape HTML-related characters in `text`?
  70. #' @param knitr Print into knitr doc? If `TRUE`, marks text as `asis_output` and
  71. #' sets `render = FALSE` and `escape = TRUE`.
  72. #' @param ... Passed to [run_regex]
  73. #' @export
  74. view_regex <- function(text, pattern, ..., render = TRUE, escape = render, knitr = FALSE) {
  75. if (knitr) {
  76. render <- FALSE
  77. escape <- TRUE
  78. }
  79. res <- run_regex(text, pattern, ...)
  80. res <- purrr::map_chr(res, wrap_span, escape = escape)
  81. res <- paste("<p class='results'>", res, "</p>")
  82. if (knitr) return(knitr::asis_output(res))
  83. if (!render) return(res)
  84. head <- c(
  85. "---", "pagetitle: View Regex", "---",
  86. "<h5>Regex</h5>",
  87. "<p><pre>", escape_html(pattern), "</pre></p>",
  88. "<h5>Results</h5>"
  89. )
  90. res <- c(head, res)
  91. tmp <- tempfile(fileext = ".Rmd")
  92. cat(res, file = tmp, sep = "\n")
  93. tmp_html <- suppressWarnings(
  94. rmarkdown::render(
  95. tmp,
  96. output_format = rmarkdown::html_document(css = system.file('style.css', package='regexhelp')),
  97. quiet = TRUE
  98. ))
  99. rstudioapi::viewer(tmp_html)
  100. }