🔍 An RStudio addin slash regex utility belt
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

141 lines
4.5KB

  1. #' @export
  2. run_regex <- function(
  3. text,
  4. pattern,
  5. ignore.case = FALSE,
  6. perl = FALSE,
  7. fixed = FALSE,
  8. useBytes = FALSE,
  9. invert = FALSE
  10. ) {
  11. m <- regexec(pattern, text, ignore.case, perl, fixed, useBytes)
  12. x <- purrr::map(m, function(mi) list('idx' = purrr::map2(mi, attr(mi, "match.length"), ~ if(.x[1] != -1) c(.x, .x + .y - 1L))))
  13. y <- purrr::map(text, ~ list(text = .))
  14. z <- purrr::map(regmatches(text, m), ~ list(m = .))
  15. purrr::map(seq_along(x), ~ list(text = y[[.]][[1]], idx = x[[.]][[1]], m = z[[.]][[1]]))
  16. }
  17. wrap_result <- function(x, escape = FALSE) {
  18. if (is.null(x$idx[[1]])) return(if (escape) escape_html(x$text) else x$text)
  19. text <- x$text
  20. idx <- x$idx
  21. len_idx <- length(idx)
  22. inserts <- data.frame(
  23. i = 1:len_idx - 1,
  24. start = purrr::map_int(idx, ~ .[1]),
  25. end = purrr::map_int(idx, ~ .[2]) + 1
  26. ) %>%
  27. mutate(
  28. class = sprintf("group g%02d", i),
  29. pad = 0
  30. )
  31. for (j in seq_len(nrow(inserts))) {
  32. if (inserts$i[j] == 0) next
  33. overlap <- filter(
  34. inserts[1:(j-1), ],
  35. i != 0,
  36. start <= !!inserts$start[j] & end >= !!inserts$end[j])
  37. inserts[j, 'pad'] <- inserts$pad[j] + nrow(overlap)
  38. }
  39. inserts <- inserts %>%
  40. tidyr::gather(type, loc, start:end) %>%
  41. mutate(
  42. class = ifelse(pad > 0, sprintf("%s pad%02d", class, pad), class),
  43. insert = ifelse(type == 'start', sprintf('<span class="%s">', class), "</span>")
  44. ) %>%
  45. group_by(loc, type) %>%
  46. summarize(insert = paste(insert, collapse = ''))
  47. # inserts now gives html (span open and close) to insert and loc
  48. # first split text at inserts$loc locations,
  49. # then recombine by zipping with inserts$insert text
  50. # start at 0, unless there's a hit on first character
  51. # end at nchar(text) + 1 because window is idx[k] to idx[k+1]-1
  52. idx_split <- c(0 - (inserts$loc[1] == 0), inserts$loc)
  53. if (!(nchar(text) + 1) %in% idx_split)
  54. idx_split <- c(idx_split, nchar(text) + 1)
  55. text_split <- c()
  56. for (k in seq_along(idx_split[-1])) {
  57. text_split <- c(text_split, substr(text, idx_split[k], idx_split[k+1] - 1))
  58. }
  59. out <- c()
  60. for (j in seq_along(text_split)) {
  61. out <- c(
  62. out,
  63. ifelse(escape, escape_html(text_split[j]), text_split[j]),
  64. if (!is.na(inserts$insert[j])) inserts$insert[j]
  65. )
  66. }
  67. paste(out, collapse = '')
  68. }
  69. wrap_regex <- function(pattern, escape = TRUE, exact = TRUE) {
  70. stopifnot(length(pattern) == 1)
  71. if(escape) pattern <- escape_html(pattern)
  72. r_open_parens <- "(?<![\\\\])\\("
  73. x <- strsplit(pattern, r_open_parens, perl = TRUE)[[1]]
  74. first <- x[1]
  75. x <- x[-1]
  76. if (length(x)) {
  77. x <- paste0(
  78. '<span class="g', sprintf("%02d", seq_along(x)), '">(',
  79. x,
  80. collapse = ""
  81. )
  82. x <- gsub("(?<![\\\\])\\)", ")</span>", x, perl = TRUE)
  83. }
  84. if (exact) x <- escape_backslash(x)
  85. paste0(first, x)
  86. }
  87. #' View grouped regex results
  88. #'
  89. #' @param text Text to search
  90. #' @param pattern Regex pattern to look for
  91. #' @param render Render results to an HTML doc and open in RStudio viewer?
  92. #' @param escape Escape HTML-related characters in `text`?
  93. #' @param knitr Print into knitr doc? If `TRUE`, marks text as `asis_output` and
  94. #' sets `render = FALSE` and `escape = TRUE`.
  95. #' @param exact Should regex be displayed as entered by the user into R console
  96. #' or source (default)? When `TRUE`, regex is displayed with the double `\\`
  97. #' required for escaping backslashes in R. When `FALSE`, regex is displayed
  98. #' as interpreted by the regex engine (i.e. double `\\` as a single `\`).
  99. #' @param ... Passed to [run_regex]
  100. #' @export
  101. view_regex <- function(
  102. text,
  103. pattern,
  104. ...,
  105. render = TRUE,
  106. escape = render,
  107. knitr = FALSE,
  108. exact = escape
  109. ) {
  110. if (knitr) {
  111. render <- FALSE
  112. escape <- TRUE
  113. }
  114. res <- run_regex(text, pattern, ...)
  115. res <- purrr::map_chr(res, wrap_result, escape = escape)
  116. res <- paste("<p class='results'>", res, "</p>")
  117. if (!nchar(pattern)) res <- paste("<p class='results'>", text, "</p>")
  118. if (knitr) return(knitr::asis_output(res))
  119. if (!render) return(res)
  120. head <- c(
  121. "---", "pagetitle: View Regex", "---",
  122. "<h5>Regex</h5>",
  123. "<p><pre style = 'font-size: 1em;'>", wrap_regex(pattern, escape, exact), "</pre></p>",
  124. "<h5>Results</h5>"
  125. )
  126. res <- c(head, res)
  127. tmp <- tempfile(fileext = ".Rmd")
  128. cat(res, file = tmp, sep = "\n")
  129. tmp_html <- suppressWarnings(
  130. rmarkdown::render(
  131. tmp,
  132. output_format = rmarkdown::html_document(css = system.file('style.css', package='regexhelp')),
  133. quiet = TRUE
  134. ))
  135. rstudioapi::viewer(tmp_html)
  136. }