🔍 An RStudio addin slash regex utility belt
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

172 lines
5.7KB

  1. #' Extract matched groups from regexp
  2. #'
  3. #' @param text Text to search
  4. #' @param pattern regexp
  5. #' @inheritParams base::regexec
  6. #' @export
  7. run_regex <- function(
  8. text,
  9. pattern,
  10. ignore.case = FALSE,
  11. perl = FALSE,
  12. fixed = FALSE,
  13. useBytes = FALSE,
  14. invert = FALSE
  15. ) {
  16. # Use regex to get matches by group, gives start index and length
  17. m <- regexec(pattern, text, ignore.case, perl, fixed, useBytes)
  18. # Convert to start/end index
  19. x <- purrr::map(m, function(mi) {
  20. list(
  21. 'idx' = purrr::map2(mi, attr(mi, "match.length"),
  22. ~ if(.x[1] != -1) c(.x, .x + .y)))
  23. })
  24. # Store text and original regexc result with same hierarchy
  25. y <- purrr::map(text, ~ list(text = .))
  26. z <- purrr::map(regmatches(text, m), ~ list(m = .))
  27. # Zip text, indexes and regexc match object lists
  28. purrr::map(seq_along(x), ~ list(text = y[[.]][[1]], idx = x[[.]][[1]], m = z[[.]][[1]]))
  29. }
  30. wrap_result <- function(x, escape = FALSE) {
  31. if (is.null(x$idx[[1]])) return(if (escape) escape_html(x$text) else x$text)
  32. text <- x$text
  33. idx <- x$idx
  34. len_idx <- length(idx)
  35. inserts <- data.frame(
  36. i = 1:len_idx - 1,
  37. start = purrr::map_int(idx, ~ .[1]),
  38. end = purrr::map_int(idx, ~ .[2])
  39. ) %>%
  40. mutate(
  41. # unmatched groups have start/end of zero
  42. start = ifelse(.data$start == 0, NA, .data$start),
  43. end = ifelse(.data$end == 0, NA, .data$end),
  44. class = sprintf("group g%02d", .data$i),
  45. pad = 0
  46. )
  47. for (j in seq_len(nrow(inserts))) {
  48. if (inserts$i[j] == 0) next
  49. if (is.na(inserts$start[j]) || is.na(inserts$end[j])) next
  50. overlap <- filter(
  51. inserts[1:(j-1), ],
  52. .data$i != 0,
  53. .data$start <= !!inserts$start[j] & .data$end >= !!inserts$end[j])
  54. inserts[j, 'pad'] <- inserts$pad[j] + nrow(overlap)
  55. }
  56. inserts <- inserts %>%
  57. tidyr::gather(type, loc, start:end) %>%
  58. filter(!is.na(.data$loc)) %>%
  59. mutate(
  60. class = ifelse(.data$pad > 0, sprintf("%s pad%02d", .data$class, .data$pad), .data$class),
  61. insert = ifelse(.data$type == 'start', sprintf('<span class="%s">', .data$class), "</span>")
  62. ) %>%
  63. group_by(.data$loc, .data$type) %>%
  64. summarize(insert = paste(.data$insert, collapse = ''))
  65. # inserts now gives html (span open and close) to insert and loc
  66. # first split text at inserts$loc locations,
  67. # then recombine by zipping with inserts$insert text
  68. # start at 0, unless there's a hit on first character
  69. # end at nchar(text) + 1 because window is idx[k] to idx[k+1]-1
  70. idx_split <- c(0 - (inserts$loc[1] == 0), inserts$loc)
  71. if (!(nchar(text) + 1) %in% idx_split)
  72. idx_split <- c(idx_split, nchar(text) + 1)
  73. text_split <- c()
  74. for (k in seq_along(idx_split[-1])) {
  75. text_split <- c(text_split, substr(text, idx_split[k], idx_split[k+1] - 1))
  76. }
  77. out <- c()
  78. for (j in seq_along(text_split)) {
  79. out <- c(
  80. out,
  81. ifelse(escape, escape_html(text_split[j]), text_split[j]),
  82. if (!is.na(inserts$insert[j])) inserts$insert[j]
  83. )
  84. }
  85. paste(out, collapse = '')
  86. }
  87. wrap_regex <- function(pattern, escape = TRUE, exact = TRUE) {
  88. stopifnot(length(pattern) == 1)
  89. if(escape) pattern <- escape_html(pattern)
  90. r_open_parens <- "(?<![\\\\])\\("
  91. x <- strsplit(pattern, r_open_parens, perl = TRUE)[[1]]
  92. first <- x[1]
  93. x <- x[-1]
  94. if (length(x)) {
  95. x <- paste0(
  96. '<span class="g', sprintf("%02d", seq_along(x)), '">(',
  97. x,
  98. collapse = ""
  99. )
  100. x <- gsub("(?<![\\\\])\\)", ")</span>", x, perl = TRUE)
  101. }
  102. if (exact) x <- escape_backslash(x)
  103. paste0(first, x)
  104. }
  105. #' View grouped regex results
  106. #'
  107. #' @param text Text to search
  108. #' @param pattern Regex pattern to look for
  109. #' @param render Render results to an HTML doc and open in RStudio viewer?
  110. #' @param escape Escape HTML-related characters in `text`?
  111. #' @param knitr Print into knitr doc? If `TRUE`, marks text as `asis_output` and
  112. #' sets `render = FALSE` and `escape = TRUE`.
  113. #' @param exact Should regex be displayed as entered by the user into R console
  114. #' or source (default)? When `TRUE`, regex is displayed with the double `\\\\`
  115. #' required for escaping backslashes in R. When `FALSE`, regex is displayed
  116. #' as interpreted by the regex engine (i.e. double `\\\\` as a single `\\`).
  117. #' @param ... Passed to [run_regex]
  118. #' @export
  119. view_regex <- function(
  120. text,
  121. pattern,
  122. ...,
  123. render = TRUE,
  124. escape = render,
  125. knitr = FALSE,
  126. exact = escape
  127. ) {
  128. if (knitr) {
  129. render <- FALSE
  130. escape <- TRUE
  131. }
  132. res <- run_regex(text, pattern, ...)
  133. res <- purrr::map_chr(res, wrap_result, escape = escape)
  134. res <- purrr::map_chr(res, function(resi) {
  135. result_pad <- ""
  136. if (grepl("pad\\d{2}", resi)) {
  137. max_pad <- max(stringr::str_extract_all(resi, "pad\\d{2}")[[1]])
  138. max_pad_level <- as.integer(stringr::str_extract(max_pad, "\\d{2}"))
  139. if (max_pad_level - 3 > 0) {
  140. result_pad <- sprintf("pad%02d", max_pad_level - 3)
  141. }
  142. }
  143. paste("<p class='results", result_pad, "'>", resi, "</p>")
  144. })
  145. res <- paste(res, collapse = "")
  146. if (!nchar(pattern)) res <- paste("<p class='results'>", text, "</p>")
  147. if (knitr) return(knitr::asis_output(res))
  148. if (!render) return(res)
  149. head <- c(
  150. "---", "pagetitle: View Regex", "---",
  151. "<h5>Regex</h5>",
  152. "<p><pre>", wrap_regex(pattern, escape, exact), "</pre></p>",
  153. "<h5>Results</h5>"
  154. )
  155. res <- c(head, res)
  156. tmp <- tempfile(fileext = ".Rmd")
  157. cat(res, file = tmp, sep = "\n")
  158. tmp_html <- suppressWarnings(
  159. rmarkdown::render(
  160. tmp,
  161. output_format = rmarkdown::html_document(css = c(system.file('style.css', package='regexplain'),
  162. system.file('skeleton.css', package='regexplain')),
  163. theme = NULL),
  164. quiet = TRUE
  165. ))
  166. rstudioapi::viewer(tmp_html)
  167. }