🔍 An RStudio addin slash regex utility belt
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

220 lines
7.5KB

  1. #' Extract matched groups from regexp
  2. #'
  3. #' @param text Text to search
  4. #' @param pattern regexp
  5. #' @inheritParams base::regexec
  6. #' @export
  7. run_regex <- function(
  8. text,
  9. pattern,
  10. ignore.case = FALSE,
  11. perl = FALSE,
  12. fixed = FALSE,
  13. useBytes = FALSE,
  14. invert = FALSE
  15. ) {
  16. # Use regex to get matches by group, gives start index and length
  17. m <- regexec(pattern, text, ignore.case, perl, fixed, useBytes)
  18. # Convert to start/end index
  19. x <- purrr::map(m, function(mi) {
  20. list(
  21. 'idx' = purrr::map2(mi, attr(mi, "match.length"),
  22. ~ if(.x[1] != -1) c(.x, .x + .y)))
  23. })
  24. # Store text and original regexc result with same hierarchy
  25. y <- purrr::map(text, ~ list(text = .))
  26. z <- purrr::map(regmatches(text, m), ~ list(m = .))
  27. # Zip text, indexes and regexc match object lists
  28. purrr::map(seq_along(x), ~ list(text = y[[.]][[1]], idx = x[[.]][[1]], m = z[[.]][[1]]))
  29. }
  30. #' Wrap matches in HTML span tags to colorize via CSS
  31. #'
  32. #' @param x Individual list item in list returned by [run_regex()]
  33. #' @inheritParams view_regex
  34. #' @keywords internal
  35. wrap_result <- function(x, escape = FALSE, exact = FALSE) {
  36. if (is.null(x$idx[[1]])) return(if (escape) escape_html(x$text) else x$text)
  37. text <- x$text
  38. idx <- x$idx
  39. len_idx <- length(idx)
  40. inserts <- data.frame(
  41. i = 1:len_idx - 1,
  42. start = purrr::map_int(idx, ~ .[1]),
  43. end = purrr::map_int(idx, ~ .[2])
  44. ) %>%
  45. mutate(
  46. # unmatched groups have start/end of zero
  47. start = ifelse(.data$start == 0, NA, .data$start),
  48. end = ifelse(.data$end == 0, NA, .data$end),
  49. class = sprintf("group g%02d", .data$i),
  50. pad = 0
  51. )
  52. for (j in seq_len(nrow(inserts))) {
  53. if (inserts$i[j] == 0) next
  54. if (is.na(inserts$start[j]) || is.na(inserts$end[j])) next
  55. overlap <- filter(
  56. inserts[1:(j-1), ],
  57. .data$i != 0,
  58. .data$start <= !!inserts$start[j] & .data$end >= !!inserts$end[j])
  59. inserts[j, 'pad'] <- inserts$pad[j] + nrow(overlap)
  60. }
  61. inserts <- inserts %>%
  62. tidyr::gather(type, loc, start:end) %>%
  63. filter(!is.na(.data$loc)) %>%
  64. mutate(
  65. class = ifelse(.data$pad > 0, sprintf("%s pad%02d", .data$class, .data$pad), .data$class),
  66. insert = ifelse(.data$type == 'start', sprintf('<span class="%s">', .data$class), "</span>")
  67. ) %>%
  68. group_by(.data$loc, .data$type) %>%
  69. summarize(insert = paste(.data$insert, collapse = ''))
  70. # inserts now gives html (span open and close) to insert and loc
  71. # first split text at inserts$loc locations,
  72. # then recombine by zipping with inserts$insert text
  73. # start at 0, unless there's a hit on first character
  74. # end at nchar(text) + 1 because window is idx[k] to idx[k+1]-1
  75. idx_split <- c(0 - (inserts$loc[1] == 0), inserts$loc)
  76. if (!(nchar(text) + 1) %in% idx_split)
  77. idx_split <- c(idx_split, nchar(text) + 1)
  78. text_split <- c()
  79. for (k in seq_along(idx_split[-1])) {
  80. text_split <- c(text_split, substr(text, idx_split[k], idx_split[k+1] - 1))
  81. }
  82. out <- c()
  83. for (j in seq_along(text_split)) {
  84. out <- c(
  85. out,
  86. ifelse(escape, escape_html(text_split[j]), text_split[j]),
  87. if (!is.na(inserts$insert[j])) inserts$insert[j]
  88. )
  89. }
  90. if (exact) out <- escape_backslash(out)
  91. paste(out, collapse = '')
  92. }
  93. #' Wraps capture groups in regex pattern in span tags to colorize with CSS
  94. #'
  95. #' @inheritParams view_regex
  96. #' @keywords internal
  97. wrap_regex <- function(pattern, escape = TRUE, exact = TRUE) {
  98. stopifnot(length(pattern) == 1)
  99. if (escape) pattern <- escape_html(pattern)
  100. # 1. walk characters in pattern
  101. # 2. if current is open parens
  102. # 1. walk backwards, counting number of "\\" until first non-"\\" char
  103. # 2. If odd, then not an opening group
  104. # 3. Look forward, if followed by "?" then not a capturing group
  105. # 4. If capturing group then add opening "<span...>(" to out and
  106. # add TRUE for valid capture group to parens stack
  107. # 5. If non-capturing group, add "(" to out and FALSE for non-valid to paren stack
  108. # 3. if close parens, add closing "</span>" to out
  109. out <- c()
  110. paren_stack <- c()
  111. group <- 0
  112. pattern_chars <- strsplit(pattern, "")[[1]]
  113. for (i in seq_along(pattern_chars)) {
  114. is_capture_group <- FALSE
  115. if (pattern_chars[i] == "(") {
  116. backslash_count <- 0
  117. if (i != 1) {
  118. j <- i-1
  119. while (pattern_chars[j] == "\\" && j > 0) {
  120. backslash_count <- backslash_count + 1
  121. j <- j - 1
  122. }
  123. }
  124. if (backslash_count %% 2 == 0) {
  125. if (i != length(pattern_chars) && pattern_chars[i + 1] != "?") {
  126. is_capture_group <- TRUE
  127. }
  128. }
  129. if (is_capture_group) {
  130. group <- group + 1
  131. paren_stack <- c(TRUE, paren_stack) #push
  132. out <- c(out, paste0('<span class="g', sprintf("%02d", group), '">('))
  133. } else {
  134. paren_stack <- c(FALSE, paren_stack) #push
  135. out <- c(out, "(")
  136. }
  137. } else if (pattern_chars[i] == ")") {
  138. closes_capture_group <- paren_stack[1]
  139. paren_stack <- paren_stack[-1] #pop
  140. if (closes_capture_group) {
  141. out <- c(out, ")</span>")
  142. } else {
  143. out <- c(out, ")")
  144. }
  145. } else {
  146. out <- c(out, pattern_chars[i])
  147. }
  148. }
  149. if (exact) out <- escape_backslash(out)
  150. paste(out, collapse = "")
  151. }
  152. #' View grouped regex results
  153. #'
  154. #' @param text Text to search
  155. #' @param pattern Regex pattern to look for
  156. #' @param render Render results to an HTML doc and open in RStudio viewer?
  157. #' @param escape Escape HTML-related characters in `text`?
  158. #' @param knitr Print into knitr doc? If `TRUE`, marks text as `asis_output` and
  159. #' sets `render = FALSE` and `escape = TRUE`.
  160. #' @param exact Should the regex pattern be displayed as entered by the user
  161. #' into R console or source (default)? When `TRUE`, regex is displayed with
  162. #' the double `\\\\` required for escaping backslashes in R. When `FALSE`,
  163. #' regex is displayed as interpreted by the regex engine (i.e. double `\\\\`
  164. #' as a single `\\`).
  165. #' @param ... Passed to [run_regex()]
  166. #' @export
  167. view_regex <- function(
  168. text,
  169. pattern,
  170. ...,
  171. render = TRUE,
  172. escape = render,
  173. knitr = FALSE,
  174. exact = escape
  175. ) {
  176. if (knitr) {
  177. render <- FALSE
  178. escape <- TRUE
  179. }
  180. res <- run_regex(text, pattern, ...)
  181. res <- purrr::map_chr(res, wrap_result, escape = escape, exact = exact)
  182. res <- purrr::map_chr(res, function(resi) {
  183. result_pad <- ""
  184. if (grepl("pad\\d{2}", resi)) {
  185. max_pad <- max(stringr::str_extract_all(resi, "pad\\d{2}")[[1]])
  186. max_pad_level <- as.integer(stringr::str_extract(max_pad, "\\d{2}"))
  187. if (max_pad_level - 3 > 0) {
  188. result_pad <- sprintf("pad%02d", max_pad_level - 3)
  189. }
  190. }
  191. paste("<p class='results", result_pad, "'>", resi, "</p>")
  192. })
  193. res <- paste(res, collapse = "")
  194. if (!nchar(pattern)) res <- paste("<p class='results'>", text, "</p>")
  195. if (knitr) return(knitr::asis_output(res))
  196. if (!render) return(res)
  197. head <- c(
  198. "---", "pagetitle: View Regex", "---",
  199. "<h5>Regex</h5>",
  200. "<p><pre>", wrap_regex(pattern, escape, exact), "</pre></p>",
  201. "<h5>Results</h5>"
  202. )
  203. res <- c(head, res)
  204. tmp <- tempfile(fileext = ".Rmd")
  205. cat(res, file = tmp, sep = "\n")
  206. tmp_html <- suppressWarnings(
  207. rmarkdown::render(
  208. tmp,
  209. output_format = rmarkdown::html_document(css = c(system.file("styles", 'style.css', package='regexplain'),
  210. system.file("styles", 'skeleton.css', package='regexplain')),
  211. theme = NULL),
  212. quiet = TRUE
  213. ))
  214. rstudioapi::viewer(tmp_html)
  215. }