🔍 An RStudio addin slash regex utility belt
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

221 lines
7.5KB

  1. #' Extract matched groups from regexp
  2. #'
  3. #' @param text Text to search
  4. #' @param pattern regexp
  5. #' @inheritParams base::regexec
  6. #' @export
  7. run_regex <- function(
  8. text,
  9. pattern,
  10. ignore.case = FALSE,
  11. perl = FALSE,
  12. fixed = FALSE,
  13. useBytes = FALSE,
  14. invert = FALSE
  15. ) {
  16. # Use regex to get matches by group, gives start index and length
  17. m <- regexec(pattern, text, ignore.case, perl, fixed, useBytes)
  18. # Convert to start/end index
  19. x <- purrr::map(m, function(mi) {
  20. list(
  21. 'idx' = purrr::map2(mi, attr(mi, "match.length"),
  22. ~ if(.x[1] != -1) c(.x, .x + .y)))
  23. })
  24. # Store text and original regexc result with same hierarchy
  25. y <- purrr::map(text, ~ list(text = .))
  26. z <- purrr::map(regmatches(text, m), ~ list(m = .))
  27. # Zip text, indexes and regexc match object lists
  28. purrr::map(seq_along(x), ~ list(text = y[[.]][[1]], idx = x[[.]][[1]], m = z[[.]][[1]]))
  29. }
  30. #' Wrap matches in HTML span tags to colorize via CSS
  31. #'
  32. #' @param x Individual list item in list returned by [run_regex()]
  33. #' @inheritParams view_regex
  34. #' @keywords internal
  35. wrap_result <- function(x, escape = FALSE, exact = FALSE) {
  36. if (is.null(x$idx[[1]])) return(if (escape) escape_html(x$text) else x$text)
  37. text <- x$text
  38. idx <- x$idx
  39. len_idx <- length(idx)
  40. inserts <- data.frame(
  41. i = 1:len_idx - 1,
  42. start = purrr::map_int(idx, ~ .[1]),
  43. end = purrr::map_int(idx, ~ .[2])
  44. ) %>%
  45. mutate(
  46. # unmatched groups have start/end of zero
  47. start = ifelse(.data$start == 0, NA, .data$start),
  48. end = ifelse(.data$end == 0, NA, .data$end),
  49. class = sprintf("group g%02d", .data$i),
  50. pad = 0
  51. )
  52. for (j in seq_len(nrow(inserts))) {
  53. if (inserts$i[j] == 0) next
  54. if (is.na(inserts$start[j]) || is.na(inserts$end[j])) next
  55. overlap <- filter(
  56. inserts[1:(j-1), ],
  57. .data$i != 0,
  58. .data$start <= !!inserts$start[j] & .data$end >= !!inserts$end[j])
  59. inserts[j, 'pad'] <- inserts$pad[j] + nrow(overlap)
  60. }
  61. inserts <- inserts %>%
  62. tidyr::gather(type, loc, start:end) %>%
  63. filter(!is.na(.data$loc)) %>%
  64. mutate(
  65. class = ifelse(.data$pad > 0, sprintf("%s pad%02d", .data$class, .data$pad), .data$class),
  66. insert = ifelse(.data$type == 'start', sprintf('<span class="%s">', .data$class), "</span>")
  67. ) %>%
  68. group_by(.data$loc, .data$type) %>%
  69. summarize(insert = paste(.data$insert, collapse = ''))
  70. # inserts now gives html (span open and close) to insert and loc
  71. # first split text at inserts$loc locations,
  72. # then recombine by zipping with inserts$insert text
  73. # start at 0, unless there's a hit on first character
  74. # end at nchar(text) + 1 because window is idx[k] to idx[k+1]-1
  75. idx_split <- c(0 - (inserts$loc[1] == 0), inserts$loc)
  76. if (!(nchar(text) + 1) %in% idx_split)
  77. idx_split <- c(idx_split, nchar(text) + 1)
  78. text_split <- c()
  79. for (k in seq_along(idx_split[-1])) {
  80. text_split <- c(text_split, substr(text, idx_split[k], idx_split[k+1] - 1))
  81. }
  82. out <- c()
  83. for (j in seq_along(text_split)) {
  84. out <- c(
  85. out,
  86. ifelse(escape, escape_html(text_split[j]), text_split[j]),
  87. if (!is.na(inserts$insert[j])) inserts$insert[j]
  88. )
  89. }
  90. if (exact) out <- escape_backslash(out)
  91. paste(out, collapse = '')
  92. }
  93. #' Wraps capture groups in regex pattern in span tags to colorize with CSS
  94. #'
  95. #' @inheritParams view_regex
  96. #' @keywords internal
  97. wrap_regex <- function(pattern, escape = TRUE, exact = TRUE) {
  98. stopifnot(length(pattern) == 1)
  99. if (escape) pattern <- escape_html(pattern)
  100. # 1. walk characters in pattern
  101. # 2. if current is open parens
  102. # 1. walk backwards, counting number of "\\" until first non-"\\" char
  103. # 2. If odd, then not an opening group
  104. # 3. Look forward, if followed by "?" then not a capturing group
  105. # 4. If capturing group then add opening "<span...>(" to out and
  106. # add TRUE for valid capture group to parens stack
  107. # 5. If non-capturing group, add "(" to out and FALSE for non-valid to paren stack
  108. # 3. if close parens, add closing "</span>" to out
  109. out <- c()
  110. paren_stack <- c()
  111. group <- 0
  112. pattern_chars <- strsplit(pattern, "")[[1]]
  113. for (i in seq_along(pattern_chars)) {
  114. is_capture_group <- FALSE
  115. if (pattern_chars[i] == "(") {
  116. backslash_count <- 0
  117. if (i != 1) {
  118. j <- i-1
  119. while (pattern_chars[j] == "\\" && j > 0) {
  120. backslash_count <- backslash_count + 1
  121. j <- j - 1
  122. }
  123. }
  124. if (backslash_count %% 2 == 0) {
  125. if (i != length(pattern_chars) && pattern_chars[i + 1] != "?") {
  126. is_capture_group <- TRUE
  127. }
  128. }
  129. if (is_capture_group) {
  130. group <- group + 1
  131. paren_stack <- c(TRUE, paren_stack) #push
  132. out <- c(out, paste0('<span class="g', sprintf("%02d", group), '">('))
  133. } else {
  134. paren_stack <- c(FALSE, paren_stack) #push
  135. out <- c(out, "(")
  136. }
  137. } else if (pattern_chars[i] == ")") {
  138. closes_capture_group <- paren_stack[1]
  139. paren_stack <- paren_stack[-1] #pop
  140. if (closes_capture_group) {
  141. out <- c(out, ")</span>")
  142. } else {
  143. out <- c(out, ")")
  144. }
  145. } else {
  146. out <- c(out, pattern_chars[i])
  147. }
  148. }
  149. if (exact) out <- escape_backslash(out)
  150. paste(out, collapse = "")
  151. }
  152. #' View grouped regex results
  153. #'
  154. #' @param text Text to search
  155. #' @param pattern Regex pattern to look for
  156. #' @param render Render results to an HTML doc and open in RStudio viewer?
  157. #' @param escape Escape HTML-related characters in `text`?
  158. #' @param knitr Print into knitr doc? If `TRUE`, marks text as `asis_output` and
  159. #' sets `render = FALSE` and `escape = TRUE`.
  160. #' @param exact Should the regex pattern be displayed as entered by the user
  161. #' into R console or source (default)? When `TRUE`, regex is displayed with
  162. #' the double `\\\\` required for escaping backslashes in R. When `FALSE`,
  163. #' regex is displayed as interpreted by the regex engine (i.e. double `\\\\`
  164. #' as a single `\\`).
  165. #' @param ... Passed to [run_regex()]
  166. #' @export
  167. view_regex <- function(
  168. text,
  169. pattern,
  170. ...,
  171. render = TRUE,
  172. escape = render,
  173. knitr = FALSE,
  174. exact = escape
  175. ) {
  176. if (knitr) {
  177. render <- FALSE
  178. escape <- TRUE
  179. }
  180. res <- run_regex(text, pattern, ...)
  181. res <- purrr::map_chr(res, wrap_result, escape = escape, exact = exact)
  182. res <- purrr::map_chr(res, function(resi) {
  183. result_pad <- ""
  184. if (grepl("pad\\d{2}", resi)) {
  185. max_pad <- max(stringr::str_extract_all(resi, "pad\\d{2}")[[1]])
  186. max_pad_level <- as.integer(stringr::str_extract(max_pad, "\\d{2}"))
  187. if (max_pad_level - 3 > 0) {
  188. result_pad <- sprintf("pad%02d", max_pad_level - 3)
  189. }
  190. }
  191. paste("<p class='results", result_pad, "'>", resi, "</p>")
  192. })
  193. res <- paste(res, collapse = "")
  194. if (!nchar(pattern)) res <- paste("<p class='results'>", text, "</p>")
  195. if (knitr) return(knitr::asis_output(res))
  196. if (!render) return(res)
  197. head <- c(
  198. "---", "pagetitle: View Regex", "---",
  199. "<h5>Pattern</h5>",
  200. "<p><pre>", wrap_regex(pattern, escape, exact), "</pre></p>",
  201. "<h5>Matches</h5>"
  202. )
  203. res <- c(head, res)
  204. tmp <- tempfile(fileext = ".Rmd")
  205. cat(res, file = tmp, sep = "\n")
  206. tmp_html <- suppressWarnings(
  207. rmarkdown::render(
  208. tmp,
  209. output_format = rmarkdown::html_document(css = c(system.file("styles", 'skeleton.css', package='regexplain'),
  210. system.file("styles", 'style.css', package='regexplain')),
  211. theme = NULL,
  212. md_extensions = "-autolink_bare_uris"),
  213. quiet = TRUE
  214. ))
  215. rstudioapi::viewer(tmp_html)
  216. }