🔍 An RStudio addin slash regex utility belt
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

172 lines
5.7KB

  1. #' Extract matched groups from regexp
  2. #'
  3. #' @param text Text to search
  4. #' @param pattern regexp
  5. #' @inheritParams base::regexec
  6. #' @export
  7. run_regex <- function(
  8. text,
  9. pattern,
  10. ignore.case = FALSE,
  11. perl = FALSE,
  12. fixed = FALSE,
  13. useBytes = FALSE,
  14. invert = FALSE
  15. ) {
  16. # Use regex to get matches by group, gives start index and length
  17. m <- regexec(pattern, text, ignore.case, perl, fixed, useBytes)
  18. # Convert to start/end index
  19. x <- purrr::map(m, function(mi) {
  20. list(
  21. 'idx' = purrr::map2(mi, attr(mi, "match.length"),
  22. ~ if(.x[1] != -1) c(.x, .x + .y)))
  23. })
  24. # Store text and original regexc result with same hierarchy
  25. y <- purrr::map(text, ~ list(text = .))
  26. z <- purrr::map(regmatches(text, m), ~ list(m = .))
  27. # Zip text, indexes and regexc match object lists
  28. purrr::map(seq_along(x), ~ list(text = y[[.]][[1]], idx = x[[.]][[1]], m = z[[.]][[1]]))
  29. }
  30. wrap_result <- function(x, escape = FALSE) {
  31. if (is.null(x$idx[[1]])) return(if (escape) escape_html(x$text) else x$text)
  32. text <- x$text
  33. idx <- x$idx
  34. len_idx <- length(idx)
  35. inserts <- data.frame(
  36. i = 1:len_idx - 1,
  37. start = purrr::map_int(idx, ~ .[1]),
  38. end = purrr::map_int(idx, ~ .[2])
  39. ) %>%
  40. mutate(
  41. # unmatched groups have start/end of zero
  42. start = ifelse(.data$start == 0, NA, .data$start),
  43. end = ifelse(.data$end == 0, NA, .data$end),
  44. class = sprintf("group g%02d", .data$i),
  45. pad = 0
  46. )
  47. for (j in seq_len(nrow(inserts))) {
  48. if (inserts$i[j] == 0) next
  49. if (is.na(inserts$start[j]) || is.na(inserts$end[j])) next
  50. overlap <- filter(
  51. inserts[1:(j-1), ],
  52. .data$i != 0,
  53. .data$start <= !!inserts$start[j] & .data$end >= !!inserts$end[j])
  54. inserts[j, 'pad'] <- inserts$pad[j] + nrow(overlap)
  55. }
  56. inserts <- inserts %>%
  57. tidyr::gather(type, loc, start:end) %>%
  58. filter(!is.na(.data$loc)) %>%
  59. mutate(
  60. class = ifelse(.data$pad > 0, sprintf("%s pad%02d", .data$class, .data$pad), .data$class),
  61. insert = ifelse(.data$type == 'start', sprintf('<span class="%s">', .data$class), "</span>")
  62. ) %>%
  63. group_by(.data$loc, .data$type) %>%
  64. summarize(insert = paste(.data$insert, collapse = ''))
  65. # inserts now gives html (span open and close) to insert and loc
  66. # first split text at inserts$loc locations,
  67. # then recombine by zipping with inserts$insert text
  68. # start at 0, unless there's a hit on first character
  69. # end at nchar(text) + 1 because window is idx[k] to idx[k+1]-1
  70. idx_split <- c(0 - (inserts$loc[1] == 0), inserts$loc)
  71. if (!(nchar(text) + 1) %in% idx_split)
  72. idx_split <- c(idx_split, nchar(text) + 1)
  73. text_split <- c()
  74. for (k in seq_along(idx_split[-1])) {
  75. text_split <- c(text_split, substr(text, idx_split[k], idx_split[k+1] - 1))
  76. }
  77. out <- c()
  78. for (j in seq_along(text_split)) {
  79. out <- c(
  80. out,
  81. ifelse(escape, escape_html(text_split[j]), text_split[j]),
  82. if (!is.na(inserts$insert[j])) inserts$insert[j]
  83. )
  84. }
  85. paste(out, collapse = '')
  86. }
  87. wrap_regex <- function(pattern, escape = TRUE, exact = TRUE) {
  88. stopifnot(length(pattern) == 1)
  89. if(escape) pattern <- escape_html(pattern)
  90. r_open_parens <- "(?<![\\\\])\\("
  91. x <- strsplit(pattern, r_open_parens, perl = TRUE)[[1]]
  92. first <- x[1]
  93. x <- x[-1]
  94. if (length(x)) {
  95. x <- paste0(
  96. '<span class="g', sprintf("%02d", seq_along(x)), '">(',
  97. x,
  98. collapse = ""
  99. )
  100. x <- gsub("(?<![\\\\])\\)", ")</span>", x, perl = TRUE)
  101. }
  102. if (exact) x <- escape_backslash(x)
  103. paste0(first, x)
  104. }
  105. #' View grouped regex results
  106. #'
  107. #' @param text Text to search
  108. #' @param pattern Regex pattern to look for
  109. #' @param render Render results to an HTML doc and open in RStudio viewer?
  110. #' @param escape Escape HTML-related characters in `text`?
  111. #' @param knitr Print into knitr doc? If `TRUE`, marks text as `asis_output` and
  112. #' sets `render = FALSE` and `escape = TRUE`.
  113. #' @param exact Should regex be displayed as entered by the user into R console
  114. #' or source (default)? When `TRUE`, regex is displayed with the double `\\\\`
  115. #' required for escaping backslashes in R. When `FALSE`, regex is displayed
  116. #' as interpreted by the regex engine (i.e. double `\\\\` as a single `\\`).
  117. #' @param ... Passed to [run_regex]
  118. #' @export
  119. view_regex <- function(
  120. text,
  121. pattern,
  122. ...,
  123. render = TRUE,
  124. escape = render,
  125. knitr = FALSE,
  126. exact = escape
  127. ) {
  128. if (knitr) {
  129. render <- FALSE
  130. escape <- TRUE
  131. }
  132. res <- run_regex(text, pattern, ...)
  133. res <- purrr::map_chr(res, wrap_result, escape = escape)
  134. res <- purrr::map_chr(res, function(resi) {
  135. result_pad <- ""
  136. if (grepl("pad\\d{2}", resi)) {
  137. max_pad <- max(stringr::str_extract_all(resi, "pad\\d{2}")[[1]])
  138. max_pad_level <- as.integer(stringr::str_extract(max_pad, "\\d{2}"))
  139. if (max_pad_level - 3 > 0) {
  140. result_pad <- sprintf("pad%02d", max_pad_level - 3)
  141. }
  142. }
  143. paste("<p class='results", result_pad, "'>", resi, "</p>")
  144. })
  145. res <- paste(res, collapse = "")
  146. if (!nchar(pattern)) res <- paste("<p class='results'>", text, "</p>")
  147. if (knitr) return(knitr::asis_output(res))
  148. if (!render) return(res)
  149. head <- c(
  150. "---", "pagetitle: View Regex", "---",
  151. "<h5>Regex</h5>",
  152. "<p><pre>", wrap_regex(pattern, escape, exact), "</pre></p>",
  153. "<h5>Results</h5>"
  154. )
  155. res <- c(head, res)
  156. tmp <- tempfile(fileext = ".Rmd")
  157. cat(res, file = tmp, sep = "\n")
  158. tmp_html <- suppressWarnings(
  159. rmarkdown::render(
  160. tmp,
  161. output_format = rmarkdown::html_document(css = c(system.file("styles", 'style.css', package='regexplain'),
  162. system.file("styles", 'skeleton.css', package='regexplain')),
  163. theme = NULL),
  164. quiet = TRUE
  165. ))
  166. rstudioapi::viewer(tmp_html)
  167. }