🔍 An RStudio addin slash regex utility belt
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

141 lines
4.5KB

  1. #' @export
  2. run_regex <- function(
  3. text,
  4. pattern,
  5. ignore.case = FALSE,
  6. perl = FALSE,
  7. fixed = FALSE,
  8. useBytes = FALSE,
  9. invert = FALSE
  10. ) {
  11. m <- regexec(pattern, text, ignore.case, perl, fixed, useBytes)
  12. x <- purrr::map(m, function(mi) list('idx' = purrr::map2(mi, attr(mi, "match.length"), ~ if(.x[1] != -1) c(.x, .x + .y - 1L))))
  13. y <- purrr::map(text, ~ list(text = .))
  14. z <- purrr::map(regmatches(text, m), ~ list(m = .))
  15. purrr::map(seq_along(x), ~ list(text = y[[.]][[1]], idx = x[[.]][[1]], m = z[[.]][[1]]))
  16. }
  17. wrap_result <- function(x, escape = FALSE) {
  18. if (is.null(x$idx[[1]])) return(if (escape) escape_html(x$text) else x$text)
  19. text <- x$text
  20. idx <- x$idx
  21. len_idx <- length(idx)
  22. inserts <- data.frame(
  23. i = 1:len_idx - 1,
  24. start = purrr::map_int(idx, ~ .[1]),
  25. end = purrr::map_int(idx, ~ .[2]) + 1
  26. ) %>%
  27. mutate(
  28. class = sprintf("group g%02d", i),
  29. pad = 0
  30. )
  31. for (j in seq_len(nrow(inserts))) {
  32. if (inserts$i[j] == 0) next
  33. overlap <- filter(
  34. inserts[1:(j-1), ],
  35. i != 0,
  36. start <= !!inserts$start[j] & end >= !!inserts$end[j])
  37. inserts[j, 'pad'] <- inserts$pad[j] + nrow(overlap)
  38. }
  39. inserts <- inserts %>%
  40. tidyr::gather(type, loc, start:end) %>%
  41. mutate(
  42. class = ifelse(pad > 0, sprintf("%s pad%02d", class, pad), class),
  43. insert = ifelse(type == 'start', sprintf('<span class="%s">', class), "</span>")
  44. ) %>%
  45. group_by(loc, type) %>%
  46. summarize(insert = paste(insert, collapse = ''))
  47. # inserts now gives html (span open and close) to insert and loc
  48. # first split text at inserts$loc locations,
  49. # then recombine by zipping with inserts$insert text
  50. # start at 0, unless there's a hit on first character
  51. # end at nchar(text) + 1 because window is idx[k] to idx[k+1]-1
  52. idx_split <- c(0 - (inserts$loc[1] == 0), inserts$loc)
  53. if (!(nchar(text) + 1) %in% idx_split)
  54. idx_split <- c(idx_split, nchar(text) + 1)
  55. text_split <- c()
  56. for (k in seq_along(idx_split[-1])) {
  57. text_split <- c(text_split, substr(text, idx_split[k], idx_split[k+1] - 1))
  58. }
  59. out <- c()
  60. for (j in seq_along(text_split)) {
  61. out <- c(
  62. out,
  63. ifelse(escape, escape_html(text_split[j]), text_split[j]),
  64. if (!is.na(inserts$insert[j])) inserts$insert[j]
  65. )
  66. }
  67. paste(out, collapse = '')
  68. }
  69. wrap_regex <- function(pattern, escape = TRUE, exact = TRUE) {
  70. stopifnot(length(pattern) == 1)
  71. if(escape) pattern <- escape_html(pattern)
  72. r_open_parens <- "(?<![\\\\])\\("
  73. x <- strsplit(pattern, r_open_parens, perl = TRUE)[[1]]
  74. first <- x[1]
  75. x <- x[-1]
  76. if (length(x)) {
  77. x <- paste0(
  78. '<span class="g', sprintf("%02d", seq_along(x)), '">(',
  79. x,
  80. collapse = ""
  81. )
  82. x <- gsub("(?<![\\\\])\\)", ")</span>", x, perl = TRUE)
  83. }
  84. if (exact) x <- escape_backslash(x)
  85. paste0(first, x)
  86. }
  87. #' View grouped regex results
  88. #'
  89. #' @param text Text to search
  90. #' @param pattern Regex pattern to look for
  91. #' @param render Render results to an HTML doc and open in RStudio viewer?
  92. #' @param escape Escape HTML-related characters in `text`?
  93. #' @param knitr Print into knitr doc? If `TRUE`, marks text as `asis_output` and
  94. #' sets `render = FALSE` and `escape = TRUE`.
  95. #' @param exact Should regex be displayed as entered by the user into R console
  96. #' or source (default)? When `TRUE`, regex is displayed with the double `\\`
  97. #' required for escaping backslashes in R. When `FALSE`, regex is displayed
  98. #' as interpreted by the regex engine (i.e. double `\\` as a single `\`).
  99. #' @param ... Passed to [run_regex]
  100. #' @export
  101. view_regex <- function(
  102. text,
  103. pattern,
  104. ...,
  105. render = TRUE,
  106. escape = render,
  107. knitr = FALSE,
  108. exact = escape
  109. ) {
  110. if (knitr) {
  111. render <- FALSE
  112. escape <- TRUE
  113. }
  114. res <- run_regex(text, pattern, ...)
  115. res <- purrr::map_chr(res, wrap_result, escape = escape)
  116. res <- paste("<p class='results'>", res, "</p>")
  117. if (!nchar(pattern)) res <- paste("<p class='results'>", text, "</p>")
  118. if (knitr) return(knitr::asis_output(res))
  119. if (!render) return(res)
  120. head <- c(
  121. "---", "pagetitle: View Regex", "---",
  122. "<h5>Regex</h5>",
  123. "<p><pre style = 'font-size: 1em;'>", wrap_regex(pattern, escape, exact), "</pre></p>",
  124. "<h5>Results</h5>"
  125. )
  126. res <- c(head, res)
  127. tmp <- tempfile(fileext = ".Rmd")
  128. cat(res, file = tmp, sep = "\n")
  129. tmp_html <- suppressWarnings(
  130. rmarkdown::render(
  131. tmp,
  132. output_format = rmarkdown::html_document(css = system.file('style.css', package='regexhelp')),
  133. quiet = TRUE
  134. ))
  135. rstudioapi::viewer(tmp_html)
  136. }