#' Extract matched groups from regexp
#'
#' @param text Text to search
#' @param pattern regexp
#' @inheritDotParams base::regexec ignore.case perl fixed useBytes
run_regex <- function(
text,
pattern,
ignore.case = FALSE,
perl = FALSE,
fixed = FALSE,
useBytes = FALSE
) {
# Use regex to get matches by group, gives start index and length
m <- regexec(pattern, text, ignore.case, perl, fixed, useBytes)
# Convert to start/end index
x <- purrr::map(m, function(mi) {
list(
'idx' = purrr::map2(mi, attr(mi, "match.length"),
~ if(.x[1] != -1) c(.x, .x + .y)))
})
# Store text and original regexc result with same hierarchy
y <- purrr::map(text, ~ list(text = .))
z <- purrr::map(regmatches(text, m), ~ list(m = .))
# Zip text, indexes and regexc match object lists
purrr::map(seq_along(x), ~ list(text = y[[.]][[1]], idx = x[[.]][[1]], m = z[[.]][[1]]))
}
#' Wrap matches in HTML span tags to colorize via CSS
#'
#' @param x Individual list item in list returned by [run_regex()]
#' @inheritParams view_regex
#' @keywords internal
wrap_result <- function(x, escape = FALSE, exact = FALSE) {
if (is.null(x$idx[[1]])) return(if (escape) escape_html(x$text) else x$text)
text <- x$text
idx <- x$idx
len_idx <- length(idx)
inserts <- data.frame(
i = 1:len_idx - 1,
start = purrr::map_int(idx, ~ .[1]),
end = purrr::map_int(idx, ~ .[2])
) %>%
mutate(
# unmatched groups have start/end of zero
start = ifelse(.data$start == 0, NA, .data$start),
end = ifelse(.data$end == 0, NA, .data$end),
class = sprintf("group g%02d", .data$i),
pad = 0
)
for (j in seq_len(nrow(inserts))) {
if (inserts$i[j] == 0) next
if (is.na(inserts$start[j]) || is.na(inserts$end[j])) next
overlap <- filter(
inserts[1:(j-1), ],
.data$i != 0,
.data$start <= !!inserts$start[j] & .data$end >= !!inserts$end[j])
inserts[j, 'pad'] <- inserts$pad[j] + nrow(overlap)
}
inserts <- inserts %>%
tidyr::gather(type, loc, start:end) %>%
filter(!is.na(.data$loc)) %>%
dplyr::arrange(loc, class, desc(type)) %>%
mutate(
class = ifelse(.data$pad > 0, sprintf("%s pad%02d", .data$class, .data$pad), .data$class),
insert = ifelse(.data$type == 'start', sprintf('', .data$class), "")
)
inserts_g0 <- filter(inserts, class == "group g00")
inserts_other <- filter(inserts, class != "group g00")
inserts <- dplyr::bind_rows(
filter(inserts_g0, type == "start"),
inserts_other,
filter(inserts_g0, type == "end")
) %>%
mutate(type = sprintf("%05d%s", 1:nrow(.), type)) %>%
group_by(.data$loc, .data$type) %>%
summarize(insert = paste(.data$insert, collapse = '')) %>%
dplyr::ungroup() %>%
mutate(type = sub("^\\d{5}", "", type))
# inserts now gives html (span open and close) to insert and loc
# first split text at inserts$loc locations,
# then recombine by zipping with inserts$insert text
# start at 0, unless there's a hit on first character
# end at nchar(text) + 1 because window is idx[k] to idx[k+1]-1
idx_split <- c(0 - (inserts$loc[1] == 0), inserts$loc)
if (!(nchar(text) + 1) %in% idx_split)
idx_split <- c(idx_split, nchar(text) + 1)
text_split <- c()
for (k in seq_along(idx_split[-1])) {
text_split <- c(text_split, substr(text, idx_split[k], idx_split[k+1] - 1))
}
out <- c()
for (j in seq_along(text_split)) {
out <- c(
out,
ifelse(escape, escape_html(text_split[j]), text_split[j]),
if (!is.na(inserts$insert[j])) inserts$insert[j]
)
}
if (exact) out <- escape_backslash(out)
paste(out, collapse = '')
}
#' Wraps capture groups in regex pattern in span tags to colorize with CSS
#'
#' @inheritParams view_regex
#' @keywords internal
wrap_regex <- function(pattern, escape = TRUE, exact = TRUE) {
stopifnot(length(pattern) == 1)
if (escape) pattern <- escape_html(pattern)
# 1. walk characters in pattern
# 2. if current is open parens
# 1. walk backwards, counting number of "\\" until first non-"\\" char
# 2. If odd, then not an opening group
# 3. Look forward, if followed by "?" then not a capturing group
# 4. If capturing group then add opening " ", resi, " ", text, "Pattern
",
"", wrap_regex(pattern, escape, exact), "