瀏覽代碼

Better handling of results groups

Track # of result groups in regex result data frame as idx$pass. Refactored span collating.
main
Garrick Aden-Buie 7 年之前
父節點
當前提交
975e3ae4fa
共有 3 個文件被更改,包括 51 次插入14 次删除
  1. +29
    -12
      R/run_regex.R
  2. +9
    -1
      tests/testthat/test-regex.R
  3. +13
    -1
      tests/testthat/test-wrap_result.R

+ 29
- 12
R/run_regex.R 查看文件

@@ -28,6 +28,7 @@ run_regex <- function(
for (i in seq_along(m2)) {
if (is.null(m2[[i]]$idx[[1]])) next
m2[[i]]$idx[, c(1, 2)] <- m2[[i]]$idx[, c(1, 2)] + mmi[i] - 1L
m2[[i]]$idx$pass <- m2[[i]]$idx$pass + 1L
m[[i]]$idx <- rbind(m[[i]]$idx, m2[[i]]$idx)
}
}
@@ -43,6 +44,7 @@ expand_matches <- function(m) {
x$start <- ifelse(x$start == 0L, NA_integer_, x$start)
x$end <- ifelse(x$end == 0L, NA_integer_, x$end)
x$group <- 1:nrow(x) - 1L
x$pass <- 1L
x
}

@@ -87,18 +89,18 @@ wrap_result <- function(x, escape = FALSE, exact = FALSE) {
class = ifelse(.data$pad > 0, sprintf("%s pad%02d", .data$class, .data$pad), .data$class),
insert = ifelse(.data$type == 'start', sprintf('<span class="%s">', .data$class), "</span>")
)
inserts_g0 <- filter(inserts, class == "group g00")
inserts_other <- filter(inserts, class != "group g00")
inserts <- dplyr::bind_rows(
filter(inserts_g0, type == "start"),
inserts_other,
filter(inserts_g0, type == "end")
) %>%
mutate(type = sprintf("%05d%s", 1:nrow(.), type)) %>%
group_by(.data$loc, .data$type) %>%
summarize(insert = paste(.data$insert, collapse = '')) %>%
dplyr::ungroup() %>%
mutate(type = sub("^\\d{5}", "", type))
inserts <- if (max(inserts$pass) == 1) {
collapse_span_inserts(inserts)
} else {
inserts %>%
tidyr::nest(-pass) %>%
mutate(data = purrr::map(data, collapse_span_inserts)) %>%
tidyr::unnest() %>%
group_by(loc, type) %>%
summarize(insert = paste(insert, collapse = "")) %>%
dplyr::ungroup()
}

# inserts now gives html (span open and close) to insert and loc
# first split text at inserts$loc locations,
@@ -124,6 +126,21 @@ wrap_result <- function(x, escape = FALSE, exact = FALSE) {
paste(out, collapse = '')
}

# Order and collapse the span tags belonging to one set of results.
#
# `inserts` is a data frame that has (at least) the columns `class`, `type`,
# `loc` and `insert`. Rows whose class is "group g00" are split by `type`:
# their "start" rows are placed first and their "end" rows last, with every
# other row kept in between in its existing order.
#
# Returns an ungrouped data frame with the columns `loc`, `type` and `insert`;
# any other column of `inserts` is dropped by the summarize step.
collapse_span_inserts <- function(inserts) {
  inserts_g0 <- filter(inserts, class == "group g00")
  inserts_other <- filter(inserts, class != "group g00")
  dplyr::bind_rows(
    filter(inserts_g0, type == "start"),
    inserts_other,
    filter(inserts_g0, type == "end")
  ) %>%
    # Prefix `type` with a zero-padded row number so that the grouping below
    # keeps the row order established by bind_rows() within each `loc`.
    # seq_len() is used instead of 1:nrow(.) because the latter produces
    # c(1, 0) for a zero-row input and makes mutate() fail.
    mutate(type = sprintf("%05d%s", seq_len(nrow(.)), type)) %>%
    group_by(.data$loc, .data$type) %>%
    summarize(insert = paste(.data$insert, collapse = '')) %>%
    dplyr::ungroup() %>%
    # Strip the five-digit ordering prefix again to restore the original type.
    mutate(type = sub("^\\d{5}", "", type))
}

#' Wraps capture groups in regex pattern in span tags to colorize with CSS
#'
#' @inheritParams view_regex

+ 9
- 1
tests/testthat/test-regex.R 查看文件

@@ -5,7 +5,8 @@ test_that("expand_matches gives data frame of indices with groups", {
idx <- data.frame(
start = c(1L, 1L, 2L, NA_integer_),
end = c(3L, 2L, 3L, NA_integer_),
group = c(0L, 1L, 2L, 3L)
group = c(0L, 1L, 2L, 3L),
pass = rep(1L, 4)
)
expect_equal(expand_matches(m[[1]]), idx)
})
@@ -23,3 +24,10 @@ test_that("max_match_index works", {
m <- run_regex(c("abcaba", "aba", "z"), c("(a)(b)(d)?c?"), global = FALSE)
expect_equal(max_match_index(m), c(4, 3, NA_integer_))
})

test_that("results group (pass) is calculated correctly", {
  # "ab ab" holds two matches of "(a)(b)"; with global = TRUE the test expects
  # one distinct pass value per match.
  matches <- run_regex("ab ab", "(a)(b)", global = TRUE)
  observed_passes <- unique(matches[[1]]$idx$pass)
  expect_equal(observed_passes, c(1L, 2L))
})

+ 13
- 1
tests/testthat/test-wrap_result.R 查看文件

@@ -34,9 +34,21 @@ test_that("wrap_results works when groups start and end at same index", {
expect_equal(res, "<span class=\"group g00\"><span class=\"group g01\">728</span><span class=\"group g02\">229</span><span class=\"group g03\">8386</span></span>")
})

test_that("wrap_regex searches globally", {
test_that("wrap_result searches globally", {
text <- "ab ab"
pattern <- "(a)(b)"
result <- paste(rep("<span class=\"group g00\"><span class=\"group g01\">a</span><span class=\"group g02\">b</span></span>", 2), collapse = " ")
expect_equal(wrap_result(run_regex(text, pattern, global = TRUE)[[1]]), result)
})

test_that("wrap_result starts/ends correctly with touching groups", {
  # Each match ends with a space directly followed by the next match, so the
  # closing tag of one "group g00" span and the opening tag of the next share
  # a location; the expected output closes the first before opening the next.
  expected <- paste0(
    '<span class=\"group g00\"><span class=\"group g01\">The</span> <span class=\"group g02\">big</span> </span>',
    '<span class=\"group g00\"><span class=\"group g01\">red</span> <span class=\"group g02\">apple</span> </span>',
    '<span class=\"group g00\"><span class=\"group g01\">fell</span> <span class=\"group g02\">to</span> </span>',
    'the ground.'
  )
  matches <- run_regex(
    "The big red apple fell to the ground.",
    "(\\w+) (\\w+) ",
    global = TRUE
  )
  expect_equal(wrap_result(matches[[1]]), expected)
})

Loading…
取消
儲存