Garrick Aden-Buie 7 лет назад
Родитель
Сommit
67725f59bb
4 измененных файлов: 29 добавлений и 28 удалений
  1. +10
    -18
      R/run_regex.R
  2. +1
    -1
      man/run_regex.Rd
  3. +12
    -2
      tests/testthat/test-regex.R
  4. +6
    -7
      tests/testthat/test-wrap_result.R

+ 10
- 18
R/run_regex.R Просмотреть файл

@@ -11,7 +11,7 @@ run_regex <- function(
perl = FALSE,
fixed = FALSE,
useBytes = FALSE,
global = FALSE
global = TRUE
) {
# Use regex to get matches by group, gives start index and length
m <- regexec(pattern, text, ignore.case, perl, fixed, useBytes)
@@ -27,8 +27,8 @@ run_regex <- function(
m2 <- run_regex(subtext, pattern, ignore.case, perl, fixed, useBytes)
for (i in seq_along(m2)) {
if (is.null(m2[[i]]$idx[[1]])) next
m2[[i]]$idx[, c(1, 2)] <- m2[[i]]$idx[, c(1, 2)] + mmi[i] - 1
m[[i]]$idx <- dplyr::bind_rows(m[[i]]$idx, m2[[i]]$idx)
m2[[i]]$idx[, c(1, 2)] <- m2[[i]]$idx[, c(1, 2)] + mmi[i] - 1L
m[[i]]$idx <- rbind(m[[i]]$idx, m2[[i]]$idx)
}
}
m
@@ -43,7 +43,7 @@ expand_matches <- function(m) {
x$start <- ifelse(x$start == 0L, NA_integer_, x$start)
x$end <- ifelse(x$end == 0L, NA_integer_, x$end)
x$group <- 1:nrow(x) - 1L
dplyr::as_tibble(x, validate = FALSE)
x
}

max_match_index <- function(m) {
@@ -64,20 +64,12 @@ max_match_index <- function(m) {
wrap_result <- function(x, escape = FALSE, exact = FALSE) {
if (is.null(x$idx[[1]])) return(if (escape) escape_html(x$text) else x$text)
text <- x$text
idx <- x$idx
len_idx <- length(idx)
inserts <- data.frame(
i = 1:len_idx - 1,
start = purrr::map_int(idx, ~ .[1]),
end = purrr::map_int(idx, ~ .[2])
) %>%
mutate(
# unmatched groups have start/end of zero
start = ifelse(.data$start == 0, NA, .data$start),
end = ifelse(.data$end == 0, NA, .data$end),
class = sprintf("group g%02d", .data$i),
pad = 0
)

inserts <- x$idx
inserts$class <- sprintf("group g%02d", inserts$group)
inserts$pad <- 0L
names(inserts)[which(names(inserts) == "group")] <- "i"

for (j in seq_len(nrow(inserts))) {
if (inserts$i[j] == 0) next
if (is.na(inserts$start[j]) || is.na(inserts$end[j])) next

+ 1
- 1
man/run_regex.Rd Просмотреть файл

@@ -5,7 +5,7 @@
\title{Extract matched groups from regexp}
\usage{
run_regex(text, pattern, ignore.case = FALSE, perl = FALSE,
fixed = FALSE, useBytes = FALSE, global = FALSE)
fixed = FALSE, useBytes = FALSE, global = TRUE)
}
\arguments{
\item{text}{Text to search}

+ 12
- 2
tests/testthat/test-regex.R Просмотреть файл

@@ -2,7 +2,7 @@ context("test-regex")

test_that("expand_matches gives data frame of indices with groups", {
m <- regexec("(a)(b)(d)?", "abcaba")
idx <- dplyr::data_frame(
idx <- data.frame(
start = c(1L, 1L, 2L, NA_integer_),
end = c(3L, 2L, 3L, NA_integer_),
group = c(0L, 1L, 2L, 3L)
@@ -10,6 +10,16 @@ test_that("expand_matches gives data frame of indices with groups", {
expect_equal(expand_matches(m[[1]]), idx)
})

test_that("start/end indices are integers", {
text <- "ab ab"
pattern <- "(a)(b)"
m <- run_regex(text, pattern, global = TRUE)
expect_is(m[[1]]$idx$start, "integer")
expect_is(m[[1]]$idx$end, "integer")
expect_is(m[[1]]$idx$group, "integer")
})

test_that("max_match_index works", {
m <- run_regex(c("abcaba", "aba", "z"), c("(a)(b)(d)?c?"), global = TRUE)
m <- run_regex(c("abcaba", "aba", "z"), c("(a)(b)(d)?c?"), global = FALSE)
expect_equal(max_match_index(m), c(4, 3, NA_integer_))
})

+ 6
- 7
tests/testthat/test-wrap_result.R Просмотреть файл

@@ -6,38 +6,37 @@ test_that("wrap_result handles zero length groups", {
# (?<=\()([^)]*)(?=\))
text <- "Type 'q()' to quit R."
pattern <- "(?<=\\()([^)]*)(?=\\))"
res <- wrap_result(run_regex(text, pattern, perl = TRUE)[[1]])
res <- wrap_result(run_regex(text, pattern, perl = TRUE, global = FALSE)[[1]])
expect_equal(res, "Type 'q(<span class=\"group g00\"><span class=\"group g01\"></span></span>)' to quit R.")
})

test_that("wrap_results generally works", {
text <- "apples"
pattern <- "apples"
res <- wrap_result(run_regex(text, pattern, perl = TRUE)[[1]])
res <- wrap_result(run_regex(text, pattern, perl = TRUE, global = FALSE)[[1]])
expect_equal(res, "<span class=\"group g00\">apples</span>")

text <- "He wheeled the bike past the winding road."
pattern <- "(a|the) ([^ ]+)"
res <- wrap_result(run_regex(text, pattern, perl = TRUE)[[1]])
res <- wrap_result(run_regex(text, pattern, perl = TRUE, global = FALSE)[[1]])
expect_equal(res, "He wheeled <span class=\"group g00\"><span class=\"group g01\">the</span> <span class=\"group g02\">bike</span></span> past the winding road.")

text <- ".15in"
pattern <- "^(auto|inherit|((\\.\\d+)|(\\d+(\\.\\d+)?))(%|in|cm|mm|em|ex|pt|pc|px|vh|vw|vmin|vmax))$"
res <- wrap_result(run_regex(text, pattern, perl = TRUE)[[1]])
res <- wrap_result(run_regex(text, pattern, perl = TRUE, global = FALSE)[[1]])
expect_equal(res, "<span class=\"group g00\"><span class=\"group g01\"><span class=\"group g02 pad01\"><span class=\"group g03 pad02\">.15</span></span><span class=\"group g06 pad01\">in</span></span></span>")
})

test_that("wrap_results works when groups start and end at same index", {
text <- "7282298386"
pattern <- "\\(?(\\d{3})[-). ]?(\\d{3})[- .]?(\\d{4})"
res <- wrap_result(run_regex(text, pattern, perl = TRUE)[[1]])
res <- wrap_result(run_regex(text, pattern, perl = TRUE, global = FALSE)[[1]])
expect_equal(res, "<span class=\"group g00\"><span class=\"group g01\">728</span><span class=\"group g02\">229</span><span class=\"group g03\">8386</span></span>")
})

test_that("wrap_regex searches globally", {
testthat::skip("Global search test not ready yet")
text <- "ab ab"
pattern <- "(a)(b)"
result <- paste(rep("<span class=\"group g00\"><span class=\"group g01\">a</span><span class=\"group g02\">b</span></span>", 2), collapse = " ")
expect_equal(wrap_result(run_regex(text, pattern)[[1]]), result)
expect_equal(wrap_result(run_regex(text, pattern, global = TRUE)[[1]]), result)
})

Загрузка…
Отмена
Сохранить