Преглед изворни кода

Better handling of results groups

Track the number of result groups in the regex result data frame as idx$pass. Refactor span collating into a helper.
pull/21/head
Garrick Aden-Buie пре 7 година
родитељ
комит
8e65867222
3 измењених фајлова са 51 додато и 14 уклоњено
  1. +29
    -12
      R/run_regex.R
  2. +9
    -1
      tests/testthat/test-regex.R
  3. +13
    -1
      tests/testthat/test-wrap_result.R

+ 29
- 12
R/run_regex.R Прегледај датотеку

for (i in seq_along(m2)) { for (i in seq_along(m2)) {
if (is.null(m2[[i]]$idx[[1]])) next if (is.null(m2[[i]]$idx[[1]])) next
m2[[i]]$idx[, c(1, 2)] <- m2[[i]]$idx[, c(1, 2)] + mmi[i] - 1L m2[[i]]$idx[, c(1, 2)] <- m2[[i]]$idx[, c(1, 2)] + mmi[i] - 1L
m2[[i]]$idx$pass <- m2[[i]]$idx$pass + 1L
m[[i]]$idx <- rbind(m[[i]]$idx, m2[[i]]$idx) m[[i]]$idx <- rbind(m[[i]]$idx, m2[[i]]$idx)
} }
} }
x$start <- ifelse(x$start == 0L, NA_integer_, x$start) x$start <- ifelse(x$start == 0L, NA_integer_, x$start)
x$end <- ifelse(x$end == 0L, NA_integer_, x$end) x$end <- ifelse(x$end == 0L, NA_integer_, x$end)
x$group <- 1:nrow(x) - 1L x$group <- 1:nrow(x) - 1L
x$pass <- 1L
x x
} }


class = ifelse(.data$pad > 0, sprintf("%s pad%02d", .data$class, .data$pad), .data$class), class = ifelse(.data$pad > 0, sprintf("%s pad%02d", .data$class, .data$pad), .data$class),
insert = ifelse(.data$type == 'start', sprintf('<span class="%s">', .data$class), "</span>") insert = ifelse(.data$type == 'start', sprintf('<span class="%s">', .data$class), "</span>")
) )
inserts_g0 <- filter(inserts, class == "group g00")
inserts_other <- filter(inserts, class != "group g00")
inserts <- dplyr::bind_rows(
filter(inserts_g0, type == "start"),
inserts_other,
filter(inserts_g0, type == "end")
) %>%
mutate(type = sprintf("%05d%s", 1:nrow(.), type)) %>%
group_by(.data$loc, .data$type) %>%
summarize(insert = paste(.data$insert, collapse = '')) %>%
dplyr::ungroup() %>%
mutate(type = sub("^\\d{5}", "", type))
inserts <- if (max(inserts$pass) == 1) {
collapse_span_inserts(inserts)
} else {
inserts %>%
tidyr::nest(-pass) %>%
mutate(data = purrr::map(data, collapse_span_inserts)) %>%
tidyr::unnest() %>%
group_by(loc, type) %>%
summarize(insert = paste(insert, collapse = "")) %>%
dplyr::ungroup()
}


# inserts now gives html (span open and close) to insert and loc # inserts now gives html (span open and close) to insert and loc
# first split text at inserts$loc locations, # first split text at inserts$loc locations,
paste(out, collapse = '') paste(out, collapse = '')
} }


#' Collapse span inserts into a single row per location and insert type
#'
#' Reorders the rows of `inserts` so that the "start" rows of class
#' `"group g00"` come first, every row of any other class follows, and the
#' "end" rows of class `"group g00"` come last. Each row's `type` is then
#' temporarily prefixed with its zero-padded row index, the rows are grouped
#' by `loc` and the prefixed `type`, the `insert` strings in each group are
#' pasted together, and the prefix is stripped again.
#'
#' @param inserts A data frame with (at least) the columns `class`, `type`,
#'   `loc` and `insert`. May have zero rows.
#' @return An ungrouped data frame with the columns `loc`, `type` and
#'   `insert`.
#' @noRd
collapse_span_inserts <- function(inserts) {
  inserts_g0 <- filter(inserts, .data$class == "group g00")
  inserts_other <- filter(inserts, .data$class != "group g00")

  dplyr::bind_rows(
    filter(inserts_g0, .data$type == "start"),
    inserts_other,
    filter(inserts_g0, .data$type == "end")
  ) %>%
    # seq_len() rather than 1:nrow(.): with zero rows `1:0` is c(1, 0), which
    # makes mutate() fail on a size mismatch instead of returning an empty
    # result.
    # NOTE(review): the row-index prefix presumably exists so the grouped
    # output keeps the row order established above -- confirm.
    mutate(type = sprintf("%05d%s", seq_len(nrow(.)), .data$type)) %>%
    group_by(.data$loc, .data$type) %>%
    summarize(insert = paste(.data$insert, collapse = "")) %>%
    dplyr::ungroup() %>%
    # Drop the five-digit ordering prefix added before grouping.
    mutate(type = sub("^\\d{5}", "", .data$type))
}

#' Wraps capture groups in regex pattern in span tags to colorize with CSS #' Wraps capture groups in regex pattern in span tags to colorize with CSS
#' #'
#' @inheritParams view_regex #' @inheritParams view_regex

+ 9
- 1
tests/testthat/test-regex.R Прегледај датотеку

idx <- data.frame( idx <- data.frame(
start = c(1L, 1L, 2L, NA_integer_), start = c(1L, 1L, 2L, NA_integer_),
end = c(3L, 2L, 3L, NA_integer_), end = c(3L, 2L, 3L, NA_integer_),
group = c(0L, 1L, 2L, 3L)
group = c(0L, 1L, 2L, 3L),
pass = rep(1L, 4)
) )
expect_equal(expand_matches(m[[1]]), idx) expect_equal(expand_matches(m[[1]]), idx)
}) })
m <- run_regex(c("abcaba", "aba", "z"), c("(a)(b)(d)?c?"), global = FALSE) m <- run_regex(c("abcaba", "aba", "z"), c("(a)(b)(d)?c?"), global = FALSE)
expect_equal(max_match_index(m), c(4, 3, NA_integer_)) expect_equal(max_match_index(m), c(4, 3, NA_integer_))
}) })

# A global search that matches twice should label the matches as passes 1 and 2.
test_that("results group (pass) is calculated correctly", {
  matches <- run_regex("ab ab", "(a)(b)", global = TRUE)
  observed_passes <- unique(matches[[1]]$idx$pass)
  expect_equal(observed_passes, c(1L, 2L))
})

+ 13
- 1
tests/testthat/test-wrap_result.R Прегледај датотеку

expect_equal(res, "<span class=\"group g00\"><span class=\"group g01\">728</span><span class=\"group g02\">229</span><span class=\"group g03\">8386</span></span>") expect_equal(res, "<span class=\"group g00\"><span class=\"group g01\">728</span><span class=\"group g02\">229</span><span class=\"group g03\">8386</span></span>")
}) })


test_that("wrap_regex searches globally", {
test_that("wrap_result searches globally", {
text <- "ab ab" text <- "ab ab"
pattern <- "(a)(b)" pattern <- "(a)(b)"
result <- paste(rep("<span class=\"group g00\"><span class=\"group g01\">a</span><span class=\"group g02\">b</span></span>", 2), collapse = " ") result <- paste(rep("<span class=\"group g00\"><span class=\"group g01\">a</span><span class=\"group g02\">b</span></span>", 2), collapse = " ")
expect_equal(wrap_result(run_regex(text, pattern, global = TRUE)[[1]]), result) expect_equal(wrap_result(run_regex(text, pattern, global = TRUE)[[1]]), result)
}) })

# Consecutive global matches that touch each other must each get their own
# opening and closing "group g00" span.
test_that("wrap_result starts/ends correctly with touching groups", {
  input <- "The big red apple fell to the ground."
  regex <- "(\\w+) (\\w+) "
  expected_pieces <- c(
    '<span class=\"group g00\"><span class=\"group g01\">The</span> <span class=\"group g02\">big</span> </span>',
    '<span class=\"group g00\"><span class=\"group g01\">red</span> <span class=\"group g02\">apple</span> </span>',
    '<span class=\"group g00\"><span class=\"group g01\">fell</span> <span class=\"group g02\">to</span> </span>',
    'the ground.'
  )
  expected <- paste(expected_pieces, collapse = "")
  first_match <- run_regex(input, regex, global = TRUE)[[1]]
  expect_equal(wrap_result(first_match), expected)
})

Loading…
Откажи
Сачувај