Explorar el Código

Fix unicode unescaping for e.g. \\U0001F575

tags/v0.2.0
Garrick Aden-Buie hace 8 años
padre
commit
09188b347d
Se han modificado 2 ficheros con 2 adiciones y 1 borrado
  1. +1
    -1
      R/regex_gadget.R
  2. +1
    -0
      tests/testthat/test-sanitize_text_input.R

+ 1
- 1
R/regex_gadget.R Ver fichero



sanitize_text_input <- function(x) { sanitize_text_input <- function(x) {
if (is.null(x) || !nchar(x)) return(x) if (is.null(x) || !nchar(x)) return(x)
rx_unicode <- "\\u[0-9a-f]{4,8}"
rx_unicode <- "\\\\u[0-9a-f]{4,8}"
rx_hex <- "\\\\x[0-9a-f]{2}|\\\\x\\{[0-9a-f]{1,6}\\}" rx_hex <- "\\\\x[0-9a-f]{2}|\\\\x\\{[0-9a-f]{1,6}\\}"
rx_octal <- "\\\\[0][0-7]{1,3}" rx_octal <- "\\\\[0][0-7]{1,3}"
rx_escape <- paste(rx_unicode, rx_hex, rx_octal, sep = "|") rx_escape <- paste(rx_unicode, rx_hex, rx_octal, sep = "|")

+ 1
- 0
tests/testthat/test-sanitize_text_input.R Ver fichero

# rx_unicode <- "\\u[0-9a-f]{4,8}" # rx_unicode <- "\\u[0-9a-f]{4,8}"
expect_equal(sanitize_text_input("\\u2019"), "\u2019") expect_equal(sanitize_text_input("\\u2019"), "\u2019")
expect_equal(sanitize_text_input("\\u000D"), "\r") expect_equal(sanitize_text_input("\\u000D"), "\r")
expect_equal(sanitize_text_input("\\U0001F575"), "\U001F575")
}) })


test_that("sanitizes hex", { test_that("sanitizes hex", {

Cargando…
Cancelar
Guardar