# Dataset openers ----

# Open an Arrow dataset located under the "data-prep" directory.
#
# `path_prep` is resolved by `resolve_path_up_2()`; `partitioning` and `...`
# are forwarded to `cf_open_dataset()` (and from there to
# `arrow::open_dataset()`).
prep_open_dataset <- function(path_prep, partitioning = "sboe_id", ...) {
  cf_open_dataset(
    path_prep,
    partitioning = partitioning,
    ...,
    dir = "data-prep"
  )
}

# Open an Arrow dataset located under the "data-out" directory.
#
# Same contract as `prep_open_dataset()`, only the base directory differs.
out_open_dataset <- function(path_out, partitioning = "sboe_id", ...) {
  cf_open_dataset(
    path_out,
    partitioning = partitioning,
    ...,
    dir = "data-out"
  )
}

# Resolve `path` relative to `dir` and open it with `arrow::open_dataset()`.
#
# `partitioning` is only passed on when the resolved path is a directory that
# contains at least one sub-directory; otherwise it is dropped (set to NULL).
# `...` is forwarded to `arrow::open_dataset()`.
cf_open_dataset <- function(path,
                            partitioning = "sboe_id",
                            ...,
                            dir = "data-prep") {
  path <- resolve_path_up_2(path, dir = dir)

  # `resolve_path_up_2()` may return a plain file; only list sub-directories
  # when the resolved path really is a directory.
  has_subdirs <- fs::is_dir(path) &&
    length(fs::dir_ls(path, type = "dir")) > 0
  if (!has_subdirs) {
    partitioning <- NULL
  }

  arrow::open_dataset(path, partitioning = partitioning, ...)
}

# Open a "data-prep" dataset and expose it as a lazy DuckDB-backed table.
#
# `path_prep` defaults to `table`; `...` is forwarded to `cf_open_dataset()`.
prep_open_dataset_db <- function(table, ..., path_prep = table) {
  cf_open_dataset_db(table, ..., path = path_prep, dir = "data-prep")
}

# Open a "data-out" dataset and expose it as a lazy DuckDB-backed table.
#
# `path_out` defaults to `table`; `...` is forwarded to `cf_open_dataset()`.
out_open_dataset_db <- function(table, ..., path_out = table) {
  cf_open_dataset_db(table, ..., path = path_out, dir = "data-out")
}

# Open the dataset at `path`, register it on the connection returned by
# `duckdb_global_con()`, and return a `dplyr::tbl()` pointing at it.
#
# The table is registered under the file-name component of `table`
# (`fs::path_file(table)`), so `table` may be given as a nested path.
cf_open_dataset_db <- function(table, ..., path = table, dir = "data-prep") {
  dataset <- cf_open_dataset(path, ..., dir = dir)
  table_name <- fs::path_file(table)

  con <- duckdb_global_con()
  duckdb::duckdb_register_arrow(con, table_name, dataset)
  dplyr::tbl(con, table_name)
}

# Return a lazy table for the "resolved" table of the address-lookup SQLite
# database.
#
# The connection is cached in `.globals$con_address` and reused on later
# calls. A new connection is opened when nothing is cached yet or when the
# cached connection is no longer valid (e.g. it has been disconnected).
# `path_db` is resolved with `resolve_path_up_2()` on every call, so a missing
# file is an error even when a connection is already cached.
prep_open_address_db <- function(path_db = "address_lookup.sqlite") {
  path_db <- resolve_path_up_2(path_db)

  con <- .globals$con_address
  if (is.null(con) || !DBI::dbIsValid(con)) {
    con <- DBI::dbConnect(RSQLite::SQLite(), path_db)
    .globals$con_address <- con
  }

  # Namespaced so this works without dplyr being attached, matching
  # `cf_open_dataset_db()`.
  dplyr::tbl(con, "resolved")
}

# Utils ----

# Locate `path`, trying a fixed sequence of candidate locations.
#
# Candidates are checked in this order and the first existing one is returned:
#   1. `path` as given
#   2. `<dir>/<path>` relative to the working directory
#   3. `here::here(dir, path)`
#   4. `../<dir>/<path>`
#   5. `../../<dir>/<path>`
# Stops with "File not found: <path>" when none of them exists.
resolve_path_up_2 <- function(path, dir = "data-prep") {
  if (fs::file_exists(path)) {
    return(path)
  }

  # A list (not `c()`) keeps each candidate's own class intact.
  candidates <- list(
    fs::path(dir, path),
    here::here(dir, path),
    fs::path("..", dir, path),
    fs::path("..", "..", dir, path)
  )

  for (candidate in candidates) {
    if (fs::file_exists(candidate)) {
      return(candidate)
    }
  }

  stop("File not found: ", path)
}