# File-level environment used to cache state shared between calls
# (in the visible code: the DuckDB connection under `duckdb_con`).
.globals <- new.env(parent = emptyenv())

#' Get the shared DuckDB connection
#'
#' On the first call, opens a connection with
#' `DBI::dbConnect(duckdb::duckdb())` and stores it in `.globals$duckdb_con`.
#' Later calls return the stored object without reconnecting.
#'
#' @return The object stored in `.globals$duckdb_con`.
duckdb_global_con <- function() {
  if (is.null(.globals$duckdb_con)) {
    # Assign straight into the cache; no function-local copy is needed.
    .globals$duckdb_con <- DBI::dbConnect(duckdb::duckdb())
  }
  .globals$duckdb_con
}
| cf_prep_db_create <- function(data_dir = here::here("data-prep")) { | cf_prep_db_create <- function(data_dir = here::here("data-prep")) { | ||||
| tables <- dir_ls(data_dir) | tables <- dir_ls(data_dir) | ||||
| names(tables) <- path_file(tables) | names(tables) <- path_file(tables) | ||||
| tbls_arrow$report_list <- arrow::open_dataset(tables["report_list"]) | tbls_arrow$report_list <- arrow::open_dataset(tables["report_list"]) | ||||
| cli::cli_progress_done() | cli::cli_progress_done() | ||||
| con <- DBI::dbConnect(duckdb::duckdb()) | |||||
| con <- duckdb_global_con() | |||||
| cli::cli_progress_step("Creating db") | cli::cli_progress_step("Creating db") | ||||
#' Open a prepared dataset with arrow
#'
#' Resolves `path_prep` and opens it with `arrow::open_dataset()`. When
#' `path_prep` does not exist as given, two fallbacks are tried in order:
#' `here::here("data-prep/", path_prep)` and `../data-prep/<path_prep>`.
#'
#' @param path_prep Path to the dataset, or a name to look up under
#'   `data-prep`.
#' @param partitioning Passed to `arrow::open_dataset()`. Reset to `NULL`
#'   when `path_prep` is not a directory or contains no sub-directories.
#' @param ... Further arguments forwarded to `arrow::open_dataset()`.
#' @return The value returned by `arrow::open_dataset()`.
prep_open_dataset <- function(path_prep, partitioning = "sboe_id", ...) {
  if (!fs::file_exists(path_prep)) {
    path_here <- here::here("data-prep/", path_prep)
    path_up <- fs::path("..", "data-prep", path_prep)
    if (fs::file_exists(path_here)) {
      path_prep <- path_here
    } else if (fs::file_exists(path_up)) {
      path_prep <- path_up
    } else {
      stop("File not found: ", path_prep, call. = FALSE)
    }
  }

  # Only list sub-directories when the path is itself a directory:
  # `fs::dir_ls()` errors when handed a plain file, which would make a
  # single-file dataset impossible to open through this helper.
  has_subdirs <- dir.exists(path_prep) &&
    length(fs::dir_ls(path_prep, type = "dir")) > 0
  if (!has_subdirs) {
    partitioning <- NULL
  }

  arrow::open_dataset(path_prep, partitioning = partitioning, ...)
}
#' Open a prepared dataset as a table on the shared DuckDB connection
#'
#' Opens the dataset with `prep_open_dataset()`, registers it on the
#' connection returned by `duckdb_global_con()` under the name `table`, and
#' returns a `dplyr::tbl()` reference to it.
#'
#' @param table Name to register the dataset under in DuckDB.
#' @param ... Forwarded to `prep_open_dataset()`.
#' @param path_prep Dataset path handed to `prep_open_dataset()`; defaults
#'   to `table`.
#' @return The result of `dplyr::tbl()` for the registered table.
prep_open_dataset_db <- function(table, ..., path_prep = table) {
  arrow_ds <- prep_open_dataset(path_prep, ...)
  db <- duckdb_global_con()

  duckdb::duckdb_register_arrow(db, table, arrow_ds)

  dplyr::tbl(db, table)
}
#' Build the candidate table from the officers dataset
#'
#' Reads the officers dataset, keeps rows whose `type` is `"Candidate"` and
#' whose `name` is not missing, stores the original name in `name_display`,
#' upper-cases `name`, keeps one row per (`sboe_id`, `name`) pair, and finally
#' keeps only rows whose `report_id` appears in `report_list`.
#'
#' @param path_officers Path passed to `prep_open_dataset()`.
#' @param report_list Table with a `report_id` column used to filter the
#'   result; defaults to `tar_read(report_list)`.
#' @return The collected, filtered candidate rows.
prepare_candidates <- function(
  path_officers = "../data-prep/officers",
  report_list = tar_read(report_list)
) {
  # Filter on the arrow dataset first so only candidate rows are collected.
  candidates <- prep_open_dataset(path_officers) |>
    filter(type == "Candidate") |>
    collect()

  with_names <- filter(candidates, !is.na(name))
  labelled <- mutate(
    with_names,
    name_display = name,
    name = toupper(name)
  )

  # NOTE(review): de-duplication runs before the report filter, so the
  # `report_id` checked below is the one on the first row kept for each
  # (sboe_id, name) pair. Confirm this ordering is intended.
  deduped <- distinct(labelled, sboe_id, name, .keep_all = TRUE)

  semi_join(deduped, report_list, by = "report_id")
}