Преглед на файлове

need to know fixed sboe_id when writing out the parquet files

main
Garrick Aden-Buie преди 2 години
родител
ревизия
422727b68d
No known key found for this signature in database
променени са 2 файла, в които са добавени 28 реда и са изтрити 1 реда
  1. +10
    -0
      process/R/fix-sboe_id-missing.R
  2. +18
    -1
      process/R/read_report_file.R

+ 10
- 0
process/R/fix-sboe_id-missing.R Целия файл

#' Replace placeholder `sboe_id` values with an id derived from `report_id`
#'
#' Elements of `sboe_id` equal to the literal string "No Id" are rewritten as
#' "NOID-<report_id>", using the `report_id` at the same position. Elements
#' that already hold another id are never touched, and a "No Id" element whose
#' `report_id` is `NA` is left as "No Id" because there is nothing to build a
#' replacement from.
#'
#' @param sboe_id Vector of ids; "No Id" marks a missing id.
#' @param report_id Vector of report ids, indexed in parallel with `sboe_id`.
#' @return `sboe_id` with the fixable "No Id" elements replaced.
fix_sboe_id_missing <- function(sboe_id, report_id) {
  idx_missing <- which(sboe_id == "No Id")

  if (!length(idx_missing)) {
    return(sboe_id)
  }

  # can't fix `sboe_id` if we don't have a `report_id`, so only keep the
  # missing positions that also have one (intersect, not union: a union would
  # also overwrite valid ids and produce "NOID-NA" for absent report ids)
  idx_has_report_id <- which(!is.na(report_id))
  idx_fixable <- intersect(idx_missing, idx_has_report_id)

  if (!length(idx_fixable)) {
    return(sboe_id)
  }

  sboe_id[idx_fixable] <- paste0("NOID-", report_id[idx_fixable])
  sboe_id
}

+ 18
- 1
process/R/read_report_file.R Целия файл

#' Process a report export directory and write the result as parquet
#'
#' Runs `process_report_export()` on `dir_sboe_id`, then hands the processed
#' reports to `write_processed_report_export_parquet()`. When the path info for
#' the directory yields exactly one `sboe_id` and it is not the "No Id"
#' placeholder, everything is written under that id in a single call.
#' Otherwise the ids are taken from the `sboe_id` column of the processed
#' reports and each id is written separately.
#'
#' @param dir_sboe_id Passed to `process_report_export()` and
#'   `report_path_info()`; presumably the export directory for one sboe_id
#'   (TODO confirm against callers).
#' @param report_list Passed through to `process_report_export()`.
#' @return In the single-id case, whatever
#'   `write_processed_report_export_parquet()` returns; otherwise the per-id
#'   results flattened into an unnamed character vector.
write_processed_report_export <- function(dir_sboe_id, report_list = tar_read(report_list)) {
  reports <- process_report_export(dir_sboe_id, report_list)

  # Look the path info up once. No local named `sboe_id` is kept, so the
  # `sboe_id` used inside select()/filter() below can only mean the column.
  info <- report_path_info(dir_sboe_id)

  if (!any(info$sboe_id == "No Id") && length(unique(info$sboe_id)) == 1) {
    return(write_processed_report_export_parquet(reports, unique(info$sboe_id)))
  }

  # The path does not give one usable id, so use the ids recorded in the
  # processed data itself.
  sboe_ids <- map_dfr(reports, select, sboe_id) |>
    pull(sboe_id) |>
    unique()

  # Re-nest: one entry per id, each holding every report filtered to that id.
  reports <-
    sboe_ids |>
    set_names() |>
    map(\(id) map(reports, \(d) filter(d, sboe_id == id)))

  map(sboe_ids, \(id) write_processed_report_export_parquet(reports[[id]], id)) |>
    flatten_chr() |>
    unname()
}

write_processed_report_export_parquet <- function(reports, sboe_id) {
base_dir <- here::here("..", "data") base_dir <- here::here("..", "data")
sboe_id_param <- sprintf("sboe_id=%s", sboe_id) sboe_id_param <- sprintf("sboe_id=%s", sboe_id)



Loading…
Отказ
Запис