|
# Combine the receipts CSV files found in `dir_sboe_id` into one table.
#
# The files in the directory are listed and report_path_info() is used to
# derive the identifiers of each file from its path. Only files whose
# (sboe_id, report_id) pair is present in `report_list` are read. Files with
# a size below 1 are skipped. The column names of the combined table are
# converted to snake_case before it is returned.
process_receipts_csv <- function(dir_sboe_id, report_list = tar_read(report_list)) {
  csv_paths <- dir_ls(dir_sboe_id)

  candidates <- report_path_info(csv_paths)
  candidates$path <- csv_paths

  # Restrict to the reports that should end up in the data
  wanted <- semi_join(candidates, report_list, by = c("sboe_id", "report_id"))

  # Reader applied to each row of `wanted`; pmap() matches the arguments to
  # the columns by name, any remaining columns are absorbed by `...`
  read_one <- function(sboe_id, report_id, path, ...) {
    if (file_size(path) < 1) {
      NULL
    } else {
      read_receipts_csv(path, sboe_id, report_id)
    }
  }

  receipts <- list_rbind(pmap(wanted, read_one))

  snake_names <- snakecase::to_snake_case(names(receipts), parsing_option = 3)
  names(receipts) <- snake_names

  receipts
}
-
# Process the receipts in `dir_sboe_id` and write them to a parquet file.
#
# The output location is <here>/../data/receipts/sboe_id=<id>/part-0.parquet,
# where <id> is the sboe_id that report_path_info() reports for
# `dir_sboe_id`. dir_create() is called on the output directory before the
# file is written.
#
# Returns the path of the parquet file.
write_receipts_parquet <- function(dir_sboe_id, report_list = tar_read(report_list)) {
  receipts <- process_receipts_csv(dir_sboe_id, report_list)

  # One directory per sboe_id, named "sboe_id=<id>"
  partition <- sprintf("sboe_id=%s", report_path_info(dir_sboe_id)$sboe_id)
  out_dir <- here::here("..", "data", "receipts", partition)
  out_file <- path(out_dir, "part-0.parquet")

  dir_create(out_dir)
  arrow::write_parquet(receipts, out_file)

  out_file
}
-
# Read a single receipts CSV and tag every row with its report identifiers.
#
# Arguments:
#   path       Path of the CSV file to read.
#   sboe_id    Value stored in the `sboe_id` column. When NULL it is taken
#              from report_path_info(path).
#   report_id  Value stored in the `report_id` column. When NULL it is taken
#              from report_path_info(path).
#
# Returns the parsed table with `sboe_id` and `report_id` added ahead of the
# CSV columns (`.before = 0`) and `IsPrior` converted to a logical that is
# TRUE where the CSV holds "X". The parsed table is also passed to
# record_problems() with the label "receipts".
read_receipts_csv <- function(path, sboe_id = NULL, report_id = NULL) {
  # Fall back to the path only for the identifiers the caller left out, so
  # an explicitly supplied value is never replaced by a path-derived one.
  if (is.null(sboe_id) || is.null(report_id)) {
    info <- report_path_info(path)
    if (is.null(sboe_id)) {
      sboe_id <- info$sboe_id
    }
    if (is.null(report_id)) {
      report_id <- info$report_id
    }
  }

  # Everything is read as text except the columns listed here. IsPrior is
  # kept as character because it is converted by hand below.
  x <- read_csv(
    path,
    col_types = cols(
      .default = col_character(),
      GroupID = col_integer(),
      IsOrg = col_logical(),
      IsUS = col_logical(),
      Amount = col_double(),
      SumToDate = col_double(),
      IsAggregated = col_logical(),
      IsPrior = col_character()
    )
  )

  record_problems(x, label = "receipts")

  # NOTE(review): a blank IsPrior cell is presumably parsed as NA, which makes
  # the comparison below NA rather than FALSE -- confirm that is intended.
  #
  # `!!` injects the argument values, so a CSV column that happens to be
  # called `sboe_id` or `report_id` cannot shadow them inside mutate().
  x |>
    mutate(IsPrior = IsPrior == "X") |>
    mutate(sboe_id = !!sboe_id, report_id = !!report_id, .before = 0)
}
|