No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

67 líneas
1.8KB

  # Read every wanted receipts CSV in one directory and stack them into a
  # single data frame.
  #
  # Arguments:
  #   dir_sboe_id  Directory whose files are listed with dir_ls().
  #   report_list  Data frame with `sboe_id` and `report_id` columns naming the
  #                reports to keep; defaults to tar_read(report_list).
  #
  # Returns the row-bound result of read_receipts_csv() over the kept files,
  # with column names converted to snake_case.
  process_receipts_csv <- function(dir_sboe_id,
                                   report_list = tar_read(report_list)) {
    # Path-derived metadata for each file, with the path itself kept alongside.
    csv_paths <- dir_ls(dir_sboe_id)
    reports <- report_path_info(csv_paths)
    reports$path <- csv_paths

    # Only reports that also appear in `report_list` go into the data.
    reports <- semi_join(reports, report_list, by = c("sboe_id", "report_id"))

    # Reader applied to each remaining row; columns other than the three named
    # ones are absorbed by `...`. Files smaller than one byte contribute NULL
    # instead of being parsed.
    read_one <- function(sboe_id, report_id, path, ...) {
      if (file_size(path) < 1) {
        NULL
      } else {
        read_receipts_csv(path, sboe_id, report_id)
      }
    }

    combined <- list_rbind(pmap(reports, read_one))
    names(combined) <- snakecase::to_snake_case(
      names(combined),
      parsing_option = 3
    )
    combined
  }
  # Process the receipts CSVs of one directory and write them to a parquet
  # file.
  #
  # Arguments:
  #   dir_sboe_id  Directory passed on to process_receipts_csv(); its path is
  #                also parsed with report_path_info() to obtain the sboe_id.
  #   report_list  Passed on to process_receipts_csv(); defaults to
  #                tar_read(report_list).
  #
  # Returns the path of the parquet file that was written.
  write_receipts_parquet <- function(dir_sboe_id,
                                     report_list = tar_read(report_list)) {
    combined <- process_receipts_csv(dir_sboe_id, report_list)

    # The output folder is named "sboe_id=<id>" using the id taken from the
    # directory path.
    dir_info <- report_path_info(dir_sboe_id)
    partition <- sprintf("sboe_id=%s", dir_info$sboe_id)
    out_dir <- here::here("..", "data", "receipts", partition)
    out_file <- path(out_dir, "part-0.parquet")

    # Create the folder before writing into it.
    dir_create(out_dir)
    arrow::write_parquet(combined, out_file)
    out_file
  }
  # Read one receipts CSV file and tag every row with its report identifiers.
  #
  # Arguments:
  #   path       Path of the CSV file to read.
  #   sboe_id    Optional identifier; when NULL it is taken from
  #              report_path_info(path).
  #   report_id  Optional identifier; when NULL it is taken from
  #              report_path_info(path).
  #
  # Returns the parsed data with `sboe_id` and `report_id` added as the leading
  # columns and `IsPrior` converted from its "X" marker to a logical.
  read_receipts_csv <- function(path, sboe_id = NULL, report_id = NULL) {
    # Fill in only the identifier(s) the caller left out, so an explicitly
    # supplied value is never replaced by the path-derived one.
    if (is.null(sboe_id) || is.null(report_id)) {
      info <- report_path_info(path)
      if (is.null(sboe_id)) {
        sboe_id <- info$sboe_id
      }
      if (is.null(report_id)) {
        report_id <- info$report_id
      }
    }

    # Everything is read as text except the columns typed explicitly below.
    # IsPrior stays character here because it is compared with "X" afterwards.
    x <- read_csv(
      path,
      col_types = cols(
        .default = col_character(),
        GroupID = col_integer(),
        IsOrg = col_logical(),
        IsUS = col_logical(),
        Amount = col_double(),
        SumToDate = col_double(),
        IsAggregated = col_logical(),
        IsPrior = col_character()
      )
    )
    record_problems(x, label = "receipts")

    x |>
      # NOTE(review): rows where IsPrior is NA stay NA (not FALSE) after this
      # comparison -- confirm that is the intended encoding.
      mutate(IsPrior = IsPrior == "X") |>
      # `.env$` forces the function arguments to be used even if the CSV
      # happens to contain a column called `sboe_id` or `report_id`.
      mutate(
        sboe_id = .env$sboe_id,
        report_id = .env$report_id,
        .before = 0
      )
  }