Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

71 lines
1.6KB

  # Write the table of distinct payers found in the selected receipts.
  #
  # Arguments:
  #   path_data_prep_receipts  passed to prep_open_dataset_db() to open the
  #                            prepared receipts.
  #   path_out_report_list     passed to out_open_dataset_db() to open the
  #                            report list.
  #
  # Side effect: creates "data-out/receipts_payer" if needed and writes
  # "receipts_payer.parquet" into it.
  #
  # Returns the directory that contains the written parquet file.
  out_receipts_payer <- function(
    path_data_prep_receipts,
    path_out_report_list
  ) {
    lg_info_target(lg_get_logger())

    parquet_file <- path(
      "data-out",
      "receipts_payer",
      "receipts_payer.parquet"
    )
    dir_create(path_dir(parquet_file))

    reports <- out_open_dataset_db(path_out_report_list)

    # Keep only receipts whose report_id appears in the report list.
    receipts_kept <- semi_join(
      prep_open_dataset_db(path_data_prep_receipts),
      reports,
      by = "report_id"
    )

    # One row per unique combination of the payer-describing columns.
    payers_unique <- distinct(
      receipts_kept,
      org_name,
      is_org,
      is_us,
      profession,
      employers_name,
      street_1,
      city,
      state,
      full_zip,
      country_name
    )

    # add_address_lookup() is a project helper; presumably it adds a column
    # named "address_lookup" derived from the address fields -- TODO confirm.
    payers_with_lookup <- add_address_lookup(
      payers_unique,
      postal_code = full_zip,
      name = "address_lookup"
    )

    # NOTE(review): payer_id is assigned by row_number() with no explicit
    # ordering visible here, so ids may not be stable across runs -- confirm.
    payers_keyed <- mutate(
      payers_with_lookup,
      payer_id = row_number(),
      .before = 1
    )

    payers_keyed |>
      collect() |>
      arrow::write_parquet(parquet_file)

    dirname(parquet_file)
  }
  # Write the receipts table with payer details replaced by a `payer_id` key.
  #
  # Arguments:
  #   path_data_prep_receipts  passed to prep_open_dataset_db() to open the
  #                            prepared receipts.
  #   path_out_receipts_payer  passed to out_open_dataset_db() to open the
  #                            payer table (must provide `payer_id` and
  #                            `address_lookup` columns).
  #   path_out_report_list     passed to out_open_dataset_db() to open the
  #                            report list.
  #
  # Side effect: creates "data-out/receipts" if needed and writes
  # "receipts.parquet" into it.
  #
  # Returns the directory that contains the written parquet file.
  out_receipts <- function(
    path_data_prep_receipts,
    path_out_receipts_payer,
    path_out_report_list
  ) {
    lg_info_target(lg_get_logger())

    out <- path("data-out", "receipts", "receipts.parquet")
    dir_create(path_dir(out))

    report_list <- out_open_dataset_db(path_out_report_list)

    # address_lookup is not needed for matching payers back to receipts.
    receipts_payer <-
      out_open_dataset_db(path_out_receipts_payer) |>
      select(-address_lookup)

    # Keep only receipts whose report_id appears in the report list.
    receipts <-
      prep_open_dataset_db(path_data_prep_receipts) |>
      semi_join(report_list, by = "report_id")

    # Replace payer info with payer_id: join on every column the two tables
    # share, then drop those columns so only the key remains.
    cols_payer_common <- intersect(colnames(receipts_payer), colnames(receipts))

    # NOTE(review): on a database backend, rows with a missing value in any
    # join column may fail to match and end up with a missing payer_id --
    # confirm how the backend behind *_open_dataset_db() compares NA/NULL keys.
    receipts <-
      receipts |>
      left_join(receipts_payer, by = cols_payer_common) |>
      # all_of() replaces the superseded one_of(): it is the strict selector
      # for an external character vector and errors if a column is missing.
      select(-all_of(cols_payer_common)) |>
      relocate(payer_id, .after = report_id) |>
      collect()

    arrow::write_parquet(receipts, out)
    dirname(out)
  }