You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

73 lines
1.7KB

  #' Write the table of distinct expense payees to parquet.
  #'
  #' Restricts the prepared expenditures to rows whose `report_id` occurs in
  #' the report list, keeps one row per distinct combination of payee
  #' attributes, and prepends a `payee_id` column.
  #'
  #' @param path_data_prep_expenditures Passed to `prep_open_dataset_db()` to
  #'   open the prepared expenditures.
  #' @param path_out_report_list Passed to `out_open_dataset_db()` to open the
  #'   report list.
  #' @return The directory holding `expenses_payee.parquet`, as given by
  #'   `dirname()`.
  out_expenses_payee <- function(
    path_data_prep_expenditures,
    path_out_report_list
  ) {
    # Project helpers defined elsewhere; presumably log the running target.
    lg_info_target(lg_get_logger())

    parquet_file <- path(
      "data-out", "expenses_payee", "expenses_payee.parquet"
    )
    dir_create(path_dir(parquet_file))

    reports <- out_open_dataset_db(path_out_report_list)
    reported_expenses <- semi_join(
      prep_open_dataset_db(path_data_prep_expenditures),
      reports,
      by = "report_id"
    )

    # One row per unique payee description.
    unique_payees <- distinct(
      reported_expenses,
      org_name, is_org, is_us,
      profession, employers_name,
      street_1, street_2, city, state, full_zip, country_name
    )

    # add_address_lookup() is defined elsewhere; the output column name is
    # supplied through `name =`.
    # NOTE(review): no ordering is imposed before row_number(), so the ids
    # may depend on backend row order -- confirm this is acceptable.
    payees <-
      unique_payees |>
      add_address_lookup(postal_code = full_zip, name = "address_lookup") |>
      mutate(payee_id = row_number(), .before = 1)

    payees |>
      collect() |>
      arrow::write_parquet(parquet_file)

    dirname(parquet_file)
  }
  #' Write the expenses table, with payee details replaced by `payee_id`.
  #'
  #' Restricts the prepared expenditures to rows whose `report_id` occurs in
  #' the report list, joins them to the payee table on every column the two
  #' tables share, drops those shared columns, and places `payee_id` right
  #' after `report_id`.
  #'
  #' @param path_data_prep_expenditures Passed to `prep_open_dataset_db()` to
  #'   open the prepared expenditures.
  #' @param path_out_expenses_payee Passed to `out_open_dataset_db()` to open
  #'   the payee table.
  #' @param path_out_report_list Passed to `out_open_dataset_db()` to open the
  #'   report list.
  #' @return The directory holding `expenses.parquet`, as given by
  #'   `dirname()`.
  out_expenses <- function(
    path_data_prep_expenditures,
    path_out_expenses_payee,
    path_out_report_list
  ) {
    # Project helpers defined elsewhere; presumably log the running target.
    lg_info_target(lg_get_logger())
    out <- path("data-out", "expenses", "expenses.parquet")
    dir_create(path_dir(out))

    report_list <- out_open_dataset_db(path_out_report_list)
    expenses_payee <-
      out_open_dataset_db(path_out_expenses_payee) |>
      select(-address_lookup)
    expenses <-
      prep_open_dataset_db(path_data_prep_expenditures) |>
      semi_join(report_list, by = "report_id") |>
      select(-name_sort)

    # Replace payee info with payee_id
    cols_payee_common <- intersect(
      colnames(expenses_payee),
      colnames(expenses)
    )
    # An empty key set would not be a meaningful payee match; fail loudly
    # instead of handing an empty `by` to the join.
    if (length(cols_payee_common) == 0L) {
      stop(
        "No shared payee columns between expenses and expenses_payee.",
        call. = FALSE
      )
    }

    expenses <-
      expenses |>
      # Payee rows are built by de-duplicating these same columns, so rows
      # with missing fields (e.g. no second street line) must match their
      # payee. Database backends default to never matching missing values;
      # "na" is already the default for in-memory data frames.
      left_join(
        expenses_payee,
        by = cols_payee_common,
        na_matches = "na"
      ) |>
      # all_of() is the strict replacement for the superseded one_of().
      select(-all_of(cols_payee_common)) |>
      relocate(payee_id, .after = report_id) |>
      collect()

    arrow::write_parquet(expenses, out)
    dirname(out)
  }