|
#' Write the table of distinct expense payees
#'
#' Opens the prepared expenditures, keeps the rows whose `report_id` appears in
#' the report list, reduces them to the distinct combinations of the
#' payee-describing columns, passes the result through `add_address_lookup()`,
#' and numbers the rows as `payee_id`. The collected table is written to
#' `data-out/expenses_payee/expenses_payee.parquet`.
#'
#' @param path_data_prep_expenditures Location handed to
#'   `prep_open_dataset_db()` to open the prepared expenditures.
#' @param path_out_report_list Location handed to `out_open_dataset_db()` to
#'   open the report list.
#' @return The directory that holds the written parquet file, as returned by
#'   `dirname()`.
out_expenses_payee <- function(
  path_data_prep_expenditures,
  path_out_report_list
) {
  lg_info_target(lg_get_logger())

  # Make sure the output directory exists before any data is opened.
  parquet_file <- path("data-out", "expenses_payee", "expenses_payee.parquet")
  parquet_dir <- path_dir(parquet_file)
  dir_create(parquet_dir)

  reports <- out_open_dataset_db(path_out_report_list)
  expenditures <- prep_open_dataset_db(path_data_prep_expenditures)

  # Only expenditures that belong to a listed report contribute payees.
  in_scope <- semi_join(expenditures, reports, by = "report_id")

  # One row per unique combination of the payee-describing columns.
  payee_keys <- distinct(
    in_scope,
    org_name,
    is_org,
    is_us,
    profession,
    employers_name,
    street_1,
    street_2,
    city,
    state,
    full_zip,
    country_name
  )

  with_lookup <- add_address_lookup(
    payee_keys,
    postal_code = full_zip,
    name = "address_lookup"
  )

  # NOTE(review): row_number() is applied without an explicit ordering, so the
  # ids presumably depend on the row order the backend happens to return --
  # confirm whether payee_id needs to be stable between runs.
  payees <- mutate(with_lookup, payee_id = row_number(), .before = 1)

  arrow::write_parquet(collect(payees), parquet_file)

  dirname(parquet_file)
}
-
#' Write the expenses table with payee details replaced by `payee_id`
#'
#' Opens the prepared expenditures, keeps the rows whose `report_id` appears in
#' the report list, and drops `name_sort`. Every column the expenditures share
#' with the payee table (minus its `address_lookup` column) is used as a join
#' key and then removed, leaving `payee_id` placed right after `report_id`. The
#' collected result is written to `data-out/expenses/expenses.parquet`.
#'
#' @param path_data_prep_expenditures Location handed to
#'   `prep_open_dataset_db()` to open the prepared expenditures.
#' @param path_out_expenses_payee Location handed to `out_open_dataset_db()` to
#'   open the payee table; it must provide `payee_id` and `address_lookup`.
#' @param path_out_report_list Location handed to `out_open_dataset_db()` to
#'   open the report list.
#' @return The directory that holds the written parquet file, as returned by
#'   `dirname()`.
out_expenses <- function(
  path_data_prep_expenditures,
  path_out_expenses_payee,
  path_out_report_list
) {
  lg_info_target(lg_get_logger())

  out <- path("data-out", "expenses", "expenses.parquet")
  dir_create(path_dir(out))

  report_list <- out_open_dataset_db(path_out_report_list)
  expenses_payee <-
    out_open_dataset_db(path_out_expenses_payee) |>
    select(-address_lookup)

  expenses <-
    prep_open_dataset_db(path_data_prep_expenditures) |>
    semi_join(report_list, by = "report_id") |>
    select(-name_sort)

  # Replace payee info with payee_id: the columns present in both tables are
  # the payee-describing columns and serve as the join key.
  cols_payee_common <- intersect(colnames(expenses_payee), colnames(expenses))

  # Fail with a clear message instead of relying on how the join treats an
  # empty `by`.
  if (length(cols_payee_common) == 0L) {
    stop(
      "No columns shared between expenses and expenses_payee; ",
      "cannot assign payee_id.",
      call. = FALSE
    )
  }

  expenses <-
    expenses |>
    # Match missing values to missing values. SQL-backed lazy tables treat NULL
    # keys as non-matching by default, which would leave payee_id missing for
    # any row with a NULL key column; in-memory data frames already match NA
    # to NA, so this is a no-op there.
    left_join(expenses_payee, by = cols_payee_common, na_matches = "na") |>
    # all_of() replaces the superseded one_of() and errors on a missing column.
    select(-all_of(cols_payee_common)) |>
    relocate(payee_id, .after = report_id) |>
    collect()

  arrow::write_parquet(expenses, out)

  dirname(out)
}
|