|
#' Write the distinct payer table derived from the prepared receipts
#'
#' Opens the prepared receipts and the report list, keeps only receipts
#' whose `report_id` appears in the report list, reduces them to the
#' distinct combinations of the payer-describing columns, passes the result
#' through `add_address_lookup()`, numbers the rows as `payer_id` (placed as
#' the first column) and writes the collected table to
#' `data-out/receipts_payer/receipts_payer.parquet`.
#'
#' @param path_data_prep_receipts Location handed to
#'   `prep_open_dataset_db()` to open the prepared receipts.
#' @param path_out_report_list Location handed to `out_open_dataset_db()`
#'   to open the report list used to filter receipts.
#' @return The directory containing the written parquet file, as returned
#'   by `dirname()`.
out_receipts_payer <- function(
  path_data_prep_receipts,
  path_out_report_list
) {
  lg_info_target(lg_get_logger())

  # Make sure the destination directory exists before any data is read.
  out_file <- path("data-out", "receipts_payer", "receipts_payer.parquet")
  out_dir <- path_dir(out_file)
  dir_create(out_dir)

  reports_kept <- out_open_dataset_db(path_out_report_list)

  # Restrict receipts to the reports present in the report list.
  receipts_kept <- semi_join(
    prep_open_dataset_db(path_data_prep_receipts),
    reports_kept,
    by = "report_id"
  )

  # One row per unique combination of the payer-describing columns.
  payer_keys <- distinct(
    receipts_kept,
    org_name,
    is_org,
    is_us,
    profession,
    employers_name,
    street_1,
    city,
    state,
    full_zip,
    country_name
  )

  payers_located <- add_address_lookup(
    payer_keys,
    postal_code = full_zip,
    name = "address_lookup"
  )

  # NOTE(review): no explicit ordering precedes row_number(), so which row
  # gets which id depends on the order the backend yields - confirm that
  # this is acceptable for downstream consumers.
  payers <- mutate(payers_located, payer_id = row_number(), .before = 1)

  arrow::write_parquet(collect(payers), out_file)

  dirname(out_file)
}
-
#' Write the receipts table with payer details replaced by `payer_id`
#'
#' Opens the prepared receipts, keeps only those whose `report_id` appears
#' in the report list, joins them to the payer table on every column the
#' two tables share, drops those shared columns so that only `payer_id`
#' remains as the payer reference, moves `payer_id` directly after
#' `report_id` and writes the collected result to
#' `data-out/receipts/receipts.parquet`.
#'
#' @param path_data_prep_receipts Location handed to
#'   `prep_open_dataset_db()` to open the prepared receipts.
#' @param path_out_receipts_payer Location handed to
#'   `out_open_dataset_db()` to open the payer table (presumably the output
#'   of `out_receipts_payer()`; confirm against the caller). Its
#'   `address_lookup` column is dropped before joining.
#' @param path_out_report_list Location handed to `out_open_dataset_db()`
#'   to open the report list used to filter receipts.
#' @return The directory containing the written parquet file, as returned
#'   by `dirname()`.
out_receipts <- function(
  path_data_prep_receipts,
  path_out_receipts_payer,
  path_out_report_list
) {
  lg_info_target(lg_get_logger())

  out <- path("data-out", "receipts", "receipts.parquet")
  dir_create(path_dir(out))

  report_list <- out_open_dataset_db(path_out_report_list)
  receipts_payer <-
    out_open_dataset_db(path_out_receipts_payer) |>
    select(-address_lookup)

  receipts <-
    prep_open_dataset_db(path_data_prep_receipts) |>
    semi_join(report_list, by = "report_id")

  # Replace payer info with payer_id
  cols_payer_common <- intersect(colnames(receipts_payer), colnames(receipts))

  # An empty key set would not be a meaningful payer match (dplyr treats an
  # empty `by` as a cross join), so fail loudly instead.
  if (length(cols_payer_common) == 0L) {
    stop(
      "Receipts and receipts_payer share no columns to join on.",
      call. = FALSE
    )
  }

  receipts <-
    receipts |>
    # Payer fields can be missing. On SQL-backed lazy tables NULL keys do not
    # match each other by default, which would leave `payer_id` empty for
    # every receipt with a missing payer field; `na_matches = "na"` makes
    # missing values match, consistent with how `distinct()` built the payer
    # rows. For in-memory data frames this is already the default.
    left_join(receipts_payer, by = cols_payer_common, na_matches = "na") |>
    # `all_of()` replaces the superseded `one_of()`; every listed column is a
    # join key and therefore present in the joined table.
    select(-all_of(cols_payer_common)) |>
    relocate(payer_id, .after = report_id) |>
    collect()

  arrow::write_parquet(receipts, out)

  dirname(out)
}
|