Selaa lähdekoodia

replace missing values with "" in receipts

main
Garrick Aden-Buie 2 vuotta sitten
vanhempi
commit
be5f430ffe
No known key found for this signature in database
2 muutettua tiedostoa jossa 23 lisäystä ja 5 poistoa
  1. +19
    -2
      process/R/out_receipts.R
  2. +4
    -3
      process/R/prepare_candidates.R

+ 19
- 2
process/R/out_receipts.R Näytä tiedosto

@@ -15,6 +15,7 @@ out_receipts_payer <- function(

receipts_payer <-
receipts |>
receipts_replace_na() |>
distinct(
org_name,
is_org,
@@ -35,6 +36,20 @@ out_receipts_payer <- function(
dirname(out)
}

#' Blank out missing payer descriptor fields in receipts
#'
#' Replaces `NA` with the empty string `""` in the payer name, occupation,
#' employer and address columns listed below. All other columns are passed
#' through untouched.
#'
#' @param receipts The receipts table to clean.
#' @return `receipts` with `NA` replaced by `""` in the listed columns.
receipts_replace_na <- function(receipts) {
  # Columns whose missing values become "" (column name = fill value).
  blank_fill <- list(
    org_name       = "",
    profession     = "",
    employers_name = "",
    street_1       = "",
    city           = "",
    state          = "",
    full_zip       = "",
    country_name   = ""
  )

  tidyr::replace_na(receipts, blank_fill)
}

out_receipts <- function(
path_data_prep_receipts,
path_out_receipts_payer,
@@ -59,14 +74,16 @@ out_receipts <- function(

receipts <-
receipts |>
receipts_replace_na() |>
left_join(receipts_payer, by = cols_payer_common) |>
select(-any_of(cols_payer_common)) |>
select(-any_of(cols_payer_common), -name_sort) |>
mutate(
is_donation = receipt_type_code %in% c("IND", "CPCM", "GEN", "PPTY", "OUTS", "NFPC"),
.after = receipt_type_code
) |>
relocate(payer_id, .after = report_id) |>
collect()
collect() |>
mutate(across(occur_date, lubridate::mdy))

arrow::write_parquet(receipts, out)


+ 4
- 3
process/R/prepare_candidates.R Näytä tiedosto

@@ -113,7 +113,8 @@ fastlink_candidates <- function(candidates_for_matching, candidate_listing_for_m
varnames = c("name_clean", "street", "city"),
stringdist.match = c("name_clean", "street"),
partial.match = c("name_clean", "street"),
stringdist.method = "dl"
stringdist.method = "dl",
threshold.match = 0.9
)
}

@@ -128,12 +129,12 @@ fastlink_match_candidates <- function(
candidates_for_matching,
candidate_listing_for_matching,
candidates_linked,
threshold.match = 0.8
threshold.match = 0.9
)

matches <- as_tibble(matches)

select(matches, sboe_id, candidate_id)
distinct(matches, sboe_id, candidate_id)
}

candidates_match <- function(

Loading…
Peruuta
Tallenna