| @@ -1,49 +1,3 @@ | |||
# Link the candidate listing against itself with fastLink.
#
# The raw listing is reduced to its distinct name/address combinations and a
# `name_on_ballot_clean` column is added: `name_on_ballot` with the first
# space-prefixed "(...)" group (plus one trailing whitespace character, if
# present) removed. That table is then passed to fastLink as both inputs.
#
# Returns a list with three elements:
#   data    - the distinct rows handed to fastLink
#   linked  - the object returned by fastLink::fastLink()
#   matches - the result of fastLink::getMatches(), as a tibble
fastlink_candidate_listing <- function(candidate_listing_raw) {
  # Every column used for linkage; `name_on_ballot_clean` is derived below.
  link_vars <- c(
    "name_on_ballot_clean",
    "first_name", "middle_name", "last_name", "name_suffix_lbl",
    "street_address", "city"
  )

  unique_rows <- distinct(
    candidate_listing_raw,
    name_on_ballot,
    first_name,
    middle_name,
    last_name,
    name_suffix_lbl,
    street_address,
    city
  )
  data <- mutate(
    unique_rows,
    name_on_ballot_clean = sub(" \\(.+?\\)\\s?", "", name_on_ballot)
  )

  # Only `middle_name` is compared by string distance (Jaro-Winkler); the
  # remaining variables are left at fastLink's default comparison.
  linked <- fastLink::fastLink(
    dfA = data,
    dfB = data,
    varnames = link_vars,
    stringdist.match = "middle_name",
    stringdist.method = "jw",
    jw.weight = 0.25,
    threshold.match = 0.98
  )

  # NOTE(review): matches are extracted at 0.9 while the model above was run
  # with 0.98 -- confirm the two thresholds are meant to differ.
  matches <- as_tibble(
    fastLink::getMatches(
      dfA = data,
      dfB = data,
      fl.out = linked,
      threshold.match = 0.9
    )
  )

  list(data = data, linked = linked, matches = matches)
}
| prep_candidates_dedupe_mapping <- function( | |||
| candidate_listing_raw, | |||
| candidate_listing_dedupe | |||
| @@ -305,24 +259,6 @@ prep_candidate_listing <- function( | |||
| ) | |||
| } | |||
# Build a de-duplicated table of `info_vars` keyed by candidate and election.
#
# Arguments:
#   candidate_listing_contest - table holding the columns `first_name` through
#     `name_suffix_lbl`, `election_dt`, and every column named in `info_vars`.
#   candidate_names - table joined on the four name columns; `candidate_id` is
#     expected to be available after the join (presumably supplied here --
#     verify against caller).
#   info_vars - character vector of column names to carry through.
#
# Returns the joined rows without the four name columns, `candidate_id` first,
# distinct, and sorted by `candidate_id`, `election_dt`, then `info_vars`.
extract_candidate_info <- function(
  candidate_listing_contest,
  candidate_names,
  info_vars
) {
  name_keys <- c("first_name", "middle_name", "last_name", "name_suffix_lbl")

  info_rows <- distinct(
    select(
      candidate_listing_contest,
      first_name:name_suffix_lbl,
      election_dt,
      all_of(info_vars)
    )
  )

  with_ids <- left_join(info_rows, candidate_names, by = name_keys)

  # Once the id is attached the name columns are dropped; a second distinct()
  # removes any rows that became identical as a result.
  keyed <- with_ids |>
    select(-all_of(name_keys)) |>
    relocate(candidate_id, .before = 1) |>
    distinct()

  arrange(keyed, candidate_id, election_dt, !!!rlang::syms(info_vars))
}
| candidate_listing_current_contact_info <- function(candidate_address) { | |||
| candidate_phone_current <- | |||
| candidate_address |> | |||
| @@ -352,3 +288,52 @@ candidate_listing_current_contact_info <- function(candidate_address) { | |||
| left_join(candidate_phone_current, by = "candidate_id", relationship = "one-to-one") |> | |||
| left_join(candidate_email_current, by = "candidate_id", relationship = "one-to-one") | |||
| } | |||
| # This function isn't used anymore -- I opted for a more manual approach of | |||
| # using overlapping signals. But this general idea could work in other places, | |||
| # e.g. for deduping donors. | |||
# Link the candidate listing against itself with fastLink.
#
# The raw listing is reduced to its distinct name/address combinations and a
# `name_on_ballot_clean` column is added: `name_on_ballot` with the first
# space-prefixed "(...)" group (plus one trailing whitespace character, if
# present) removed. That table is then passed to fastLink as both inputs.
#
# Returns a list with three elements:
#   data    - the distinct rows handed to fastLink
#   linked  - the object returned by fastLink::fastLink()
#   matches - the result of fastLink::getMatches(), as a tibble
fastlink_candidate_listing <- function(candidate_listing_raw) {
  # Every column used for linkage; `name_on_ballot_clean` is derived below.
  link_vars <- c(
    "name_on_ballot_clean",
    "first_name", "middle_name", "last_name", "name_suffix_lbl",
    "street_address", "city"
  )

  unique_rows <- distinct(
    candidate_listing_raw,
    name_on_ballot,
    first_name,
    middle_name,
    last_name,
    name_suffix_lbl,
    street_address,
    city
  )
  data <- mutate(
    unique_rows,
    name_on_ballot_clean = sub(" \\(.+?\\)\\s?", "", name_on_ballot)
  )

  # Only `middle_name` is compared by string distance (Jaro-Winkler); the
  # remaining variables are left at fastLink's default comparison.
  linked <- fastLink::fastLink(
    dfA = data,
    dfB = data,
    varnames = link_vars,
    stringdist.match = "middle_name",
    stringdist.method = "jw",
    jw.weight = 0.25,
    threshold.match = 0.98
  )

  # NOTE(review): matches are extracted at 0.9 while the model above was run
  # with 0.98 -- confirm the two thresholds are meant to differ.
  matches <- as_tibble(
    fastLink::getMatches(
      dfA = data,
      dfB = data,
      fl.out = linked,
      threshold.match = 0.9
    )
  )

  list(data = data, linked = linked, matches = matches)
}