Garrick Aden-Buie пре 2 година
родитељ
комит
1775eb0526
No known key found for this signature in database
1 измењених фајлова са 49 додато и 64 уклоњено
  1. +49
    -64
      process/R/out_candidate_listing.R

+ 49
- 64
process/R/out_candidate_listing.R Прегледај датотеку

@@ -1,49 +1,3 @@
# Link the distinct candidate records against themselves with fastLink to
# surface rows that probably describe the same candidate.
#
# `candidate_listing_raw` must provide the name and address columns listed in
# the `distinct()` call below.
#
# Returns a list with three elements:
#   * data:    the distinct records plus `name_on_ballot_clean`
#   * linked:  the object returned by `fastLink::fastLink()`
#   * matches: the `fastLink::getMatches()` result converted to a tibble
fastlink_candidate_listing <- function(candidate_listing_raw) {
  # Fields compared between record pairs.
  link_vars <- c(
    "name_on_ballot_clean",
    "first_name",
    "middle_name",
    "last_name",
    "name_suffix_lbl",
    "street_address",
    "city"
  )

  unique_records <- distinct(
    candidate_listing_raw,
    name_on_ballot,
    first_name,
    middle_name,
    last_name,
    name_suffix_lbl,
    street_address,
    city
  )

  # Remove the first " (...)" group from the ballot name before comparing.
  data <- mutate(
    unique_records,
    name_on_ballot_clean = sub(" \\(.+?\\)\\s?", "", name_on_ballot)
  )

  # The same table is passed as both sides, so records are linked against
  # themselves. Only `middle_name` is compared by Jaro-Winkler string distance.
  linked <- fastLink::fastLink(
    dfA = data,
    dfB = data,
    varnames = link_vars,
    stringdist.match = "middle_name",
    stringdist.method = "jw",
    jw.weight = 0.25,
    threshold.match = 0.98
  )

  # Matches are extracted at a 0.9 threshold, lower than the 0.98 used above.
  matches <- as_tibble(
    fastLink::getMatches(
      dfA = data,
      dfB = data,
      linked,
      threshold.match = 0.9
    )
  )

  list(data = data, linked = linked, matches = matches)
}

prep_candidates_dedupe_mapping <- function(
candidate_listing_raw,
candidate_listing_dedupe
@@ -305,24 +259,6 @@ prep_candidate_listing <- function(
)
}

# Build a per-candidate table of the requested info columns.
#
# Arguments:
#   candidate_listing_contest  Table with the columns spanned by
#                              `first_name:name_suffix_lbl`, `election_dt`,
#                              and every column named in `info_vars`.
#   candidate_names            Lookup table joined on the four name columns;
#                              must supply `candidate_id`.
#   info_vars                  Character vector of column names to keep.
#
# Returns the distinct rows with the four name columns dropped and
# `candidate_id` moved to the first position, sorted by `candidate_id`,
# `election_dt`, and then the `info_vars` columns in the order given.
extract_candidate_info <- function(
  candidate_listing_contest,
  candidate_names,
  info_vars
) {
  name_cols <- c("first_name", "middle_name", "last_name", "name_suffix_lbl")

  # Keep only the name range, the election date and the requested columns.
  info_rows <- distinct(
    select(
      candidate_listing_contest,
      first_name:name_suffix_lbl,
      election_dt,
      all_of(info_vars)
    )
  )

  # Attach the columns from `candidate_names`, then drop the name parts that
  # were only needed as join keys.
  with_ids <- left_join(info_rows, candidate_names, by = name_cols)
  with_ids <- select(with_ids, -all_of(name_cols))

  # De-duplicate again: removing the name columns can leave repeated rows.
  result <- distinct(relocate(with_ids, candidate_id, .before = 1))

  arrange(result, candidate_id, election_dt, !!!rlang::syms(info_vars))
}

candidate_listing_current_contact_info <- function(candidate_address) {
candidate_phone_current <-
candidate_address |>
@@ -352,3 +288,52 @@ candidate_listing_current_contact_info <- function(candidate_address) {
left_join(candidate_phone_current, by = "candidate_id", relationship = "one-to-one") |>
left_join(candidate_email_current, by = "candidate_id", relationship = "one-to-one")
}

# This function isn't used anymore -- I opted for a more manual approach of
# using overlapping signals. But this general idea could work in other places,
# e.g. for deduping donors.
#
# Links the distinct candidate records against themselves with fastLink and
# returns a list holding the prepared records (`data`), the fastLink result
# (`linked`) and the extracted matches as a tibble (`matches`).
fastlink_candidate_listing <- function(candidate_listing_raw) {
  # One row per unique combination of the name and address fields.
  records <- distinct(
    candidate_listing_raw,
    name_on_ballot,
    first_name,
    middle_name,
    last_name,
    name_suffix_lbl,
    street_address,
    city
  )

  # Strip the first " (...)" group from the ballot name.
  records <- mutate(
    records,
    name_on_ballot_clean = sub(" \\(.+?\\)\\s?", "", name_on_ballot)
  )

  compare_cols <- c(
    "name_on_ballot_clean",
    "first_name",
    "middle_name",
    "last_name",
    "name_suffix_lbl",
    "street_address",
    "city"
  )

  # Both sides are the same table; `middle_name` is the only field compared
  # with Jaro-Winkler string distance.
  fl_out <- fastLink::fastLink(
    records,
    records,
    varnames = compare_cols,
    stringdist.match = "middle_name",
    stringdist.method = "jw",
    jw.weight = 0.25,
    threshold.match = 0.98
  )

  # Extraction uses a 0.9 threshold, lower than the 0.98 passed to fastLink().
  matched <- fastLink::getMatches(
    records,
    records,
    fl_out,
    threshold.match = 0.9
  )

  list(
    data = records,
    linked = fl_out,
    matches = as_tibble(matched)
  )
}

Loading…
Откажи
Сачувај