|
#' Split the raw candidate listing into related candidate tables
#'
#' Opens the dataset found in the directory containing
#' `path_candidate_listing_raw`, drops "No Preference" ballot entries, assigns
#' a surrogate `candidate_id` to every distinct candidate name, and derives
#' per-candidate and per-election tables from the listing.
#'
#' @param path_candidate_listing_raw Path to the raw candidate listing. Only
#'   its parent directory is used; that directory is handed to
#'   `prep_open_dataset_db()` (defined elsewhere).
#'
#' @return A named list with five tables:
#'   * `elections`: the contest-level rows of the listing with `candidate_id`
#'     attached.
#'   * `candidates`: one row per `candidate_id` with the latest ballot name,
#'     current contact details, latest and most frequent party, and contest
#'     counts/dates, passed through `add_address_lookup()`.
#'   * `candidate_name_on_ballot`, `candidate_contact`, `candidate_party`:
#'     per-candidate, per-election attribute tables.
prep_candidate_listing <- function(
  path_candidate_listing_raw
) {
  # Empty strings instead of NA in the name parts so that the name columns can
  # serve as an equality join key.
  candidate_listing_contest <-
    prep_open_dataset_db(fs::path_dir(path_candidate_listing_raw)) |>
    filter(name_on_ballot != "No Preference") |>
    tidyr::replace_na(list(
      first_name = "",
      middle_name = "",
      last_name = "",
      name_suffix_lbl = ""
    ))

  # candidate names ----
  # Extract candidate names, these will be primary keys for the candidates
  # table. The window order covers all four name columns: ordering by only
  # three of them leaves ties between candidates that differ in suffix alone,
  # and because this lazy query is re-evaluated for every table derived below,
  # a tie could hand the same candidate different ids in different tables.
  candidate_names <-
    candidate_listing_contest |>
    dbplyr::window_order(
      last_name, first_name, middle_name, name_suffix_lbl
    ) |>
    distinct(first_name, middle_name, last_name, name_suffix_lbl) |>
    mutate(candidate_id = row_number(), .before = 1)

  # candidate_name_on_ballot ----
  candidate_name_on_ballot <-
    extract_candidate_info(
      candidate_listing_contest,
      candidate_names,
      info_vars = c("name_on_ballot")
    ) |>
    collect()

  # candidate_address ----
  # REGEXP_REPLACE is not an R function; presumably dbplyr passes it through
  # untranslated to the database backend -- NOTE(review): confirm backend.
  candidate_address <-
    candidate_listing_contest |>
    mutate(
      phone = coalesce(phone, office_phone, business_phone),
      street_address = toupper(street_address),
      street_address = REGEXP_REPLACE(street_address, " +", " ", "g"),
      street_address = trimws(street_address),
    ) |>
    select(-office_phone, -business_phone) |>
    extract_candidate_info(
      candidate_names,
      info_vars = c(
        "street_address", "city", "state", "zip_code", "phone", "email"
      )
    ) |>
    collect() |>
    rename(street = street_address)

  # candidate_party ----
  candidate_party <-
    candidate_listing_contest |>
    extract_candidate_info(
      candidate_names,
      info_vars = c("party_candidate")
    ) |>
    collect() |>
    mutate(party_candidate = forcats::fct_inorder(party_candidate))

  # Extract contests (remaining data in candidate_listing) ----
  cols_candidate_id <- intersect(
    colnames(candidate_names),
    colnames(candidate_listing_contest)
  )
  # NOTE(review): `candidate_address` was renamed to `street`, so
  # "street_address" is not part of `cols_related`; likewise `office_phone`
  # and `business_phone`. If those columns fall inside the
  # `election_dt:name_suffix_lbl` range they stay in `contests` -- confirm
  # whether that is intended.
  cols_related <- setdiff(
    c(
      colnames(candidate_name_on_ballot),
      colnames(candidate_address),
      colnames(candidate_party)
    ),
    "election_dt"
  )

  contests <-
    candidate_listing_contest |>
    select(election_dt:name_suffix_lbl) |>
    select(-any_of(cols_related)) |>
    left_join(candidate_names, by = cols_candidate_id) |>
    relocate(candidate_id, .before = first_name) |>
    collect()

  # Get current complete contact information ----
  candidate_contact_current <-
    candidate_listing_current_contact_info(candidate_address)

  # Per-candidate summaries joined into `candidates` below ----
  # with_ties = FALSE: a candidate may appear under several ballot names in
  # the latest election; keeping ties would violate the one-to-one join.
  name_on_ballot_latest <-
    candidate_name_on_ballot |>
    slice_max(election_dt, by = candidate_id, n = 1, with_ties = FALSE) |>
    select(-election_dt)

  # Party in the latest election; ties are broken by factor level order.
  party_last <-
    candidate_party |>
    filter(!is.na(party_candidate)) |>
    group_by(candidate_id) |>
    slice_max(election_dt, n = 1) |>
    arrange(party_candidate) |>
    slice_head(n = 1) |>
    ungroup() |>
    select(-election_dt) |>
    rename(party_last = party_candidate)

  # Most frequently listed party; ties are broken by factor level order.
  party_most <-
    candidate_party |>
    filter(!is.na(party_candidate)) |>
    group_by(candidate_id) |>
    count(party_candidate) |>
    slice_max(n, n = 1) |>
    arrange(party_candidate) |>
    slice_head(n = 1) |>
    ungroup() |>
    select(-n) |>
    rename(party_most = party_candidate)

  # Number of distinct (election, contest) pairs plus first and latest date.
  contest_summary <-
    contests |>
    group_by(candidate_id) |>
    distinct(candidate_id, election_dt, contest_name) |>
    summarize(
      contest_n = n(),
      contest_first = min(election_dt),
      contest_latest = max(election_dt)
    )

  # Join candidates into one big table ----
  candidates <-
    candidate_names |>
    collect() |>
    left_join(
      name_on_ballot_latest,
      by = "candidate_id",
      relationship = "one-to-one"
    ) |>
    left_join(
      candidate_contact_current,
      by = "candidate_id",
      relationship = "one-to-one"
    ) |>
    left_join(
      party_last,
      by = "candidate_id",
      relationship = "one-to-one"
    ) |>
    left_join(
      party_most,
      by = "candidate_id",
      relationship = "one-to-one"
    ) |>
    left_join(
      contest_summary,
      by = "candidate_id",
      relationship = "one-to-one"
    ) |>
    relocate(name_on_ballot, .before = first_name) |>
    relocate(starts_with("party"), .before = street) |>
    relocate(starts_with("contest"), .before = street) |>
    add_address_lookup(street = street, postal_code = zip_code)

  # Return list of tables
  list(
    elections = contests,
    candidates = candidates,
    candidate_name_on_ballot = candidate_name_on_ballot,
    candidate_contact = candidate_address,
    candidate_party = candidate_party
  )
}
-
#' Extract per-election candidate attributes keyed by `candidate_id`
#'
#' Keeps the name columns, `election_dt` and the requested `info_vars`,
#' replaces the name columns by the matching `candidate_id`, removes duplicate
#' rows and orders the result.
#'
#' @param candidate_listing_contest Candidate listing table containing the
#'   columns `first_name` through `name_suffix_lbl`, `election_dt` and every
#'   column named in `info_vars`.
#' @param candidate_names Lookup table with `candidate_id` plus the four name
#'   columns `first_name`, `middle_name`, `last_name`, `name_suffix_lbl`.
#' @param info_vars Character vector of column names to extract.
#'
#' @return A table with `candidate_id` first, followed by `election_dt` and the
#'   `info_vars` columns, sorted by `candidate_id`, `election_dt` and then the
#'   `info_vars` columns in the order given.
extract_candidate_info <- function(
  candidate_listing_contest,
  candidate_names,
  info_vars
) {
  name_cols <- c("first_name", "middle_name", "last_name", "name_suffix_lbl")

  candidate_listing_contest |>
    select(first_name:name_suffix_lbl, election_dt, all_of(info_vars)) |>
    distinct() |>
    left_join(candidate_names, by = name_cols) |>
    select(-all_of(name_cols)) |>
    relocate(candidate_id, .before = 1) |>
    # Dropping the name columns can expose new duplicates.
    distinct() |>
    # Inject the column names as symbols. `!!info_vars` would inject the
    # character vector itself, i.e. sort by a constant, leaving the order of
    # rows within one candidate and election undefined.
    arrange(candidate_id, election_dt, !!!lapply(info_vars, as.name))
}
-
#' Pick the current contact details for every candidate
#'
#' Phone and email are each taken from the most recent election in which the
#' value is not missing. The address is taken from the candidate's most recent
#' election; when that election lists several distinct streets, non-PO-box
#' streets are preferred.
#'
#' @param candidate_address Data frame with the columns `candidate_id`,
#'   `election_dt`, `street` through `zip_code`, `phone` and `email`.
#'
#' @return A data frame with one row per `candidate_id` holding
#'   `street:zip_code`, `phone` and `email`.
candidate_listing_current_contact_info <- function(candidate_address) {
  candidate_phone_current <-
    candidate_address |>
    filter(!is.na(phone)) |>
    slice_max(election_dt, by = candidate_id, n = 1, with_ties = FALSE) |>
    select(candidate_id, phone)

  candidate_email_current <-
    candidate_address |>
    filter(!is.na(email)) |>
    slice_max(election_dt, by = candidate_id, n = 1, with_ties = FALSE) |>
    select(candidate_id, email)

  candidate_address_current <-
    candidate_address |>
    group_by(candidate_id) |>
    slice_max(election_dt, n = 1) |>
    select(-election_dt) |>
    # Prefer a street address over a PO box when both are listed. If every
    # listed address is a PO box, keep them all: dropping them would remove
    # the candidate from the result and with it the phone and email joined
    # below.
    filter(
      n_distinct(street) == 1 |
        !grepl("PO BOX", street) |
        all(grepl("PO BOX", street))
    ) |>
    slice_head(n = 1) |>
    select(candidate_id, street:zip_code) |>
    ungroup()

  candidate_address_current |>
    left_join(
      candidate_phone_current,
      by = "candidate_id",
      relationship = "one-to-one"
    ) |>
    left_join(
      candidate_email_current,
      by = "candidate_id",
      relationship = "one-to-one"
    )
}
|