Преглед на файлове

move around unused code

main
Garrick Aden-Buie преди 2 години
родител
ревизия
769eb5546d
No known key found for this signature in database
променени са 1 файла, в които са добавени 56 реда и са изтрити 54 реда
  1. +56
    -54
      process/R/out_candidate_listing.R

+ 56
- 54
process/R/out_candidate_listing.R Целия файл

# Build a mapping from every distinct candidate name to a deduplicated
# candidate group.
#
# Arguments:
#   candidate_listing_raw: data frame with the name columns `name_on_ballot`,
#     `first_name`, `middle_name`, `last_name`, `name_suffix_lbl`, plus
#     `election_dt`.
#   candidate_listing_dedupe: object whose `$matches` element is a data frame
#     holding the same name columns and a `dedupe.ids` column.
#
# Returns: the `candidate_id` -> `candidate_group` mapping joined with the
#   name columns and `contest_last` (the latest `election_dt` for that name).
#   `candidate_group` is the smallest `candidate_id` among the names that the
#   dedupe matches link together.
prep_candidates_dedupe_mapping <- function(
  candidate_listing_raw,
  candidate_listing_dedupe
) {
  # Columns that together identify one candidate name; shared by both joins.
  by_name <- join_by(
    name_on_ballot, first_name, middle_name, last_name, name_suffix_lbl
  )

  # candidate names ----
  # Extract candidate names, these will be primary keys for the candidates table
  candidate_names <-
    candidate_listing_raw |>
    arrange(last_name, first_name, middle_name) |>
    distinct(name_on_ballot, first_name, middle_name, last_name, name_suffix_lbl) |>
    mutate(candidate_id = row_number(), .before = 1)

  # Find last election ----
  candidates_last_contest <-
    candidate_listing_raw |>
    group_by(name_on_ballot, first_name, middle_name, last_name, name_suffix_lbl) |>
    slice_max(election_dt, n = 1) |>
    distinct(contest_last = election_dt)

  # Duplicate sets ----
  # Keep only dedupe ids that cover more than one candidate_id, then split
  # into one table per dedupe id. fct_infreq() orders the factor levels by
  # frequency, so the sets are arranged and split in that order.
  deduped_ids <-
    candidate_listing_dedupe$matches |>
    left_join(candidate_names, by = by_name) |>
    distinct(dupe_id = dedupe.ids, candidate_id) |>
    add_count(dupe_id) |>
    filter(n > 1) |>
    select(-n) |>
    mutate(dupe_id = fct_infreq(paste(dupe_id))) |>
    arrange(dupe_id, candidate_id) |>
    group_split(dupe_id)

  # Every candidate starts out as its own group.
  mapping <-
    candidate_names |>
    select(candidate_id) |>
    mutate(candidate_group = candidate_id)

  for (dupes in deduped_ids) {
    # Current group assignments for the ids in this duplicate set.
    map_group <- left_join(select(dupes, -dupe_id), mapping, by = "candidate_id")
    all_ids <- union(map_group$candidate_id, map_group$candidate_group)

    # Rows that an earlier iteration already placed in one of these groups
    # must be updated as well, otherwise merges would not carry over between
    # overlapping duplicate sets. (A leftover browser() call used to stop
    # here whenever such rows existed; the union below already handles them.)
    map_others <- mapping |> filter(candidate_group %in% all_ids)

    # Recompute current grouping to min of all ids. The table is ungrouped,
    # so min() yields a single value that is recycled to every row.
    update <-
      dplyr::union(map_group, map_others) |>
      mutate(candidate_group = min(candidate_id, candidate_group))

    mapping <- rows_update(mapping, update, by = "candidate_id")
  }

  mapping |>
    left_join(candidate_names, by = "candidate_id") |>
    left_join(candidates_last_contest, by = by_name)
}


prep_dedupe_candidates <- function(candidate_listing_raw) { prep_dedupe_candidates <- function(candidate_listing_raw) {
candidate_names <- candidate_names <-
matches = matches matches = matches
) )
} }

# This also isn't used anymore...
#
# Build a mapping from every distinct candidate name to a deduplicated
# candidate group.
#
# Arguments:
#   candidate_listing_raw: data frame with the name columns `name_on_ballot`,
#     `first_name`, `middle_name`, `last_name`, `name_suffix_lbl`, plus
#     `election_dt`.
#   candidate_listing_dedupe: object whose `$matches` element is a data frame
#     holding the same name columns and a `dedupe.ids` column.
#
# Returns: the `candidate_id` -> `candidate_group` mapping joined with the
#   name columns and `contest_last` (the latest `election_dt` for that name).
#   `candidate_group` is the smallest `candidate_id` among the names that the
#   dedupe matches link together.
prep_candidates_dedupe_mapping <- function(
  candidate_listing_raw,
  candidate_listing_dedupe
) {
  # Columns that together identify one candidate name; shared by both joins.
  by_name <- join_by(
    name_on_ballot, first_name, middle_name, last_name, name_suffix_lbl
  )

  # candidate names ----
  # Extract candidate names, these will be primary keys for the candidates table
  candidate_names <-
    candidate_listing_raw |>
    arrange(last_name, first_name, middle_name) |>
    distinct(name_on_ballot, first_name, middle_name, last_name, name_suffix_lbl) |>
    mutate(candidate_id = row_number(), .before = 1)

  # Find last election ----
  candidates_last_contest <-
    candidate_listing_raw |>
    group_by(name_on_ballot, first_name, middle_name, last_name, name_suffix_lbl) |>
    slice_max(election_dt, n = 1) |>
    distinct(contest_last = election_dt)

  # Duplicate sets ----
  # Keep only dedupe ids that cover more than one candidate_id, then split
  # into one table per dedupe id. fct_infreq() orders the factor levels by
  # frequency, so the sets are arranged and split in that order.
  deduped_ids <-
    candidate_listing_dedupe$matches |>
    left_join(candidate_names, by = by_name) |>
    distinct(dupe_id = dedupe.ids, candidate_id) |>
    add_count(dupe_id) |>
    filter(n > 1) |>
    select(-n) |>
    mutate(dupe_id = fct_infreq(paste(dupe_id))) |>
    arrange(dupe_id, candidate_id) |>
    group_split(dupe_id)

  # Every candidate starts out as its own group.
  mapping <-
    candidate_names |>
    select(candidate_id) |>
    mutate(candidate_group = candidate_id)

  for (dupes in deduped_ids) {
    # Current group assignments for the ids in this duplicate set.
    map_group <- left_join(select(dupes, -dupe_id), mapping, by = "candidate_id")
    all_ids <- union(map_group$candidate_id, map_group$candidate_group)

    # Rows that an earlier iteration already placed in one of these groups
    # must be updated as well, otherwise merges would not carry over between
    # overlapping duplicate sets. (A leftover browser() call used to stop
    # here whenever such rows existed; the union below already handles them.)
    map_others <- mapping |> filter(candidate_group %in% all_ids)

    # Recompute current grouping to min of all ids. The table is ungrouped,
    # so min() yields a single value that is recycled to every row.
    update <-
      dplyr::union(map_group, map_others) |>
      mutate(candidate_group = min(candidate_id, candidate_group))

    mapping <- rows_update(mapping, update, by = "candidate_id")
  }

  mapping |>
    left_join(candidate_names, by = "candidate_id") |>
    left_join(candidates_last_contest, by = by_name)
}

Loading…
Отказ
Запис