Selaa lähdekoodia

fill in missing candidate address with committee's address

main
Garrick Aden-Buie 2 vuotta sitten
vanhempi
commit
77cb0758bb
No known key found for this signature in database
4 muutettua tiedostoa jossa 43 lisäystä ja 32 poistoa
  1. +9
    -6
      process/R/out-cover-committees.R
  2. +9
    -1
      process/R/prepare_candidates.R
  3. +1
    -1
      process/_targets.R
  4. +24
    -24
      process/_targets/meta/meta

+ 9
- 6
process/R/out-cover-committees.R Näytä tiedosto

committee_name_address <- committee_name_address <-
cover |> cover |>
inner_join( inner_join(
report_list |> select(report_id, doc_order),
report_list |> select(report_id, year, doc_order),
by = "report_id" by = "report_id"
) |> ) |>
group_by(sboe_id) |> group_by(sboe_id) |>
slice_max(year, n = 1) |>
slice_max(doc_order, n = 1) |> slice_max(doc_order, n = 1) |>
slice_max(date_filed, n = 1, with_ties = FALSE) |>
ungroup() |> ungroup() |>
select(sboe_id, committee_name, report_id, street_1, street_2, city, state, zip_code) |> select(sboe_id, committee_name, report_id, street_1, street_2, city, state, zip_code) |>
add_address_lookup(postal_code = zip_code)
add_address_lookup(postal_code = zip_code) |>
collect()


committee_type <- committee_type <-
cover |> cover |>
group_by(sboe_id) |> group_by(sboe_id) |>
count(committee_type, fund_type, fund_name) |> count(committee_type, fund_type, fund_name) |>
slice_max(n, n = 1) |>
slice_max(n, n = 1, with_ties = FALSE) |>
ungroup() |> ungroup() |>
select(-n)
select(-n) |>
collect()


committees <- committees <-
committee_name_address |> committee_name_address |>
left_join(committee_type, by = "sboe_id") |>
collect()
left_join(committee_type, by = join_by(sboe_id), relationship = "one-to-one")


arrow::write_parquet(committees, out) arrow::write_parquet(committees, out)



+ 9
- 1
process/R/prepare_candidates.R Näytä tiedosto

prepare_candidates_for_matching <- function( prepare_candidates_for_matching <- function(
path_data_prep_officers, path_data_prep_officers,
path_out_report_list, path_out_report_list,
path_out_committees,
path_addresses = "data-out/addresses" path_addresses = "data-out/addresses"
) { ) {
lg_info_target(lg_get_logger()) lg_info_target(lg_get_logger())


report_list <- out_open_dataset_db(path_out_report_list) report_list <- out_open_dataset_db(path_out_report_list)
addresses <- out_open_dataset_db(path_addresses) addresses <- out_open_dataset_db(path_addresses)
committees <- out_open_dataset_db(path_out_committees)
candidates_db <- out_open_dataset_db(path_data_prep_officers)


candidates <- candidates <-
out_open_dataset_db(path_data_prep_officers) |>
candidates_db |>
semi_join(report_list, by = "report_id") |> semi_join(report_list, by = "report_id") |>
filter(type == "Candidate") |> filter(type == "Candidate") |>
rows_patch(
committees |> select(sboe_id, address = address_lookup),
by = "sboe_id",
unmatched = "ignore"
) |>
mutate( mutate(
name_clean = toupper(name), name_clean = toupper(name),
name_clean = REGEXP_REPLACE(name_clean, " FOR .+$", "", "g"), name_clean = REGEXP_REPLACE(name_clean, " FOR .+$", "", "g"),

+ 1
- 1
process/_targets.R Näytä tiedosto

# Candidates -------------------------------------------------------------- # Candidates --------------------------------------------------------------
tar_target( tar_target(
candidates_for_matching, candidates_for_matching,
prepare_candidates_for_matching(path_data_prep_officers, path_out_report_list)
prepare_candidates_for_matching(path_data_prep_officers, path_out_report_list, path_out_committees)
), ),
tar_target( tar_target(
candidate_listing_for_matching, candidate_listing_for_matching,

+ 24
- 24
process/_targets/meta/meta
File diff suppressed because it is too large
Näytä tiedosto


Loading…
Peruuta
Tallenna