Bladeren bron

fill in missing candidate address with committee's address

main
Garrick Aden-Buie 2 jaren geleden
bovenliggende
commit
77cb0758bb
Geen bekende sleutel gevonden voor deze handtekening in de database
4 gewijzigde bestanden met 43 toevoegingen en 32 verwijderingen
  1. +9
    -6
      process/R/out-cover-committees.R
  2. +9
    -1
      process/R/prepare_candidates.R
  3. +1
    -1
      process/_targets.R
  4. +24
    -24
      process/_targets/meta/meta

+ 9
- 6
process/R/out-cover-committees.R Bestand weergeven

@@ -29,27 +29,30 @@ out_committees <- function(path_out_cover, path_out_report_list) {
committee_name_address <-
cover |>
inner_join(
report_list |> select(report_id, doc_order),
report_list |> select(report_id, year, doc_order),
by = "report_id"
) |>
group_by(sboe_id) |>
slice_max(year, n = 1) |>
slice_max(doc_order, n = 1) |>
slice_max(date_filed, n = 1, with_ties = FALSE) |>
ungroup() |>
select(sboe_id, committee_name, report_id, street_1, street_2, city, state, zip_code) |>
add_address_lookup(postal_code = zip_code)
add_address_lookup(postal_code = zip_code) |>
collect()

committee_type <-
cover |>
group_by(sboe_id) |>
count(committee_type, fund_type, fund_name) |>
slice_max(n, n = 1) |>
slice_max(n, n = 1, with_ties = FALSE) |>
ungroup() |>
select(-n)
select(-n) |>
collect()

committees <-
committee_name_address |>
left_join(committee_type, by = "sboe_id") |>
collect()
left_join(committee_type, by = join_by(sboe_id), relationship = "one-to-one")

arrow::write_parquet(committees, out)


+ 9
- 1
process/R/prepare_candidates.R Bestand weergeven

@@ -1,17 +1,25 @@
prepare_candidates_for_matching <- function(
path_data_prep_officers,
path_out_report_list,
path_out_committees,
path_addresses = "data-out/addresses"
) {
lg_info_target(lg_get_logger())

report_list <- out_open_dataset_db(path_out_report_list)
addresses <- out_open_dataset_db(path_addresses)
committees <- out_open_dataset_db(path_out_committees)
candidates_db <- out_open_dataset_db(path_data_prep_officers)

candidates <-
out_open_dataset_db(path_data_prep_officers) |>
candidates_db |>
semi_join(report_list, by = "report_id") |>
filter(type == "Candidate") |>
rows_patch(
committees |> select(sboe_id, address = address_lookup),
by = "sboe_id",
unmatched = "ignore"
) |>
mutate(
name_clean = toupper(name),
name_clean = REGEXP_REPLACE(name_clean, " FOR .+$", "", "g"),

+ 1
- 1
process/_targets.R Bestand weergeven

@@ -208,7 +208,7 @@ list(
# Candidates --------------------------------------------------------------
tar_target(
candidates_for_matching,
prepare_candidates_for_matching(path_data_prep_officers, path_out_report_list)
prepare_candidates_for_matching(path_data_prep_officers, path_out_report_list, path_out_committees)
),
tar_target(
candidate_listing_for_matching,

+ 24
- 24
process/_targets/meta/meta
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


Laden…
Annuleren
Opslaan