| committee_name_address <- | committee_name_address <- | ||||
| cover |> | cover |> | ||||
| inner_join( | inner_join( | ||||
| report_list |> select(report_id, doc_order), | |||||
| report_list |> select(report_id, year, doc_order), | |||||
| by = "report_id" | by = "report_id" | ||||
| ) |> | ) |> | ||||
| group_by(sboe_id) |> | group_by(sboe_id) |> | ||||
| slice_max(year, n = 1) |> | |||||
| slice_max(doc_order, n = 1) |> | slice_max(doc_order, n = 1) |> | ||||
| slice_max(date_filed, n = 1, with_ties = FALSE) |> | |||||
| ungroup() |> | ungroup() |> | ||||
| select(sboe_id, committee_name, report_id, street_1, street_2, city, state, zip_code) |> | select(sboe_id, committee_name, report_id, street_1, street_2, city, state, zip_code) |> | ||||
| add_address_lookup(postal_code = zip_code) | |||||
| add_address_lookup(postal_code = zip_code) |> | |||||
| collect() | |||||
| committee_type <- | committee_type <- | ||||
| cover |> | cover |> | ||||
| group_by(sboe_id) |> | group_by(sboe_id) |> | ||||
| count(committee_type, fund_type, fund_name) |> | count(committee_type, fund_type, fund_name) |> | ||||
| slice_max(n, n = 1) |> | |||||
| slice_max(n, n = 1, with_ties = FALSE) |> | |||||
| ungroup() |> | ungroup() |> | ||||
| select(-n) | |||||
| select(-n) |> | |||||
| collect() | |||||
| committees <- | committees <- | ||||
| committee_name_address |> | committee_name_address |> | ||||
| left_join(committee_type, by = "sboe_id") |> | |||||
| collect() | |||||
| left_join(committee_type, by = join_by(sboe_id), relationship = "one-to-one") | |||||
| arrow::write_parquet(committees, out) | arrow::write_parquet(committees, out) | ||||
| prepare_candidates_for_matching <- function( | prepare_candidates_for_matching <- function( | ||||
| path_data_prep_officers, | path_data_prep_officers, | ||||
| path_out_report_list, | path_out_report_list, | ||||
| path_out_committees, | |||||
| path_addresses = "data-out/addresses" | path_addresses = "data-out/addresses" | ||||
| ) { | ) { | ||||
| lg_info_target(lg_get_logger()) | lg_info_target(lg_get_logger()) | ||||
| report_list <- out_open_dataset_db(path_out_report_list) | report_list <- out_open_dataset_db(path_out_report_list) | ||||
| addresses <- out_open_dataset_db(path_addresses) | addresses <- out_open_dataset_db(path_addresses) | ||||
| committees <- out_open_dataset_db(path_out_committees) | |||||
| candidates_db <- out_open_dataset_db(path_data_prep_officers) | |||||
| candidates <- | candidates <- | ||||
| out_open_dataset_db(path_data_prep_officers) |> | |||||
| candidates_db |> | |||||
| semi_join(report_list, by = "report_id") |> | semi_join(report_list, by = "report_id") |> | ||||
| filter(type == "Candidate") |> | filter(type == "Candidate") |> | ||||
| rows_patch( | |||||
| committees |> select(sboe_id, address = address_lookup), | |||||
| by = "sboe_id", | |||||
| unmatched = "ignore" | |||||
| ) |> | |||||
| mutate( | mutate( | ||||
| name_clean = toupper(name), | name_clean = toupper(name), | ||||
| name_clean = REGEXP_REPLACE(name_clean, " FOR .+$", "", "g"), | name_clean = REGEXP_REPLACE(name_clean, " FOR .+$", "", "g"), |
| # Candidates -------------------------------------------------------------- | # Candidates -------------------------------------------------------------- | ||||
| tar_target( | tar_target( | ||||
| candidates_for_matching, | candidates_for_matching, | ||||
| prepare_candidates_for_matching(path_data_prep_officers, path_out_report_list) | |||||
| prepare_candidates_for_matching(path_data_prep_officers, path_out_report_list, path_out_committees) | |||||
| ), | ), | ||||
| tar_target( | tar_target( | ||||
| candidate_listing_for_matching, | candidate_listing_for_matching, |