| @@ -29,27 +29,30 @@ out_committees <- function(path_out_cover, path_out_report_list) { | |||
| committee_name_address <- | |||
| cover |> | |||
| inner_join( | |||
| report_list |> select(report_id, doc_order), | |||
| report_list |> select(report_id, year, doc_order), | |||
| by = "report_id" | |||
| ) |> | |||
| group_by(sboe_id) |> | |||
| slice_max(year, n = 1) |> | |||
| slice_max(doc_order, n = 1) |> | |||
| slice_max(date_filed, n = 1, with_ties = FALSE) |> | |||
| ungroup() |> | |||
| select(sboe_id, committee_name, report_id, street_1, street_2, city, state, zip_code) |> | |||
| add_address_lookup(postal_code = zip_code) | |||
| add_address_lookup(postal_code = zip_code) |> | |||
| collect() | |||
| committee_type <- | |||
| cover |> | |||
| group_by(sboe_id) |> | |||
| count(committee_type, fund_type, fund_name) |> | |||
| slice_max(n, n = 1) |> | |||
| slice_max(n, n = 1, with_ties = FALSE) |> | |||
| ungroup() |> | |||
| select(-n) | |||
| select(-n) |> | |||
| collect() | |||
| committees <- | |||
| committee_name_address |> | |||
| left_join(committee_type, by = "sboe_id") |> | |||
| collect() | |||
| left_join(committee_type, by = join_by(sboe_id), relationship = "one-to-one") | |||
| arrow::write_parquet(committees, out) | |||
| @@ -1,17 +1,25 @@ | |||
| prepare_candidates_for_matching <- function( | |||
| path_data_prep_officers, | |||
| path_out_report_list, | |||
| path_out_committees, | |||
| path_addresses = "data-out/addresses" | |||
| ) { | |||
| lg_info_target(lg_get_logger()) | |||
| report_list <- out_open_dataset_db(path_out_report_list) | |||
| addresses <- out_open_dataset_db(path_addresses) | |||
| committees <- out_open_dataset_db(path_out_committees) | |||
| candidates_db <- out_open_dataset_db(path_data_prep_officers) | |||
| candidates <- | |||
| out_open_dataset_db(path_data_prep_officers) |> | |||
| candidates_db |> | |||
| semi_join(report_list, by = "report_id") |> | |||
| filter(type == "Candidate") |> | |||
| rows_patch( | |||
| committees |> select(sboe_id, address = address_lookup), | |||
| by = "sboe_id", | |||
| unmatched = "ignore" | |||
| ) |> | |||
| mutate( | |||
| name_clean = toupper(name), | |||
| name_clean = REGEXP_REPLACE(name_clean, " FOR .+$", "", "g"), | |||
| @@ -208,7 +208,7 @@ list( | |||
| # Candidates -------------------------------------------------------------- | |||
| tar_target( | |||
| candidates_for_matching, | |||
| prepare_candidates_for_matching(path_data_prep_officers, path_out_report_list) | |||
| prepare_candidates_for_matching(path_data_prep_officers, path_out_report_list, path_out_committees) | |||
| ), | |||
| tar_target( | |||
| candidate_listing_for_matching, | |||