| @@ -0,0 +1,272 @@ | |||
| --- | |||
| title: "Republican Donors, 2020-2022" | |||
| author: Garrick Aden-Buie | |||
| format: html | |||
| execute: | |||
| echo: true | |||
| --- | |||
| ## Task | |||
| > I’m working on some donor data and I’m wondering if we may have it already in | |||
| > what we’re doing… I’m trying to pull all disclosed donors ($50+) that gave to | |||
| > Republican campaigns in ’20 and ’22. Even if we couldn’t specify party of | |||
| > campaign but I could sort it, that would even be helpful. Is that something we | |||
| > have already? | |||
| ## Setup | |||
| ```{r setup} | |||
| library(tidyverse) | |||
| library(fs) | |||
| library(zoomerjoin) | |||
| # Load the local `process` package. Helpers used below that are not defined in this | |||
| # file (e.g. `prep_open_dataset_db()`) presumably come from it. | |||
| pkgload::load_all(here::here("process")) | |||
| # Open the datasets this report reads from. | |||
| report_dates <- prep_open_dataset_db("report_dates") | |||
| officers <- prep_open_dataset_db("officers") | |||
| receipts <- prep_open_dataset_db("receipts") | |||
| candidate_listing <- prep_open_dataset_db("candidate_listing") | |||
| cover <- prep_open_dataset_db("cover") | |||
| # The report list is read from the {targets} store and copied into the DuckDB | |||
| # connection so it can be joined against the tables above. `overwrite = TRUE` lets | |||
| # this chunk be re-run in the same session instead of failing because the | |||
| # "report_list" table already exists. | |||
| report_list_local <- targets::tar_read(report_list, store = here::here("process", "_targets")) | |||
| copy_to(duckdb_global_con(), report_list_local, "report_list", overwrite = TRUE) | |||
| report_list <- tbl(duckdb_global_con(), "report_list") | |||
| # Separate SQLite database holding the "resolved" address table. | |||
| con <- DBI::dbConnect(RSQLite::SQLite(), here::here("data-prep", "address_lookup.sqlite")) | |||
| address_db <- tbl(con, "resolved") | |||
| ``` | |||
| ## Reports | |||
| 1. Reports from 2020 and 2022 | |||
| 1. Where an individual donated at least $50 in total to a committee | |||
| 1. Receipt types that match individual donors and not clerical records | |||
| ```{r receipt-types} | |||
| # Tabulate receipt types, most frequent first, and show the top 20. | |||
| print( | |||
|   count(receipts, receipt_type_desc, receipt_type_code, sort = TRUE), | |||
|   n = 20 | |||
| ) | |||
| ``` | |||
| ```{r reports-first-pass} | |||
| # Reports whose `year` is 2020 or 2022. | |||
| report_list_2020_2022 <- | |||
|   report_list |> | |||
|   filter(year %in% c(2020, 2022)) | |||
| # Receipts on those reports, restricted to donor-like receipt types, with an | |||
| # upper-cased donor name and a `donor_address` column added by | |||
| # `add_address_lookup()`. | |||
| receipts_2020_2022_src <- | |||
|   receipts |> | |||
|   semi_join(report_list_2020_2022, by = "report_id") |> | |||
|   mutate(donor_name = toupper(org_name)) |> | |||
|   filter( | |||
|     # Keep individual/party donors; drop record keeping things, like refunds | |||
|     receipt_type_code %in% c("IND", "CPCM", "GEN", "PPTY", "OUTS", "NFPC"), | |||
|     !donor_name %in% c("AGGREGATED INDIVIDUAL CONTRIBUTION", "VARIOUS VARIOUS") | |||
|   ) |> | |||
|   add_address_lookup(name = "donor_address") |> | |||
|   select( | |||
|     sboe_id, report_id, donor_name, donor_address, amount, profession, | |||
|     employers_name, form_of_payment_desc, everything() | |||
|   ) | |||
| # Total given by each donor to each committee, keeping totals of $50 or more. | |||
| # These tables are lazy database queries (they are `collect()`ed later in the | |||
| # file), where aggregates always skip missing values; `na.rm = TRUE` therefore | |||
| # does not change the result, it only silences dbplyr's warning about it. | |||
| total_donations_per_committee <- | |||
|   receipts_2020_2022_src |> | |||
|   group_by(sboe_id, donor_name, donor_address) |> | |||
|   summarize(total = sum(amount, na.rm = TRUE), .groups = "drop") |> | |||
|   filter(total >= 50) | |||
| # All receipts from donors who reached $50 with at least one committee. | |||
| # NOTE(review): the join key does not include `sboe_id`, so a qualifying donor's | |||
| # receipts to other committees are kept here as well -- confirm this is intended. | |||
| receipts_2020_2022 <- | |||
|   receipts_2020_2022_src |> | |||
|   semi_join(total_donations_per_committee, by = c("donor_name", "donor_address")) | |||
| ``` | |||
| ## Donors | |||
| ```{r donors} | |||
| # One row per distinct donor (name + address) with a sequential `donor_id`. | |||
| # `receipts_2020_2022` is already restricted to donors found in | |||
| # `total_donations_per_committee`, so no further filtering is needed here. | |||
| donors <- | |||
|   receipts_2020_2022 |> | |||
|   distinct(donor_name, donor_address) |> | |||
|   # `donors` is a lazy query that is evaluated more than once (below and again when | |||
|   # it is collected). Ordering by name alone leaves ties between donors who share | |||
|   # a name, so ids could be assigned differently on each evaluation; ordering by | |||
|   # both key columns makes the numbering reproducible. | |||
|   dbplyr::window_order(donor_name, donor_address) |> | |||
|   mutate(donor_id = row_number(), .before = 1) | |||
| # Number of receipts and total amount per donor and receipt type. | |||
| # `na.rm = TRUE` only silences dbplyr's warning; SQL sums skip missing values anyway. | |||
| donor_receipt_types <- | |||
|   receipts_2020_2022 |> | |||
|   left_join(donors, by = c("donor_name", "donor_address")) |> | |||
|   group_by(donor_id, receipt_type_desc) |> | |||
|   summarize(n_records = n(), total = sum(amount, na.rm = TRUE), .groups = "drop") | |||
| # A donor's type is the receipt type under which they gave the most in total. | |||
| donor_type <- | |||
|   donor_receipt_types |> | |||
|   slice_max(total, by = donor_id, n = 1, with_ties = FALSE) |> | |||
|   select(donor_id, donor_type = receipt_type_desc) | |||
| ``` | |||
| ## Add Committee Information | |||
| ```{r committee-info} | |||
| # Cover pages of the 2020/2022 reports with each report's `sboe_end_date` attached. | |||
| # No `by` is given, so the join uses every column name the two tables share. | |||
| cover_2020_2022 <- | |||
|   cover |> | |||
|   semi_join(report_list_2020_2022, by = "report_id") |> | |||
|   left_join(report_dates |> select(report_id, sboe_end_date)) | |||
| # Name and address of each committee, taken from its report with the latest | |||
| # `sboe_end_date` that has a committee name. | |||
| committee_names <- | |||
|   cover_2020_2022 |> | |||
|   mutate(committee_name = toupper(committee_name)) |> | |||
|   filter(!is.na(committee_name)) |> | |||
|   add_address_lookup(street_1, city, state, zip_code, name = "committee_address") |> | |||
|   group_by(sboe_id) |> | |||
|   slice_max(sboe_end_date, n = 1, with_ties = FALSE) |> | |||
|   # Drop the grouping, as `committee_types` below does, so `committees` is not | |||
|   # left grouped by `sboe_id`. | |||
|   ungroup() |> | |||
|   select(sboe_id, committee_name, committee_address) | |||
| # Committee type, taken from the latest report that has one. | |||
| committee_types <- | |||
|   cover_2020_2022 |> | |||
|   filter(!is.na(committee_type)) |> | |||
|   group_by(sboe_id) |> | |||
|   slice_max(sboe_end_date, n = 1, with_ties = FALSE) |> | |||
|   ungroup() |> | |||
|   select(sboe_id, committee_type) | |||
| # One row per committee: name, address and type. | |||
| committees <- | |||
|   committee_names |> | |||
|   left_join(committee_types, by = "sboe_id") | |||
| ``` | |||
| ## Candidates | |||
| ```{r candidate} | |||
| # Candidate officer for each committee, taken from the committee's 2020/2022 | |||
| # report with the latest `sboe_end_date`. | |||
| candidate_officers <- filter(officers, type == "Candidate") | |||
| candidates <- | |||
|   candidate_officers |> | |||
|   semi_join(report_list_2020_2022, by = "report_id") |> | |||
|   left_join(select(report_dates, report_id, sboe_end_date)) |> | |||
|   slice_max(sboe_end_date, by = sboe_id, n = 1, with_ties = FALSE) |> | |||
|   select( | |||
|     sboe_id, | |||
|     candidate_name = name, | |||
|     candidate_address = address, | |||
|     candidate_phone = phone | |||
|   ) | |||
| ``` | |||
| ## Candidate Affiliation | |||
| ```{r candidate-affiliations} | |||
| # Replaces the `candidate_listing` opened in the setup chunk with the copy kept in | |||
| # the {targets} store. | |||
| candidate_listing <- targets::tar_read(candidate_listing, store = here::here("process", "_targets")) | |||
| # Republican and Democratic ballot entries for elections held 2020 through 2022. | |||
| candidate_2020_2022 <- | |||
|   candidate_listing |> | |||
|   filter( | |||
|     between(year(election_dt), 2020, 2022), | |||
|     name_on_ballot != "No Preference", | |||
|     party_candidate %in% c("REP", "DEM") | |||
|   ) | |||
| # One party per candidate name: the party on their most recent election. | |||
| candidate_affiliation <- | |||
|   candidate_2020_2022 |> | |||
|   mutate( | |||
|     across(c(first_name, middle_name, last_name), \(x) coalesce(x, "")), | |||
|     candidate_name = paste(first_name, middle_name, last_name), | |||
|     # Collapse repeated spaces and trim the ends (a missing name part otherwise | |||
|     # leaves stray whitespace); the other side of the fuzzy join is squished too. | |||
|     # NOTE(review): that side is also upper-cased -- confirm these names already are. | |||
|     candidate_name = stringr::str_squish(candidate_name) | |||
|   ) |> | |||
|   distinct(election_dt, candidate_name, party_candidate) |> | |||
|   # Group by name only. Grouping by name *and* party made `slice_max()` a no-op and | |||
|   # kept one row per party for a candidate listed under both, so the party picked | |||
|   # after the fuzzy join was arbitrary. | |||
|   group_by(candidate_name) |> | |||
|   slice_max(election_dt, n = 1, with_ties = FALSE) |> | |||
|   ungroup() |> | |||
|   select(-election_dt) | |||
| ``` | |||
| ## Summary | |||
| ```{r collect-data} | |||
| # Pull the lazy database queries into local data frames. | |||
| total_donations_per_committee_lcl <- total_donations_per_committee |> collect() | |||
| donors_lcl <- | |||
|   donors |> | |||
|   left_join(donor_type, by = "donor_id") |> | |||
|   collect() | |||
| # One row per donor with one column per receipt type holding the amount given | |||
| # under that type (0 when none). | |||
| donor_type_amounts_lcl <- | |||
|   donor_receipt_types |> | |||
|   collect() |> | |||
|   left_join(donors_lcl, by = "donor_id") |> | |||
|   select(-donor_id, -n_records) |> | |||
|   relocate(donor_name, donor_address, donor_type) |> | |||
|   rename(name = receipt_type_desc, value = total) |> | |||
|   mutate(name = fct_reorder(name, value, .fun = sum, .desc = TRUE)) |> | |||
|   # `names_sort = TRUE` makes the new columns follow the factor order set above | |||
|   # (largest overall total first); by default `pivot_wider()` orders them by | |||
|   # first appearance, which made the `fct_reorder()` step have no effect. | |||
|   pivot_wider(values_fill = 0, names_sort = TRUE) | |||
| committees_lcl <- committees |> collect() | |||
| # Clean the SBOE candidate names and fuzzy-match them to the candidate listing to | |||
| # pick up a party. | |||
| candidates_lcl <- | |||
|   candidates |> | |||
|   collect() |> | |||
|   replace_na(list(candidate_name = "")) |> | |||
|   mutate( | |||
|     # Remove double-quoted parts of the name. | |||
|     candidate_name = gsub('"[^"]+"', "", candidate_name), | |||
|     # Remove everything from the first "(" through the end of the string. | |||
|     # NOTE(review): a parenthetical in the middle of a name also drops whatever | |||
|     # follows it -- confirm that is acceptable for the names in this data. | |||
|     candidate_name = gsub("\\(.+\\)?$", "", candidate_name), | |||
|     candidate_name = stringr::str_squish(candidate_name), | |||
|     candidate_name = toupper(candidate_name) | |||
|   ) |> | |||
|   jaccard_left_join(candidate_affiliation, by = "candidate_name", threshold = 0.8) |> | |||
|   rename( | |||
|     candidate_name_sboe = candidate_name.x, | |||
|     candidate_name_list = candidate_name.y, | |||
|     candidate_party = party_candidate | |||
|   ) |> | |||
|   # The fuzzy join can return several matches; keep the first one per candidate. | |||
|   group_by(sboe_id, candidate_name_sboe) |> | |||
|   slice(1) |> | |||
|   ungroup() | |||
| # Donor-to-committee totals with donor, committee and candidate details attached, | |||
| # sorted by committee total and then by the donor's total, both descending. | |||
| total_donations <- | |||
|   total_donations_per_committee_lcl |> | |||
|   left_join( | |||
|     donors_lcl |> select(-donor_id), | |||
|     by = c("donor_name", "donor_address") | |||
|   ) |> | |||
|   left_join(committees_lcl, by = "sboe_id") |> | |||
|   left_join(candidates_lcl, by = "sboe_id") |> | |||
|   group_by(sboe_id) |> | |||
|   mutate(total_committee = sum(total)) |> | |||
|   ungroup() |> | |||
|   arrange(desc(total_committee), desc(total)) | |||
| ``` | |||
| ## Output | |||
| ```{r prepare-receipts} | |||
| # Local copy of the 2020/2022 report list, limited to the columns selected below. | |||
| report_list_lcl <- | |||
|   collect( | |||
|     select( | |||
|       report_list_2020_2022, | |||
|       year:amended, sboe_start_date, sboe_end_date, contains("received_") | |||
|     ) | |||
|   ) | |||
| # Receipts that belong to a donor/committee pair in `total_donations`, with the | |||
| # report list columns `year:report_id` placed first. | |||
| receipts_2020_2022_lcl <- | |||
|   inner_join( | |||
|     select(report_list_lcl, year:report_id), | |||
|     semi_join( | |||
|       collect(receipts_2020_2022), | |||
|       total_donations, | |||
|       by = c("sboe_id", "donor_name", "donor_address") | |||
|     ), | |||
|     by = c("sboe_id", "report_id") | |||
|   ) | |||
| # Named list with one data frame per year, named "Receipts - <year>". | |||
| receipts_split <- | |||
|   receipts_2020_2022_lcl |> | |||
|   arrange(year, doc_name, report_id) |> | |||
|   (function(tbl) split(tbl, paste("Receipts -", tbl[["year"]])))() | |||
| ``` | |||
| ```{r create-sheets} | |||
| # Workbook contents: the summary tables first, then one receipts sheet per year. | |||
| sheets <- c( | |||
|   list( | |||
|     "Donation Summary" = total_donations, | |||
|     "Donors" = donor_type_amounts_lcl, | |||
|     "Committees" = committees_lcl, | |||
|     "Candidates" = candidates_lcl, | |||
|     "Report List" = report_list_lcl | |||
|   ), | |||
|   receipts_split | |||
| ) | |||
| ``` | |||
| ```{r preview-sheets} | |||
| # Print every sheet so the rendered report shows what goes into the workbook. | |||
| sheets | |||
| ``` | |||
| ```{r write-sheets} | |||
| # Write the named list to an Excel workbook, one sheet per list element. | |||
| writexl::write_xlsx(x = sheets, path = "donors_2020_2022.xlsx") | |||
| ``` | |||