| --- | |||||
| title: "Republican Donors, 2020-2022" | |||||
| author: Garrick Aden-Buie | |||||
| format: html | |||||
| execute: | |||||
| echo: true | |||||
| --- | |||||
| ## Task | |||||
| > I’m working on some donor data and I’m wondering if we may have it already in | |||||
| > what we’re doing… I’m trying to pull all disclosed donors ($50+) that gave to | |||||
| > Republican campaigns in ’20 and ’22. Even if we couldn’t specify party of | |||||
| > campaign but I could sort it, that would even be helpful. Is that something we | |||||
| > have already? | |||||
| ## Setup | |||||
| ```{r setup} | |||||
| library(tidyverse) | |||||
| library(fs) | |||||
| library(zoomerjoin) | |||||
| # Load the local `process` package; presumably this is where helpers such as | |||||
| # prep_open_dataset_db(), duckdb_global_con() and add_address_lookup() live. | |||||
| pkgload::load_all(here::here("process")) | |||||
| # Open the datasets used in the chunks below. | |||||
| report_dates <- prep_open_dataset_db("report_dates") | |||||
| officers <- prep_open_dataset_db("officers") | |||||
| receipts <- prep_open_dataset_db("receipts") | |||||
| # NOTE(review): `candidate_listing` is reassigned from the targets store in the | |||||
| # candidate-affiliation chunk before it is read, so this handle goes unused. | |||||
| candidate_listing <- prep_open_dataset_db("candidate_listing") | |||||
| cover <- prep_open_dataset_db("cover") | |||||
| # Copy the report list from the targets store into the database behind | |||||
| # duckdb_global_con() so it can be joined with the other tables. | |||||
| # `overwrite = TRUE` keeps this chunk re-runnable within one session; without | |||||
| # it a second run fails because the "report_list" table already exists. | |||||
| report_list_local <- targets::tar_read( | |||||
|   report_list, | |||||
|   store = here::here("process", "_targets") | |||||
| ) | |||||
| copy_to(duckdb_global_con(), report_list_local, "report_list", overwrite = TRUE) | |||||
| report_list <- tbl(duckdb_global_con(), "report_list") | |||||
| # The `resolved` table of the address lookup lives in a separate SQLite file. | |||||
| con <- DBI::dbConnect(RSQLite::SQLite(), here::here("data-prep", "address_lookup.sqlite")) | |||||
| address_db <- tbl(con, "resolved") | |||||
| ``` | |||||
| ## Reports | |||||
| 1. Reports from 2020 and 2022 | |||||
| 1. Where a donor gave a total of at least $50 to a single committee | |||||
| 1. Receipt types that match individual donors and not clerical records | |||||
| ```{r receipt-types} | |||||
| # Tally receipts by type, most frequent first, and show the first 20 rows. | |||||
| print( | |||||
|   count(receipts, receipt_type_desc, receipt_type_code, sort = TRUE), | |||||
|   n = 20 | |||||
| ) | |||||
| ``` | |||||
| ```{r reports-first-pass} | |||||
| # Reports whose `year` is 2020 or 2022. | |||||
| report_list_2020_2022 <- | |||||
|   report_list |> | |||||
|   filter(year %in% c(2020, 2022)) | |||||
| # Receipts on those reports, limited to donor-type records, with an upper-cased | |||||
| # donor name and a `donor_address` column added by add_address_lookup(). | |||||
| receipts_2020_2022_src <- | |||||
|   receipts |> | |||||
|   semi_join(report_list_2020_2022, by = "report_id") |> | |||||
|   mutate(donor_name = toupper(org_name)) |> | |||||
|   filter( | |||||
|     # Keep individual/party donors; drop record keeping things, like refunds | |||||
|     receipt_type_code %in% c("IND", "CPCM", "GEN", "PPTY", "OUTS", "NFPC"), | |||||
|     !donor_name %in% c("AGGREGATED INDIVIDUAL CONTRIBUTION", "VARIOUS VARIOUS") | |||||
|   ) |> | |||||
|   add_address_lookup(name = "donor_address") |> | |||||
|   select( | |||||
|     sboe_id, report_id, donor_name, donor_address, amount, profession, | |||||
|     employers_name, form_of_payment_desc, everything() | |||||
|   ) | |||||
| # Total given by each donor (name + address) to each committee (`sboe_id`), | |||||
| # keeping totals of $50 or more. SQL aggregates always skip missing values; | |||||
| # `na.rm = TRUE` states that explicitly and silences the dbplyr warning. | |||||
| total_donations_per_committee <- | |||||
|   receipts_2020_2022_src |> | |||||
|   group_by(sboe_id, donor_name, donor_address) |> | |||||
|   summarize(total = sum(amount, na.rm = TRUE), .groups = "drop") |> | |||||
|   filter(total >= 50) | |||||
| # NOTE(review): the join below does not include `sboe_id`, so it keeps every | |||||
| # receipt from a donor who reached $50 with at least one committee, including | |||||
| # receipts to committees where that donor's total is under $50. Confirm that | |||||
| # this is intended for the donor-level summaries built from this table. | |||||
| receipts_2020_2022 <- | |||||
|   receipts_2020_2022_src |> | |||||
|   semi_join(total_donations_per_committee, by = c("donor_name", "donor_address")) | |||||
| ``` | |||||
| ## Donors | |||||
| ```{r donors} | |||||
| # One row per donor (name + address) with a sequential `donor_id`. | |||||
| # `receipts_2020_2022` is already restricted to donors present in | |||||
| # `total_donations_per_committee`, so no further semi-join is needed here. | |||||
| # The window is ordered by name AND address: ordering by name alone leaves ties | |||||
| # between same-name donors at different addresses, and because this lazy query | |||||
| # is re-run each time it is used, tied rows could receive different ids in | |||||
| # different evaluations and then mis-join on `donor_id`. | |||||
| donors <- | |||||
|   receipts_2020_2022 |> | |||||
|   distinct(donor_name, donor_address) |> | |||||
|   dbplyr::window_order(donor_name, donor_address) |> | |||||
|   mutate(donor_id = row_number(), .before = 1) | |||||
| # Number of receipts and amount given per donor and receipt type. | |||||
| donor_receipt_types <- | |||||
|   receipts_2020_2022 |> | |||||
|   left_join(donors, by = c("donor_name", "donor_address")) |> | |||||
|   group_by(donor_id, receipt_type_desc) |> | |||||
|   summarize(n_records = n(), total = sum(amount, na.rm = TRUE), .groups = "drop") | |||||
| # Label each donor with the receipt type that accounts for the largest amount. | |||||
| donor_type <- | |||||
|   donor_receipt_types |> | |||||
|   slice_max(total, by = donor_id, n = 1, with_ties = FALSE) |> | |||||
|   select(donor_id, donor_type = receipt_type_desc) | |||||
| ``` | |||||
| ## Add Committee Information | |||||
| ```{r committee-info} | |||||
| # Cover records for the 2020/2022 reports, with each report's `sboe_end_date`. | |||||
| # NOTE(review): the join has no `by`, so it uses every column the two tables | |||||
| # share; presumably that is just `report_id` - confirm. | |||||
| cover_2020_2022 <- | |||||
|   cover |> | |||||
|   semi_join(report_list_2020_2022, by = "report_id") |> | |||||
|   left_join(report_dates |> select(report_id, sboe_end_date)) | |||||
| # Committee name and address taken from the cover record with the latest | |||||
| # `sboe_end_date` for each committee (`sboe_id`). | |||||
| committee_names <- | |||||
|   cover_2020_2022 |> | |||||
|   mutate(committee_name = toupper(committee_name)) |> | |||||
|   filter(!is.na(committee_name)) |> | |||||
|   add_address_lookup(street_1, city, state, zip_code, name = "committee_address") |> | |||||
|   group_by(sboe_id) |> | |||||
|   slice_max(sboe_end_date, n = 1, with_ties = FALSE) |> | |||||
|   # Drop the grouping, as `committee_types` does, so it does not carry over | |||||
|   # into `committees` and the collected copy of it. | |||||
|   ungroup() |> | |||||
|   select(sboe_id, committee_name, committee_address) | |||||
| # Committee type from the latest cover record that has one. | |||||
| committee_types <- | |||||
|   cover_2020_2022 |> | |||||
|   filter(!is.na(committee_type)) |> | |||||
|   group_by(sboe_id) |> | |||||
|   slice_max(sboe_end_date, n = 1, with_ties = FALSE) |> | |||||
|   ungroup() |> | |||||
|   select(sboe_id, committee_type) | |||||
| # One row per committee: name, address and type. | |||||
| committees <- | |||||
|   committee_names |> | |||||
|   left_join(committee_types, by = "sboe_id") | |||||
| ``` | |||||
| ## Candidates | |||||
| ```{r candidate} | |||||
| # For each committee (`sboe_id`), the officer of type "Candidate" listed on the | |||||
| # 2020/2022 report with the latest `sboe_end_date`. | |||||
| candidates <- | |||||
|   officers |> | |||||
|   filter(type == "Candidate") |> | |||||
|   semi_join(report_list_2020_2022, by = "report_id") |> | |||||
|   left_join(select(report_dates, report_id, sboe_end_date)) |> | |||||
|   slice_max(sboe_end_date, by = sboe_id, n = 1, with_ties = FALSE) |> | |||||
|   select( | |||||
|     sboe_id, | |||||
|     candidate_name = name, | |||||
|     candidate_address = address, | |||||
|     candidate_phone = phone | |||||
|   ) | |||||
| ``` | |||||
| ## Candidate Affiliation | |||||
| ```{r candidate-affilitaions} | |||||
| # Read the candidate listing from the targets store (replaces the handle of the | |||||
| # same name created in the setup chunk). | |||||
| candidate_listing <- targets::tar_read( | |||||
|   candidate_listing, | |||||
|   store = here::here("process", "_targets") | |||||
| ) | |||||
| # REP/DEM ballot entries for elections dated 2020 through 2022 (2021 included). | |||||
| candidate_2020_2022 <- | |||||
|   candidate_listing |> | |||||
|   filter( | |||||
|     between(year(election_dt), 2020, 2022), | |||||
|     name_on_ballot != "No Preference", | |||||
|     party_candidate %in% c("REP", "DEM") | |||||
|   ) | |||||
| # One party per candidate name, for matching against committee candidates. | |||||
| candidate_affiliation <- | |||||
|   candidate_2020_2022 |> | |||||
|   mutate( | |||||
|     across(c(first_name, middle_name, last_name), \(x) if_else(is.na(x), "", x)), | |||||
|     # str_squish() also trims the leading/trailing space left behind when a | |||||
|     # name part is missing; upper-casing matches the case applied to the | |||||
|     # committee-side names before the fuzzy join in the summary chunk. | |||||
|     candidate_name = toupper(str_squish(paste(first_name, middle_name, last_name))) | |||||
|   ) |> | |||||
|   distinct(election_dt, candidate_name, party_candidate) |> | |||||
|   # Group by name only so the most recent election decides the party; grouping | |||||
|   # by party as well would keep one row per party and make the date irrelevant. | |||||
|   group_by(candidate_name) |> | |||||
|   slice_max(election_dt, n = 1, with_ties = FALSE) |> | |||||
|   ungroup() |> | |||||
|   select(-election_dt) | |||||
| ``` | |||||
| ## Summary | |||||
| ```{r collect-data} | |||||
| # Bring the per-committee donor totals into memory. | |||||
| total_donations_per_committee_lcl <- total_donations_per_committee |> collect() | |||||
| # Donors with their main receipt type attached. | |||||
| donors_lcl <- | |||||
|   donors |> | |||||
|   left_join(donor_type, by = "donor_id") |> | |||||
|   collect() | |||||
| # One row per donor and one column per receipt type holding the amount given | |||||
| # (0 when the donor has no receipts of that type). The columns are renamed to | |||||
| # `name`/`value` because those are pivot_wider()'s defaults. | |||||
| donor_type_amounts_lcl <- | |||||
|   donor_receipt_types |> | |||||
|   collect() |> | |||||
|   left_join(donors_lcl, by = "donor_id") |> | |||||
|   select(-donor_id, -n_records) |> | |||||
|   relocate(donor_name, donor_address, donor_type) |> | |||||
|   rename(name = receipt_type_desc, value = total) |> | |||||
|   mutate(name = fct_reorder(name, value, .fun = sum, .desc = TRUE)) |> | |||||
|   # names_sort = TRUE makes the new columns follow the factor levels set above | |||||
|   # (largest receipt type first); the default orders them by first appearance. | |||||
|   pivot_wider(values_fill = 0, names_sort = TRUE) | |||||
| committees_lcl <- committees |> collect() | |||||
| # Clean the committee-reported candidate names, then fuzzy-match them against | |||||
| # the candidate listing to attach a party. | |||||
| candidates_lcl <- | |||||
|   candidates |> | |||||
|   collect() |> | |||||
|   replace_na(list(candidate_name = "")) |> | |||||
|   mutate( | |||||
|     # Remove quoted text such as a nickname in double quotes. | |||||
|     candidate_name = gsub('"[^"]+"', "", candidate_name), | |||||
|     # Remove parenthesised text wherever it appears, closed or left open at the | |||||
|     # end of the string, without discarding the text that follows it. | |||||
|     candidate_name = gsub("\\([^)]*(\\)|$)", "", candidate_name), | |||||
|     candidate_name = stringr::str_squish(candidate_name), | |||||
|     candidate_name = toupper(candidate_name) | |||||
|   ) |> | |||||
|   jaccard_left_join(candidate_affiliation, by = "candidate_name", threshold = 0.8) |> | |||||
|   rename( | |||||
|     candidate_name_sboe = candidate_name.x, | |||||
|     candidate_name_list = candidate_name.y, | |||||
|     candidate_party = party_candidate | |||||
|   ) |> | |||||
|   # NOTE(review): when several listing names match, this keeps the first row, | |||||
|   # which is not necessarily the closest match. | |||||
|   group_by(sboe_id, candidate_name_sboe) |> | |||||
|   slice(1) |> | |||||
|   ungroup() | |||||
| # Donor-by-committee totals with donor type, committee and candidate details, | |||||
| # sorted by the committee's overall total and then by the donor's total. | |||||
| total_donations <- | |||||
|   total_donations_per_committee_lcl |> | |||||
|   left_join( | |||||
|     donors_lcl |> select(-donor_id), | |||||
|     by = c("donor_name", "donor_address") | |||||
|   ) |> | |||||
|   left_join(committees_lcl, by = "sboe_id") |> | |||||
|   left_join(candidates_lcl, by = "sboe_id") |> | |||||
|   group_by(sboe_id) |> | |||||
|   mutate(total_committee = sum(total)) |> | |||||
|   ungroup() |> | |||||
|   arrange(desc(total_committee), desc(total)) | |||||
| ``` | |||||
| ## Output | |||||
| ```{r prepare-receipts} | |||||
| # Local copy of the 2020/2022 report list: columns `year` through `amended`, | |||||
| # the report start/end dates, and every column containing "received_". | |||||
| report_list_lcl <- collect( | |||||
|   select( | |||||
|     report_list_2020_2022, | |||||
|     year:amended, sboe_start_date, sboe_end_date, contains("received_") | |||||
|   ) | |||||
| ) | |||||
| # Receipts behind the rows of `total_donations`, with the report columns | |||||
| # (`year` through `report_id`) placed in front of the receipt columns. | |||||
| receipts_2020_2022_lcl <- inner_join( | |||||
|   x = select(report_list_lcl, year:report_id), | |||||
|   y = receipts_2020_2022 |> | |||||
|     collect() |> | |||||
|     semi_join(total_donations, by = c("sboe_id", "donor_name", "donor_address")), | |||||
|   by = c("sboe_id", "report_id") | |||||
| ) | |||||
| # One data frame per year, named "Receipts - <year>". | |||||
| receipts_split <- local({ | |||||
|   receipts_sorted <- arrange(receipts_2020_2022_lcl, year, doc_name, report_id) | |||||
|   split(receipts_sorted, paste("Receipts -", receipts_sorted$year)) | |||||
| }) | |||||
| ``` | |||||
| ```{r create-sheets} | |||||
| # Named list of sheets: the summary tables first, then the per-year receipts. | |||||
| sheets <- c( | |||||
|   list( | |||||
|     "Donation Summary" = total_donations, | |||||
|     "Donors" = donor_type_amounts_lcl, | |||||
|     "Committees" = committees_lcl, | |||||
|     "Candidates" = candidates_lcl, | |||||
|     "Report List" = report_list_lcl | |||||
|   ), | |||||
|   receipts_split | |||||
| ) | |||||
| ``` | |||||
| ```{r preview-sheets} | |||||
| # Print every sheet for a visual check before writing the workbook. | |||||
| sheets | |||||
| ``` | |||||
| ```{r write-sheets} | |||||
| # Write the named list to an Excel workbook next to this document. | |||||
| writexl::write_xlsx(x = sheets, path = "donors_2020_2022.xlsx") | |||||
| ``` | |||||