---
title: "Republican Donors, 2020-2022"
author: Garrick Aden-Buie
format: html
execute:
  echo: true
---

## Task

> I’m working on some donor data and I’m wondering if we may have it already in
> what we’re doing… I’m trying to pull all disclosed donors ($50+) that gave to
> Republican campaigns in ’20 and ’22. Even if we couldn’t specify party of
> campaign but I could sort it, that would even be helpful. Is that something we
> have already?

## Setup

```{r setup}
library(tidyverse)
library(fs)
library(zoomerjoin)

# Load the local `process` package; the helpers used below
# (prep_open_dataset_db(), duckdb_global_con(), add_address_lookup()) are
# presumably defined there -- verify.
pkgload::load_all(here::here("process"))

# Source tables used throughout the report.
report_dates <- prep_open_dataset_db("report_dates")
officers <- prep_open_dataset_db("officers")
receipts <- prep_open_dataset_db("receipts")
candidate_listing <- prep_open_dataset_db("candidate_listing")
cover <- prep_open_dataset_db("cover")

# The report list lives in the targets store; copy it into the shared DuckDB
# connection so it can be joined against the other tables.
report_list_local <- targets::tar_read(
  report_list,
  store = here::here("process", "_targets")
)
copy_to(duckdb_global_con(), report_list_local, "report_list")
report_list <- tbl(duckdb_global_con(), "report_list")

# Resolved-address lookup table (SQLite).
con <- DBI::dbConnect(
  RSQLite::SQLite(),
  here::here("data-prep", "address_lookup.sqlite")
)
address_db <- tbl(con, "resolved")
```

## Reports

1. Reports from 2020 and 2022
1. Where a donor gave $50 or more in total to a single committee
1. Receipt types that match individual donors and not clerical records

```{r receipt-types}
receipts |>
  count(receipt_type_desc, receipt_type_code, sort = TRUE) |>
  print(n = 20)
```

```{r reports-first-pass}
report_list_2020_2022 <- report_list |>
  filter(year %in% c(2020, 2022))

receipts_2020_2022_src <- receipts |>
  semi_join(report_list_2020_2022, by = "report_id") |>
  mutate(donor_name = toupper(org_name)) |>
  filter(
    # Keep individual/party donors; drop record keeping things, like refunds
    receipt_type_code %in% c("IND", "CPCM", "GEN", "PPTY", "OUTS", "NFPC"),
    !donor_name %in% c("AGGREGATED INDIVIDUAL CONTRIBUTION", "VARIOUS VARIOUS")
  ) |>
  add_address_lookup(name = "donor_address") |>
  select(
    sboe_id, report_id, donor_name, donor_address, amount, profession,
    employers_name, form_of_payment_desc, everything()
  )

# Total given by each donor (name + address) to each committee (sboe_id);
# only donor/committee pairs totalling at least $50 are kept.
total_donations_per_committee <- receipts_2020_2022_src |>
  group_by(sboe_id, donor_name, donor_address) |>
  summarize(total = sum(amount), .groups = "drop") |>
  filter(total >= 50)

# NOTE(review): this join is by donor only (no `sboe_id`), so it keeps every
# receipt from a donor who reached $50 with at least one committee, including
# receipts to committees where they gave less. The per-committee restriction
# is applied later in the `prepare-receipts` chunk.
receipts_2020_2022 <- receipts_2020_2022_src |>
  semi_join(
    total_donations_per_committee,
    by = c("donor_name", "donor_address")
  )
```

## Donors

```{r donors}
# One row per distinct donor, with a sequential id assigned in name order.
donors <- receipts_2020_2022 |>
  distinct(donor_name, donor_address) |>
  semi_join(
    total_donations_per_committee,
    by = c("donor_name", "donor_address")
  ) |>
  dbplyr::window_order(donor_name) |>
  mutate(donor_id = row_number(), .before = 1)

# Record count and dollar total per donor and receipt type.
donor_receipt_types <- receipts_2020_2022 |>
  left_join(donors, by = c("donor_name", "donor_address")) |>
  group_by(donor_id, receipt_type_desc) |>
  summarize(n_records = n(), total = sum(amount), .groups = "drop")

# A donor's "type" is the receipt type carrying their largest dollar total.
donor_type <- donor_receipt_types |>
  slice_max(total, by = donor_id, n = 1, with_ties = FALSE) |>
  select(donor_id, donor_type = receipt_type_desc)
```

## Add Committee Information

```{r committee-info}
cover_2020_2022 <- cover |>
  semi_join(report_list_2020_2022, by = "report_id") |>
  left_join(
    report_dates |> select(report_id, sboe_end_date),
    by = "report_id"
  )

# Most recent non-missing committee name (and its address) per committee.
committee_names <- cover_2020_2022 |>
  mutate(committee_name = toupper(committee_name)) |>
  filter(!is.na(committee_name)) |>
  add_address_lookup(
    street_1, city, state, zip_code,
    name = "committee_address"
  ) |>
  group_by(sboe_id) |>
  slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
  # Drop the grouping so it does not leak into `committees` and the
  # "Committees" sheet (matches `committee_types` below).
  ungroup() |>
  select(sboe_id, committee_name, committee_address)

# Most recent non-missing committee type per committee.
committee_types <- cover_2020_2022 |>
  filter(!is.na(committee_type)) |>
  group_by(sboe_id) |>
  slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
  ungroup() |>
  select(sboe_id, committee_type)

committees <- committee_names |>
  left_join(committee_types, by = "sboe_id")
```

## Candidates

```{r candidate}
# Most recently reported candidate officer per committee.
candidates <- officers |>
  filter(type == "Candidate") |>
  semi_join(report_list_2020_2022, by = "report_id") |>
  left_join(
    report_dates |> select(report_id, sboe_end_date),
    by = "report_id"
  ) |>
  group_by(sboe_id) |>
  slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
  ungroup() |>
  select(
    sboe_id,
    candidate_name = name,
    candidate_address = address,
    candidate_phone = phone
  )
```

## Candidate Affiliation

```{r candidate-affiliations}
# Replaces the `candidate_listing` table opened in `setup` with the copy read
# from the targets store.
candidate_listing <- targets::tar_read(
  candidate_listing,
  store = here::here("process", "_targets")
)

candidate_2020_2022 <- candidate_listing |>
  filter(
    # NOTE(review): between() is inclusive, so 2021 elections are included
    # here even though the reports above are limited to 2020 and 2022 --
    # confirm this is intended.
    between(year(election_dt), 2020, 2022),
    name_on_ballot != "No Preference",
    party_candidate %in% c("REP", "DEM")
  )

candidate_affiliation <- candidate_2020_2022 |>
  mutate(
    across(c(first_name, middle_name, last_name), \(x) coalesce(x, "")),
    # str_squish() collapses the double space left by an empty middle name and
    # also trims the ends, mirroring the clean-up applied to the other side of
    # the fuzzy join in `collect-data`.
    candidate_name = str_squish(paste(first_name, middle_name, last_name))
  ) |>
  distinct(election_dt, candidate_name, party_candidate) |>
  # Keep the most recent election for each name/party pair.
  group_by(candidate_name, party_candidate) |>
  slice_max(election_dt, n = 1, with_ties = FALSE) |>
  ungroup() |>
  select(-election_dt)
```

## Summary

```{r collect-data}
total_donations_per_committee_lcl <- total_donations_per_committee |>
  collect()

donors_lcl <- donors |>
  left_join(donor_type, by = "donor_id") |>
  collect()

# Wide table: one row per donor, one column per receipt type (dollar totals),
# with columns ordered by overall total, largest first.
donor_type_amounts_lcl <- donor_receipt_types |>
  collect() |>
  left_join(donors_lcl, by = "donor_id") |>
  select(-donor_id, -n_records) |>
  relocate(donor_name, donor_address, donor_type) |>
  rename(name = receipt_type_desc, value = total) |>
  mutate(name = fct_reorder(name, value, .fun = sum, .desc = TRUE)) |>
  pivot_wider(values_fill = 0)

committees_lcl <- committees |>
  collect()

candidates_lcl <- candidates |>
  collect() |>
  replace_na(list(candidate_name = "")) |>
  mutate(
    # Strip double-quoted segments and anything from an opening parenthesis to
    # the end of the string, then normalise whitespace and case.
    candidate_name = gsub('"[^"]+"', "", candidate_name),
    candidate_name = gsub("\\(.+\\)?$", "", candidate_name),
    candidate_name = stringr::str_squish(candidate_name),
    candidate_name = toupper(candidate_name)
  ) |>
  # Fuzzy-match the reported candidate names to the candidate listing.
  jaccard_left_join(
    candidate_affiliation,
    by = "candidate_name",
    threshold = 0.8
  ) |>
  rename(
    candidate_name_sboe = candidate_name.x,
    candidate_name_list = candidate_name.y,
    candidate_party = party_candidate
  ) |>
  # A candidate may pick up several fuzzy matches; keep the first row per
  # committee/candidate.
  group_by(sboe_id, candidate_name_sboe) |>
  slice(1) |>
  ungroup()

total_donations <- total_donations_per_committee_lcl |>
  left_join(
    donors_lcl |> select(-donor_id),
    by = c("donor_name", "donor_address")
  ) |>
  left_join(committees_lcl, by = "sboe_id") |>
  left_join(candidates_lcl, by = "sboe_id") |>
  # Committee-wide total, used only to order the output.
  group_by(sboe_id) |>
  mutate(total_committee = sum(total)) |>
  ungroup() |>
  arrange(desc(total_committee), desc(total))
```

## Output

```{r prepare-receipts}
report_list_lcl <- report_list_2020_2022 |>
  select(
    year:amended, sboe_start_date, sboe_end_date,
    contains("received_")
  ) |>
  collect()

receipts_2020_2022_lcl <- receipts_2020_2022 |>
  collect() |>
  # Restrict to donor/committee pairs that appear in the summary.
  semi_join(
    total_donations,
    by = c("sboe_id", "donor_name", "donor_address")
  ) |>
  # The report columns are the left-hand side of the join so they come first
  # in the output; the piped receipts are passed as `y`.
  inner_join(
    report_list_lcl |> select(year:report_id),
    y = _,
    by = c("sboe_id", "report_id")
  )

# One data frame per report year, named "Receipts - <year>".
receipts_split <- receipts_2020_2022_lcl |>
  arrange(year, doc_name, report_id) |>
  (\(x) split(x, paste("Receipts -", x$year)))()
```

```{r create-sheets}
sheets <- list(
  "Donation Summary" = total_donations,
  "Donors" = donor_type_amounts_lcl,
  "Committees" = committees_lcl,
  "Candidates" = candidates_lcl,
  "Report List" = report_list_lcl
)

sheets <- c(sheets, receipts_split)
```

```{r preview-sheets}
sheets
```

```{r write-sheets}
writexl::write_xlsx(sheets, "donors_2020_2022.xlsx")
```