|
- ---
- title: "Republican Donors, 2020-2022"
- author: Garrick Aden-Buie
- format: html
-
- execute:
- echo: true
- ---
-
- ## Task
-
- > I’m working on some donor data and I’m wondering if we may have it already in
- > what we’re doing… I’m trying to pull all disclosed donors ($50+) that gave to
- > Republican campaigns in ’20 and ’22. Even if we couldn’t specify party of
- > campaign but I could sort it, that would even be helpful. Is that something we
- > have already?
-
- ## Setup
-
- ```{r setup}
- library(tidyverse)
- library(fs)
- library(zoomerjoin)
- # Load the local `process` package (presumably the source of helpers such as
- # `prep_open_dataset_db()` and `duckdb_global_con()` -- confirm)
- pkgload::load_all(here::here("process"))
-
- # Source tables used throughout this document
- report_dates <- prep_open_dataset_db("report_dates")
- officers <- prep_open_dataset_db("officers")
- receipts <- prep_open_dataset_db("receipts")
- candidate_listing <- prep_open_dataset_db("candidate_listing")
- cover <- prep_open_dataset_db("cover")
-
- # Copy the report list built by the targets pipeline into the shared DuckDB
- # connection and reference it from there. `overwrite = TRUE` keeps this chunk
- # re-runnable in a live session, where the `report_list` table from a previous
- # run would otherwise make `copy_to()` fail.
- report_list_local <- targets::tar_read(report_list, store = here::here("process", "_targets"))
- copy_to(duckdb_global_con(), report_list_local, "report_list", overwrite = TRUE)
- report_list <- tbl(duckdb_global_con(), "report_list")
-
- # `resolved` table of the SQLite address lookup database
- con <- DBI::dbConnect(RSQLite::SQLite(), here::here("data-prep", "address_lookup.sqlite"))
- address_db <- tbl(con, "resolved")
- ```
-
- ## Reports
-
- 1. Reports from 2020 and 2022
- 1. Where an individual donated a total of $50 or more to a committee
- 1. Receipt types that match individual donors and not clerical records
-
- ```{r receipt-types}
- # Tally receipts by type description and code, most frequent first, and show
- # the first 20 rows
- print(
-   count(receipts, receipt_type_desc, receipt_type_code, sort = TRUE),
-   n = 20
- )
- ```
-
- ```{r reports-first-pass}
- # Reports belonging to the two years of interest
- report_list_2020_2022 <- filter(report_list, year %in% c(2020, 2022))
-
- # Receipts on those reports, with an upper-cased donor name and a donor address
- receipts_2020_2022_src <-
-   receipts |>
-   semi_join(report_list_2020_2022, by = "report_id") |>
-   mutate(donor_name = toupper(org_name)) |>
-   # Keep individual/party donors; drop record keeping things, like refunds
-   filter(receipt_type_code %in% c("IND", "CPCM", "GEN", "PPTY", "OUTS", "NFPC")) |>
-   # Drop placeholder names that do not identify a donor
-   filter(!(donor_name %in% c("AGGREGATED INDIVIDUAL CONTRIBUTION", "VARIOUS VARIOUS"))) |>
-   add_address_lookup(name = "donor_address") |>
-   select(
-     sboe_id, report_id, donor_name, donor_address, amount, profession,
-     employers_name, form_of_payment_desc, everything()
-   )
-
- # Total given by each donor (name + address) to each committee, keeping only
- # totals of at least $50
- total_donations_per_committee <-
-   receipts_2020_2022_src |>
-   group_by(sboe_id, donor_name, donor_address) |>
-   summarize(total = sum(amount), .groups = "drop") |>
-   filter(total >= 50)
-
- # Receipts from any donor who appears in the totals above (matched on donor
- # name and address only, not on committee)
- receipts_2020_2022 <- semi_join(
-   receipts_2020_2022_src,
-   total_donations_per_committee,
-   by = c("donor_name", "donor_address")
- )
- ```
-
- ## Donors
-
- ```{r donors}
- # One row per distinct donor (name + address), numbered in donor-name order
- donors <-
-   receipts_2020_2022 |>
-   distinct(donor_name, donor_address) |>
-   semi_join(
-     total_donations_per_committee,
-     by = c("donor_name", "donor_address")
-   ) |>
-   dbplyr::window_order(donor_name) |>
-   mutate(donor_id = row_number(), .before = 1)
-
- # Number of receipts and amount given by each donor under each receipt type
- donor_receipt_types <-
-   left_join(
-     receipts_2020_2022,
-     donors,
-     by = c("donor_name", "donor_address")
-   ) |>
-   group_by(donor_id, receipt_type_desc) |>
-   summarize(n_records = n(), total = sum(amount), .groups = "drop")
-
- # A donor's type is the receipt type carrying their largest total
- donor_type <-
-   donor_receipt_types |>
-   group_by(donor_id) |>
-   slice_max(total, n = 1, with_ties = FALSE) |>
-   ungroup() |>
-   select(donor_id, donor_type = receipt_type_desc)
- ```
-
- ## Add Committee Information
-
- ```{r committee-info}
- # Cover records for the 2020/2022 reports, with each report's end date attached.
- # NOTE(review): the join has no `by`, so it matches on whichever columns the two
- # tables share -- presumably only `report_id`; confirm.
- cover_2020_2022 <-
-   cover |>
-   semi_join(report_list_2020_2022, by = "report_id") |>
-   left_join(report_dates |> select(report_id, sboe_end_date))
-
- # Committee name and address from the record with the latest end date among
- # those that have a committee name
- committee_names <-
-   cover_2020_2022 |>
-   mutate(committee_name = toupper(committee_name)) |>
-   filter(!is.na(committee_name)) |>
-   # presumably assembles `committee_address` from these columns -- confirm
-   add_address_lookup(street_1, city, state, zip_code, name = "committee_address") |>
-   group_by(sboe_id) |>
-   slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
-   # Drop the grouping (as `committee_types` does) so it does not carry over
-   # into `committees` and the data collected from it
-   ungroup() |>
-   select(sboe_id, committee_name, committee_address)
-
- # Committee type from the record with the latest end date among those that
- # have a committee type
- committee_types <-
-   cover_2020_2022 |>
-   filter(!is.na(committee_type)) |>
-   group_by(sboe_id) |>
-   slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
-   ungroup() |>
-   select(sboe_id, committee_type)
-
- # One row per committee: name, address and type
- committees <-
-   committee_names |>
-   left_join(committee_types, by = "sboe_id")
- ```
-
- ## Candidates
-
- ```{r candidate}
- # Candidate officer for each committee, taken from the 2020/2022 report with
- # the latest end date
- candidates <-
-   officers |>
-   filter(type == "Candidate") |>
-   semi_join(report_list_2020_2022, by = "report_id") |>
-   left_join(select(report_dates, report_id, sboe_end_date)) |>
-   slice_max(sboe_end_date, by = sboe_id, n = 1, with_ties = FALSE) |>
-   select(
-     sboe_id,
-     candidate_name = name,
-     candidate_address = address,
-     candidate_phone = phone
-   )
- ```
-
- ## Candidate Affiliation
-
- ```{r candidate-affilitaions}
- # Replace the database-backed table from the setup chunk with the copy stored
- # by the targets pipeline
- candidate_listing <- targets::tar_read(candidate_listing, store = here::here("process", "_targets"))
-
- # Republican and Democratic ballot entries for elections dated 2020 through 2022
- candidate_2020_2022 <-
-   candidate_listing |>
-   filter(
-     between(year(election_dt), 2020, 2022),
-     name_on_ballot != "No Preference",
-     party_candidate %in% c("REP", "DEM")
-   )
-
- # One row per candidate name with the party from their most recent election.
- # Grouping by name alone is what makes the `slice_max()` meaningful: grouping
- # by name and party would keep one row per party for a candidate who switched.
- # NOTE(review): names are not upper-cased here, while the SBoE candidate names
- # they are matched against are -- confirm the listing is already upper case.
- candidate_affiliation <-
-   candidate_2020_2022 |>
-   mutate(
-     across(c(first_name, middle_name, last_name), \(x) coalesce(x, "")),
-     # Squish so a missing name part leaves no doubled, leading or trailing space
-     candidate_name = stringr::str_squish(paste(first_name, middle_name, last_name))
-   ) |>
-   distinct(election_dt, candidate_name, party_candidate) |>
-   group_by(candidate_name) |>
-   slice_max(election_dt, n = 1, with_ties = FALSE) |>
-   ungroup() |>
-   select(-election_dt)
- ```
-
-
- ## Summary
-
- ```{r collect-data}
- # Bring the lazy tables into memory
- total_donations_per_committee_lcl <- collect(total_donations_per_committee)
- committees_lcl <- collect(committees)
- donors_lcl <- collect(left_join(donors, donor_type, by = "donor_id"))
-
- # One row per donor with one column per receipt type holding the amount given
- # (0 where the donor has nothing under that type)
- donor_type_amounts_lcl <-
-   collect(donor_receipt_types) |>
-   left_join(donors_lcl, by = "donor_id") |>
-   select(-c(donor_id, n_records)) |>
-   relocate(donor_name, donor_address, donor_type) |>
-   rename(name = receipt_type_desc, value = total) |>
-   # Receipt types become a factor ordered by their overall total, largest first
-   mutate(name = fct_reorder(name, value, .fun = sum, .desc = TRUE)) |>
-   pivot_wider(names_from = name, values_from = value, values_fill = 0)
-
- # Clean the SBoE candidate names, then fuzzy-match them to the candidate
- # listing to pick up a party
- candidates_lcl <-
-   collect(candidates) |>
-   replace_na(list(candidate_name = "")) |>
-   mutate(
-     candidate_name = candidate_name |>
-       # remove double-quoted segments
-       gsub(pattern = '"[^"]+"', replacement = "") |>
-       # remove everything from an opening parenthesis to the end
-       gsub(pattern = "\\(.+\\)?$", replacement = "") |>
-       stringr::str_squish() |>
-       toupper()
-   ) |>
-   jaccard_left_join(candidate_affiliation, by = "candidate_name", threshold = 0.8) |>
-   rename(
-     candidate_name_sboe = candidate_name.x,
-     candidate_name_list = candidate_name.y,
-     candidate_party = party_candidate
-   ) |>
-   # Keep only the first match for each committee/candidate pair
-   group_by(sboe_id, candidate_name_sboe) |>
-   slice(1) |>
-   ungroup()
-
- # Donor-to-committee totals with donor, committee and candidate details,
- # ordered by committee total and then by donor total (largest first)
- total_donations <-
-   total_donations_per_committee_lcl |>
-   left_join(
-     select(donors_lcl, -donor_id),
-     by = c("donor_name", "donor_address")
-   ) |>
-   left_join(committees_lcl, by = "sboe_id") |>
-   left_join(candidates_lcl, by = "sboe_id") |>
-   mutate(total_committee = sum(total), .by = sboe_id) |>
-   arrange(desc(total_committee), desc(total))
- ```
-
- ## Output
-
- ```{r prepare-receipts}
- # Local copy of the 2020/2022 report list, limited to the columns to export
- report_list_lcl <- collect(
-   select(
-     report_list_2020_2022,
-     year:amended, sboe_start_date, sboe_end_date, contains("received_")
-   )
- )
-
- # Receipts behind the donor/committee totals, with the report columns
- # (`year` through `report_id`) placed first
- receipts_2020_2022_lcl <- inner_join(
-   select(report_list_lcl, year:report_id),
-   receipts_2020_2022 |>
-     collect() |>
-     semi_join(total_donations, by = c("sboe_id", "donor_name", "donor_address")),
-   by = c("sboe_id", "report_id")
- )
-
- # Sort the receipts and split them into one table per year, named
- # "Receipts - <year>"
- receipts_split <- local({
-   ordered <- arrange(receipts_2020_2022_lcl, year, doc_name, report_id)
-   split(ordered, paste("Receipts -", ordered$year))
- })
- ```
-
- ```{r create-sheets}
- # Named list of tables: the summary tables first, then the per-year receipts
- sheets <- c(
-   list(
-     "Donation Summary" = total_donations,
-     "Donors" = donor_type_amounts_lcl,
-     "Committees" = committees_lcl,
-     "Candidates" = candidates_lcl,
-     "Report List" = report_list_lcl
-   ),
-   receipts_split
- )
- ```
-
- ```{r preview-sheets}
- # Print every table in `sheets` for a quick look in the rendered document
- sheets
- ```
-
- ```{r write-sheets}
- # Save the named list of tables as a single Excel workbook
- writexl::write_xlsx(x = sheets, path = "donors_2020_2022.xlsx")
- ```
|