Quellcode durchsuchen

repub donors

main
Garrick Aden-Buie vor 2 Jahren
Ursprung
Commit
139d69eea7
Es konnte kein GPG-Schlüssel zu dieser Signatur gefunden werden
1 geänderte Dateien mit 272 neuen und 0 gelöschten Zeilen
  1. +272
    -0
      reports/2023-11-14_repub-donors/republican-donors-2020-2022.qmd

+ 272
- 0
reports/2023-11-14_repub-donors/republican-donors-2020-2022.qmd Datei anzeigen

@@ -0,0 +1,272 @@
---
title: "Republican Donors, 2020-2022"
author: Garrick Aden-Buie
format: html

execute:
  echo: true
---

## Task

> I’m working on some donor data and I’m wondering if we may have it already in
> what we’re doing… I’m trying to pull all disclosed donors ($50+) that gave to
> Republican campaigns in ’20 and ’22. Even if we couldn’t specify party of
> campaign but I could sort it, that would even be helpful. Is that something we
> have already?

## Setup

```{r setup}
# Packages ----
library(tidyverse)
library(fs)
library(zoomerjoin)
# Load the project's own code from `process/`. The helpers used below
# (prep_open_dataset_db(), duckdb_global_con(), add_address_lookup()) are
# presumably defined there -- they are not defined in this document.
pkgload::load_all(here::here("process"))

# Project datasets ----
# Later chunks pipe these through dplyr verbs and collect() the results.
report_dates <- prep_open_dataset_db("report_dates")
officers <- prep_open_dataset_db("officers")
receipts <- prep_open_dataset_db("receipts")
# NOTE(review): `candidate_listing` is reassigned from the targets store in the
# candidate-affiliation chunk before it is used anywhere.
candidate_listing <- prep_open_dataset_db("candidate_listing")
cover <- prep_open_dataset_db("cover")

# Report list ----
# Read the report list from the targets store and copy it into the shared
# DuckDB connection so it can be joined against the other tables.
report_list_local <- targets::tar_read(
  report_list,
  store = here::here("process", "_targets")
)
# `overwrite = TRUE` lets this chunk be re-run in the same session; without it
# copy_to() stops because the "report_list" table already exists.
copy_to(
  duckdb_global_con(),
  report_list_local,
  "report_list",
  overwrite = TRUE
)
report_list <- tbl(duckdb_global_con(), "report_list")

# Address lookup ----
# NOTE(review): `address_db` is not referenced again in this document and `con`
# is never disconnected; confirm whether add_address_lookup() relies on them.
con <- DBI::dbConnect(
  RSQLite::SQLite(),
  here::here("data-prep", "address_lookup.sqlite")
)
address_db <- tbl(con, "resolved")
```

## Reports

1. Reports from 2020 and 2022
1. Where a donor gave $50 or more in total to a single committee
1. Receipt types that match individual donors and not clerical records

```{r receipt-types}
# Tally receipts by type description and code, most frequent first, and show
# the top 20 combinations.
receipt_type_counts <- count(
  receipts,
  receipt_type_desc,
  receipt_type_code,
  sort = TRUE
)
print(receipt_type_counts, n = 20)
```

```{r reports-first-pass}
# Reports filed for 2020 and 2022.
report_list_2020_2022 <- filter(report_list, year %in% c(2020, 2022))

# Receipt type codes kept as individual/party donations; everything else
# (record-keeping entries such as refunds) is dropped.
donor_receipt_codes <- c("IND", "CPCM", "GEN", "PPTY", "OUTS", "NFPC")
# Donor names that stand for aggregated rather than identifiable donors.
placeholder_donor_names <- c(
  "AGGREGATED INDIVIDUAL CONTRIBUTION",
  "VARIOUS VARIOUS"
)

# Receipts on the selected reports, with an upper-cased donor name and a
# `donor_address` column added by the project's address lookup helper.
receipts_2020_2022_src <-
  receipts |>
  semi_join(report_list_2020_2022, by = "report_id") |>
  mutate(donor_name = toupper(org_name)) |>
  filter(receipt_type_code %in% !!donor_receipt_codes) |>
  filter(!donor_name %in% !!placeholder_donor_names) |>
  add_address_lookup(name = "donor_address") |>
  select(
    sboe_id,
    report_id,
    donor_name,
    donor_address,
    amount,
    profession,
    employers_name,
    form_of_payment_desc,
    everything()
  )

# Total given by each donor (name + address) to each committee, keeping only
# donor/committee pairs whose total is at least $50.
total_donations_per_committee <-
  receipts_2020_2022_src |>
  group_by(sboe_id, donor_name, donor_address) |>
  summarise(total = sum(amount), .groups = "drop") |>
  filter(total >= 50)

# All receipts from donors that reached the threshold for at least one
# committee (the join is on donor only, not on committee).
receipts_2020_2022 <- semi_join(
  receipts_2020_2022_src,
  total_donations_per_committee,
  by = c("donor_name", "donor_address")
)
```

## Donors

```{r donors}
# One row per (donor_name, donor_address) pair in the filtered receipts, with a
# sequential `donor_id`.
#
# `receipts_2020_2022` is already restricted to the donors present in
# `total_donations_per_committee`, so no further semi_join is needed here.
donors <-
  receipts_2020_2022 |>
  distinct(donor_name, donor_address) |>
  # `donors` is a lazy query that is re-evaluated wherever it is used, and the
  # results are later joined on `donor_id` after being collected separately.
  # Ordering by name alone leaves ties (same name, different address) in an
  # unspecified order, so ids could differ between evaluations; the address
  # tiebreaker makes the numbering reproducible.
  dbplyr::window_order(donor_name, donor_address) |>
  mutate(donor_id = row_number(), .before = 1)

# Number of receipts and total amount per donor and receipt type.
donor_receipt_types <-
  receipts_2020_2022 |>
  left_join(donors, by = c("donor_name", "donor_address")) |>
  group_by(donor_id, receipt_type_desc) |>
  summarize(n_records = n(), total = sum(amount), .groups = "drop")

# Each donor's "type" is the receipt type under which they gave the most money
# (a single row per donor; ties are broken arbitrarily).
donor_type <-
  donor_receipt_types |>
  slice_max(total, by = donor_id, n = 1, with_ties = FALSE) |>
  select(donor_id, donor_type = receipt_type_desc)
```

## Add Committee Information

```{r committee-info}
# Cover pages of the selected reports, with each report's end date attached.
# The join has no explicit `by`, so it uses whatever columns the two tables
# share (at least `report_id`).
cover_2020_2022 <-
  cover |>
  semi_join(report_list_2020_2022, by = "report_id") |>
  left_join(report_dates |> select(report_id, sboe_end_date))

# Name and address of each committee, taken from the cover page with the
# latest report end date that has a committee name.
committee_names <-
  cover_2020_2022 |>
  mutate(committee_name = toupper(committee_name)) |>
  filter(!is.na(committee_name)) |>
  add_address_lookup(
    street_1, city, state, zip_code,
    name = "committee_address"
  ) |>
  group_by(sboe_id) |>
  slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
  # Drop the grouping (as `committee_types` does) so that `committees` and
  # everything derived from it is a plain, ungrouped table.
  ungroup() |>
  select(sboe_id, committee_name, committee_address)

# Committee type from the latest cover page that reports one.
committee_types <-
  cover_2020_2022 |>
  filter(!is.na(committee_type)) |>
  group_by(sboe_id) |>
  slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
  ungroup() |>
  select(sboe_id, committee_type)

# One row per committee: name, address and (where known) type.
committees <-
  committee_names |>
  left_join(committee_types, by = "sboe_id")
```

## Candidates

```{r candidate}
# End date of every report, used to find each committee's latest filing.
report_end_dates <- select(report_dates, report_id, sboe_end_date)

# Officer rows of type "Candidate" that appear on the selected reports.
candidate_officers <-
  officers |>
  filter(type == "Candidate") |>
  semi_join(report_list_2020_2022, by = "report_id")

# One candidate per committee: the entry from the report with the latest end
# date, with the contact columns renamed to a `candidate_` prefix.
candidates <-
  candidate_officers |>
  left_join(report_end_dates) |>
  group_by(sboe_id) |>
  slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
  ungroup() |>
  select(
    sboe_id,
    candidate_name = name,
    candidate_address = address,
    candidate_phone = phone
  )
```

## Candidate Affiliation

```{r candidate-affiliations}
# Candidate listing as stored by the targets pipeline. This replaces the
# `candidate_listing` object created in the setup chunk.
candidate_listing <- targets::tar_read(
  candidate_listing,
  store = here::here("process", "_targets")
)

# Republican and Democratic candidates on ballots from 2020 through 2022,
# excluding the "No Preference" ballot entry.
candidate_2020_2022 <-
  candidate_listing |>
  filter(
    between(year(election_dt), 2020, 2022),
    name_on_ballot != "No Preference",
    party_candidate %in% c("REP", "DEM")
  )

# One party per candidate name, taken from the candidate's most recent
# election in the window.
candidate_affiliation <-
  candidate_2020_2022 |>
  mutate(
    # Missing name parts become empty strings so paste() does not emit "NA".
    across(
      c(first_name, middle_name, last_name),
      \(x) if_else(is.na(x), "", x)
    ),
    # str_squish() collapses repeated blanks and also trims the leading or
    # trailing blank left behind when the first or last name is missing.
    candidate_name = str_squish(paste(first_name, middle_name, last_name))
  ) |>
  distinct(election_dt, candidate_name, party_candidate) |>
  # Group by name only: grouping by party as well would keep one row per
  # (name, party) and make the "latest election" selection a no-op, leaving
  # two rows for a candidate who changed party.
  group_by(candidate_name) |>
  slice_max(election_dt, n = 1, with_ties = FALSE) |>
  ungroup() |>
  select(-election_dt)
```


## Summary

```{r collect-data}
# Pull the per-committee donor totals into local memory.
total_donations_per_committee_lcl <- collect(total_donations_per_committee)

# Donors with their dominant receipt type attached, as a local table.
donors_with_type <- left_join(donors, donor_type, by = "donor_id")
donors_lcl <- collect(donors_with_type)

# Wide donor table: one row per donor and one column per receipt type holding
# the amount given under that type (0 where the donor has none).
donor_type_amounts_lcl <-
  donor_receipt_types |>
  collect() |>
  left_join(donors_lcl, by = "donor_id") |>
  select(-donor_id, -n_records) |>
  relocate(donor_name, donor_address, donor_type) |>
  # pivot_wider() defaults to names_from = name and values_from = value.
  rename(name = receipt_type_desc, value = total) |>
  # Order receipt types by overall amount, largest first ...
  mutate(name = fct_reorder(name, value, .fun = sum, .desc = TRUE)) |>
  # ... and ask pivot_wider() to honour that order. With the default
  # `names_sort = FALSE` the columns come out in order of first appearance and
  # the factor ordering above is ignored.
  pivot_wider(names_sort = TRUE, values_fill = 0)

# Local copy of the committee table.
committees_lcl <- collect(committees)

# Normalise a candidate name as filed: drop double-quoted parts, drop
# everything from the first "(" to the end, collapse whitespace, upper-case.
clean_sboe_candidate_name <- function(x) {
  x <- gsub('"[^"]+"', "", x)
  x <- gsub("\\(.+\\)?$", "", x)
  x <- stringr::str_squish(x)
  toupper(x)
}

# Candidates with a party attached by fuzzy-matching the cleaned name against
# the candidate listing (Jaccard similarity, threshold 0.8). Only the first
# match is kept for each committee/candidate.
# NOTE(review): which match is "first" is not controlled here -- confirm that
# an arbitrary pick is acceptable when several listing names match.
candidates_lcl <-
  candidates |>
  collect() |>
  replace_na(list(candidate_name = "")) |>
  mutate(candidate_name = clean_sboe_candidate_name(candidate_name)) |>
  jaccard_left_join(
    candidate_affiliation,
    by = "candidate_name",
    threshold = 0.8
  ) |>
  rename(
    candidate_name_sboe = candidate_name.x,
    candidate_name_list = candidate_name.y,
    candidate_party = party_candidate
  ) |>
  group_by(sboe_id, candidate_name_sboe) |>
  slice_head(n = 1) |>
  ungroup()

# Donor type for each donor, keyed by name and address.
donor_types_by_donor <- select(donors_lcl, -donor_id)

# Summary table: one row per donor/committee pair with donor type, committee
# and candidate details, plus the committee's overall total. Sorted by
# committee total, then by the donor's total, both descending.
total_donations <-
  total_donations_per_committee_lcl |>
  left_join(donor_types_by_donor, by = c("donor_name", "donor_address")) |>
  left_join(committees_lcl, by = "sboe_id") |>
  left_join(candidates_lcl, by = "sboe_id") |>
  mutate(total_committee = sum(total), .by = sboe_id) |>
  arrange(desc(total_committee), desc(total))
```

## Output

```{r prepare-receipts}
# Local copy of the report list, limited to the columns that go in the output.
report_list_lcl <- collect(
  select(
    report_list_2020_2022,
    year:amended,
    sboe_start_date,
    sboe_end_date,
    contains("received_")
  )
)

# Receipts for exactly the donor/committee pairs in the summary table.
receipts_in_summary <-
  receipts_2020_2022 |>
  collect() |>
  semi_join(total_donations, by = c("sboe_id", "donor_name", "donor_address"))

# Put the report columns first (report list is the left-hand table), then the
# receipt columns.
receipts_2020_2022_lcl <- inner_join(
  select(report_list_lcl, year:report_id),
  receipts_in_summary,
  by = c("sboe_id", "report_id")
)

# One table per year, named "Receipts - <year>".
receipts_sorted <- arrange(receipts_2020_2022_lcl, year, doc_name, report_id)
receipts_split <- split(
  receipts_sorted,
  paste("Receipts -", receipts_sorted$year)
)
```

```{r create-sheets}
# Workbook contents: the summary tables first, followed by one receipts table
# per year. List names are the sheet names.
sheets <- c(
  list(
    "Donation Summary" = total_donations,
    "Donors" = donor_type_amounts_lcl,
    "Committees" = committees_lcl,
    "Candidates" = candidates_lcl,
    "Report List" = report_list_lcl
  ),
  receipts_split
)
```

```{r preview-sheets}
# Print every table in the list so the rendered report shows a preview of each
# sheet.
sheets
```

```{r write-sheets}
# Write the named list to an Excel workbook, one sheet per list element. The
# path is relative, so the file lands in the working directory at render time.
writexl::write_xlsx(
  x = sheets,
  path = "donors_2020_2022.xlsx"
)
```

Laden…
Abbrechen
Speichern