You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

273 lines
7.5KB

  1. ---
  2. title: "Republican Donors, 2020-2022"
  3. author: Garrick Aden-Buie
  4. format: html
  5. execute:
  6. echo: true
  7. ---
  8. ## Task
  9. > I’m working on some donor data and I’m wondering if we may have it already in
  10. > what we’re doing… I’m trying to pull all disclosed donors ($50+) that gave to
  11. > Republican campaigns in ’20 and ’22. Even if we couldn’t specify party of
  12. > campaign but I could sort it, that would even be helpful. Is that something we
  13. > have already?
  14. ## Setup
  15. ```{r setup}
  16. library(tidyverse)
  17. library(fs)
  18. library(zoomerjoin)
  # Load the project's local "process" package; the helpers called below
  # (prep_open_dataset_db(), duckdb_global_con(), add_address_lookup()) are
  # presumably provided by it -- TODO confirm.
  pkgload::load_all(here::here("process"))

  # Open each source dataset through the project helper.
  report_dates <- prep_open_dataset_db("report_dates")
  officers <- prep_open_dataset_db("officers")
  receipts <- prep_open_dataset_db("receipts")
  candidate_listing <- prep_open_dataset_db("candidate_listing")
  cover <- prep_open_dataset_db("cover")

  # Read the report list out of the targets store and copy it to the global
  # DuckDB connection, then work with the database-side copy from here on.
  report_list_local <- targets::tar_read(
    report_list,
    store = here::here("process", "_targets")
  )
  copy_to(
    dest = duckdb_global_con(),
    df = report_list_local,
    name = "report_list"
  )
  report_list <- tbl(duckdb_global_con(), "report_list")

  # Separate SQLite database holding the "resolved" address table.
  con <- DBI::dbConnect(
    drv = RSQLite::SQLite(),
    dbname = here::here("data-prep", "address_lookup.sqlite")
  )
  address_db <- tbl(con, "resolved")
  30. ```
  31. ## Reports
  32. 1. Reports from 2020 and 2022
  33. 1. Where a donor gave a total of $50 or more to a single committee
  34. 1. Receipt types that match individual donors and not clerical records
  35. ```{r receipt-types}
  # Tabulate receipts by type (description + code), most frequent first, and
  # show the top 20 rows.
  receipt_type_counts <- count(
    receipts,
    receipt_type_desc,
    receipt_type_code,
    sort = TRUE
  )
  print(receipt_type_counts, n = 20)
  39. ```
  40. ```{r reports-first-pass}
  # Reports whose `year` is 2020 or 2022.
  report_list_2020_2022 <-
    report_list |>
    filter(year %in% c(2020, 2022))

  # Receipts attached to those reports, limited to donor-like receipt types.
  # `donor_name` is the upper-cased `org_name`; `donor_address` is added by the
  # project helper add_address_lookup().
  receipts_2020_2022_src <-
    receipts |>
    semi_join(report_list_2020_2022, by = "report_id") |>
    mutate(donor_name = toupper(org_name)) |>
    filter(
      # Keep individual/party donors; drop record keeping things, like refunds
      receipt_type_code %in% c("IND", "CPCM", "GEN", "PPTY", "OUTS", "NFPC"),
      !donor_name %in% c("AGGREGATED INDIVIDUAL CONTRIBUTION", "VARIOUS VARIOUS")
    ) |>
    add_address_lookup(name = "donor_address") |>
    select(
      sboe_id, report_id, donor_name, donor_address, amount, profession,
      employers_name, form_of_payment_desc, everything()
    )

  # Total given by each donor (name + address) to each committee (`sboe_id`),
  # keeping only totals of $50 or more.
  # `na.rm = TRUE` spells out what the SQL SUM() already does (NULLs are
  # skipped) and avoids the dbplyr "missing values are always removed" warning.
  total_donations_per_committee <-
    receipts_2020_2022_src |>
    group_by(sboe_id, donor_name, donor_address) |>
    summarize(total = sum(amount, na.rm = TRUE), .groups = "drop") |>
    filter(total >= 50)

  # Keep every receipt from a donor who reached $50 with at least one
  # committee.
  # NOTE(review): this join does not include `sboe_id`, so receipts to
  # committees where the same donor gave less than $50 are kept here --
  # confirm that is intended.
  receipts_2020_2022 <-
    receipts_2020_2022_src |>
    semi_join(
      total_donations_per_committee,
      by = c("donor_name", "donor_address")
    )
  63. ```
  64. ## Donors
  65. ```{r donors}
  # One row per distinct donor (name + address) with a sequential `donor_id`.
  #
  # The window is ordered by name AND address: `donors` is a lazy query that is
  # re-executed each time it is used (in the join below and again when it is
  # collected), so ordering by name alone would let donors who share a name but
  # have different addresses receive different ids in different executions.
  donors <-
    receipts_2020_2022 |>
    distinct(donor_name, donor_address) |>
    semi_join(
      total_donations_per_committee,
      by = c("donor_name", "donor_address")
    ) |>
    dbplyr::window_order(donor_name, donor_address) |>
    mutate(donor_id = row_number(), .before = 1)

  # Number of receipts and total amount per donor and receipt type.
  # `na.rm = TRUE` matches the SQL SUM() semantics explicitly and avoids the
  # dbplyr missing-values warning.
  donor_receipt_types <-
    receipts_2020_2022 |>
    left_join(donors, by = c("donor_name", "donor_address")) |>
    group_by(donor_id, receipt_type_desc) |>
    summarize(
      n_records = n(),
      total = sum(amount, na.rm = TRUE),
      .groups = "drop"
    )

  # A donor's type is the receipt type with the largest total for that donor;
  # exactly one row is kept per donor.
  donor_type <-
    donor_receipt_types |>
    slice_max(total, by = donor_id, n = 1, with_ties = FALSE) |>
    select(donor_id, donor_type = receipt_type_desc)
  81. ```
  82. ## Add Committee Information
  83. ```{r committee-info}
  # Cover records for the selected reports, with each report's end date.
  # The join key is explicit so the join cannot silently pick up extra shared
  # columns. (Assumes `cover` has no `sboe_end_date` of its own -- TODO
  # confirm.)
  cover_2020_2022 <-
    cover |>
    semi_join(report_list_2020_2022, by = "report_id") |>
    left_join(
      report_dates |> select(report_id, sboe_end_date),
      by = "report_id"
    )

  # Name and address per committee, taken from the cover record with the
  # latest report end date that has a committee name.
  committee_names <-
    cover_2020_2022 |>
    mutate(committee_name = toupper(committee_name)) |>
    filter(!is.na(committee_name)) |>
    add_address_lookup(
      street_1, city, state, zip_code,
      name = "committee_address"
    ) |>
    group_by(sboe_id) |>
    slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
    # Drop the grouping (as committee_types does) so it does not leak into
    # `committees` and everything derived from it.
    ungroup() |>
    select(sboe_id, committee_name, committee_address)

  # Committee type per committee, from the latest cover record that has one.
  committee_types <-
    cover_2020_2022 |>
    filter(!is.na(committee_type)) |>
    group_by(sboe_id) |>
    slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
    ungroup() |>
    select(sboe_id, committee_type)

  committees <-
    committee_names |>
    left_join(committee_types, by = "sboe_id")
  106. ```
  107. ## Candidates
  108. ```{r candidate}
  # One candidate officer record per committee (`sboe_id`): the record from the
  # report with the latest end date among the selected reports.
  # The join key is explicit rather than relying on a natural join. (Assumes
  # `officers` has no `sboe_end_date` of its own -- TODO confirm.)
  candidates <-
    officers |>
    filter(type == "Candidate") |>
    semi_join(report_list_2020_2022, by = "report_id") |>
    left_join(
      report_dates |> select(report_id, sboe_end_date),
      by = "report_id"
    ) |>
    group_by(sboe_id) |>
    slice_max(sboe_end_date, n = 1, with_ties = FALSE) |>
    ungroup() |>
    select(
      sboe_id,
      candidate_name = name,
      candidate_address = address,
      candidate_phone = phone
    )
  118. ```
  119. ## Candidate Affiliation
  120. ```{r candidate-affilitaions}
  # Candidate listing read from the targets store; this replaces any
  # `candidate_listing` object defined earlier in the session.
  candidate_listing <- targets::tar_read(
    candidate_listing,
    store = here::here("process", "_targets")
  )

  # Republican and Democratic ballot entries for elections held in 2020
  # through 2022 (inclusive), excluding "No Preference" lines.
  candidate_2020_2022 <-
    candidate_listing |>
    filter(
      between(year(election_dt), 2020, 2022),
      name_on_ballot != "No Preference",
      party_candidate %in% c("REP", "DEM")
    )

  # One party per candidate name: the party from the most recent election.
  #
  # Grouping is by `candidate_name` only. Grouping by name *and* party makes
  # the slice_max() on `election_dt` a no-op (every name/party pair survives),
  # so a name listed under both parties would keep two rows and the party
  # attached to it downstream would be arbitrary.
  candidate_affiliation <-
    candidate_2020_2022 |>
    mutate(
      # Treat missing name parts as empty so paste() does not emit "NA".
      across(
        c(first_name, middle_name, last_name),
        \(part) if_else(is.na(part), "", part)
      ),
      candidate_name = paste(first_name, middle_name, last_name),
      # Collapse the double space left behind by an empty middle name.
      candidate_name = gsub(" +", " ", candidate_name)
    ) |>
    distinct(election_dt, candidate_name, party_candidate) |>
    group_by(candidate_name) |>
    slice_max(election_dt, n = 1, with_ties = FALSE) |>
    ungroup() |>
    select(-election_dt)
  141. ```
  142. ## Summary
  143. ```{r collect-data}
  # Pull the database-side results into local tibbles.
  total_donations_per_committee_lcl <- collect(total_donations_per_committee)

  donors_lcl <-
    donors |>
    left_join(donor_type, by = "donor_id") |>
    collect()

  # One row per donor with one column per receipt type holding the amount
  # given under that type (0 where the donor has none).
  #
  # `names_sort = TRUE` is what makes the fct_reorder() take effect: by default
  # pivot_wider() orders the new columns by first appearance in the data, which
  # ignores factor levels and depends on the row order returned by the
  # database. With sorting on, columns follow the factor levels, i.e. receipt
  # types with the largest overall total come first.
  donor_type_amounts_lcl <-
    donor_receipt_types |>
    collect() |>
    left_join(donors_lcl, by = "donor_id") |>
    select(-donor_id, -n_records) |>
    relocate(donor_name, donor_address, donor_type) |>
    rename(name = receipt_type_desc, value = total) |>
    mutate(name = fct_reorder(name, value, .fun = sum, .desc = TRUE)) |>
    pivot_wider(
      names_from = name,
      values_from = value,
      values_fill = 0,
      names_sort = TRUE
    )

  committees_lcl <- committees |> collect()

  # Normalise the committee-reported candidate names, then fuzzy-match them
  # (Jaccard similarity, threshold 0.8) against the candidate listing to pick
  # up a party affiliation.
  candidates_lcl <-
    candidates |>
    collect() |>
    replace_na(list(candidate_name = "")) |>
    mutate(
      # Remove double-quoted segments (e.g. nicknames).
      candidate_name = gsub('"[^"]+"', "", candidate_name),
      # Remove everything from the first opening parenthesis to the end.
      candidate_name = gsub("\\(.+\\)?$", "", candidate_name),
      candidate_name = stringr::str_squish(candidate_name),
      candidate_name = toupper(candidate_name)
    ) |>
    jaccard_left_join(
      candidate_affiliation,
      by = "candidate_name",
      threshold = 0.8
    ) |>
    rename(
      candidate_name_sboe = candidate_name.x,
      candidate_name_list = candidate_name.y,
      candidate_party = party_candidate
    ) |>
    # A name can match several listing entries; keep the first match only.
    group_by(sboe_id, candidate_name_sboe) |>
    slice(1) |>
    ungroup()

  # Donor/committee totals enriched with donor type, committee and candidate
  # details, ordered by committee total and then by donor total (both
  # descending).
  total_donations <-
    total_donations_per_committee_lcl |>
    left_join(
      donors_lcl |> select(-donor_id),
      by = c("donor_name", "donor_address")
    ) |>
    left_join(committees_lcl, by = "sboe_id") |>
    left_join(candidates_lcl, by = "sboe_id") |>
    mutate(total_committee = sum(total), .by = sboe_id) |>
    arrange(desc(total_committee), desc(total))
  190. ```
  191. ## Output
  192. ```{r prepare-receipts}
  # Local copy of the selected reports, reduced to the columns for the export.
  report_list_lcl <- collect(
    select(
      report_list_2020_2022,
      year:amended, sboe_start_date, sboe_end_date, contains("received_")
    )
  )

  # Receipts belonging to a donor/committee pair present in the summary.
  qualifying_receipts <-
    receipts_2020_2022 |>
    collect() |>
    semi_join(total_donations, by = c("sboe_id", "donor_name", "donor_address"))

  # Report columns go on the left so they lead the receipt columns in the
  # joined result.
  report_keys <- select(report_list_lcl, year:report_id)
  receipts_2020_2022_lcl <- inner_join(
    report_keys,
    qualifying_receipts,
    by = c("sboe_id", "report_id")
  )

  # One data frame per year, named "Receipts - <year>".
  receipts_sorted <- arrange(receipts_2020_2022_lcl, year, doc_name, report_id)
  receipts_split <- split(
    receipts_sorted,
    paste("Receipts -", receipts_sorted$year)
  )
  210. ```
  211. ```{r create-sheets}
  # Named list of data frames, one per worksheet: the fixed summary sheets
  # first, followed by the per-year receipt sheets.
  sheets <- c(
    list(
      "Donation Summary" = total_donations,
      "Donors" = donor_type_amounts_lcl,
      "Committees" = committees_lcl,
      "Candidates" = candidates_lcl,
      "Report List" = report_list_lcl
    ),
    receipts_split
  )
  220. ```
  221. ```{r preview-sheets}
  # Show every sheet in the rendered document before it is written out.
  print(sheets)
  223. ```
  224. ```{r write-sheets}
  # Write the workbook (one worksheet per list element) to a path relative to
  # the working directory.
  writexl::write_xlsx(x = sheets, path = "donors_2020_2022.xlsx")
  226. ```