You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

61 lines
1.5KB

  # Write the cover records that belong to listed reports to
  # "data-out/cover/cover.parquet".
  #
  # Arguments:
  #   path_data_prep_cover  passed to prep_open_dataset_db() to open the
  #                         prepared cover dataset.
  #   path_out_report_list  passed to out_open_dataset_db() to open the
  #                         report list.
  #
  # Returns the directory containing the written parquet file, as given by
  # dirname().
  out_cover <- function(path_data_prep_cover, path_out_report_list) {
    # Logging helper defined elsewhere; called first, before any output work.
    lg_info_target(lg_get_logger())

    parquet_file <- path("data-out", "cover", "cover.parquet")
    dir_create(path_dir(parquet_file))

    listed_reports <- out_open_dataset_db(path_out_report_list)
    prepared_cover <- prep_open_dataset_db(path_data_prep_cover)

    # semi_join keeps only cover rows whose report_id occurs in the report
    # list, without adding any report-list columns.
    kept_cover <- collect(
      semi_join(prepared_cover, listed_reports, by = "report_id")
    )

    arrow::write_parquet(kept_cover, parquet_file)
    dirname(parquet_file)
  }
  # Build one row per committee (sboe_id) and write it to
  # "data-out/committees/committees.parquet".
  #
  # Arguments:
  #   path_out_cover        passed to out_open_dataset_db() to open the cover
  #                         output.
  #   path_out_report_list  passed to out_open_dataset_db() to open the
  #                         report list.
  #
  # Returns the directory containing the written parquet file, as given by
  # dirname().
  out_committees <- function(path_out_cover, path_out_report_list) {
    parquet_file <- path("data-out", "committees", "committees.parquet")
    dir_create(path_dir(parquet_file))

    cover_tbl <- out_open_dataset_db(path_out_cover)
    report_tbl <- out_open_dataset_db(path_out_report_list)

    # Pick a single cover row per committee: narrow to the highest year, then
    # the highest doc_order within that, then the highest date_filed. Only the
    # last step breaks ties, so exactly one row per sboe_id remains.
    latest_cover <-
      cover_tbl |>
      inner_join(
        select(report_tbl, report_id, year, doc_order),
        by = "report_id"
      ) |>
      group_by(sboe_id) |>
      slice_max(year, n = 1) |>
      slice_max(doc_order, n = 1) |>
      slice_max(date_filed, n = 1, with_ties = FALSE) |>
      ungroup()

    # Name and address columns from that row; add_address_lookup() is defined
    # elsewhere and receives zip_code as its postal_code argument.
    name_and_address <-
      latest_cover |>
      select(
        sboe_id,
        committee_name,
        report_id,
        street_1,
        street_2,
        city,
        state,
        zip_code
      ) |>
      add_address_lookup(postal_code = zip_code) |>
      collect()

    # Most frequent (committee_type, fund_type, fund_name) combination per
    # committee across all cover rows; ties are broken so one row is kept.
    most_common_type <-
      cover_tbl |>
      group_by(sboe_id) |>
      count(committee_type, fund_type, fund_name) |>
      slice_max(n, n = 1, with_ties = FALSE) |>
      ungroup() |>
      select(-n) |>
      collect()

    # Both sides hold at most one row per sboe_id; the relationship check
    # makes the join fail loudly if that ever stops being true.
    committees <- left_join(
      name_and_address,
      most_common_type,
      by = join_by(sboe_id),
      relationship = "one-to-one"
    )

    arrow::write_parquet(committees, parquet_file)
    dirname(parquet_file)
  }