You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

58 lines
1.4KB

  # Write the cover rows that belong to listed reports to
  # data-out/cover/cover.parquet.
  #
  # Arguments:
  #   path_data_prep_cover  Passed to prep_open_dataset_db() to open the
  #                         prepared cover data.
  #   path_out_report_list  Passed to out_open_dataset_db() to open the
  #                         report list.
  #
  # Returns the directory that contains the written parquet file, as
  # computed by dirname().
  out_cover <- function(path_data_prep_cover, path_out_report_list) {
    lg_info_target(lg_get_logger())

    parquet_file <- path("data-out", "cover", "cover.parquet")
    dir_create(path_dir(parquet_file))

    listed_reports <- out_open_dataset_db(path_out_report_list)
    prepared_cover <- prep_open_dataset_db(path_data_prep_cover)

    # semi_join() keeps only the cover rows whose report_id also appears in
    # the report list; collect() materialises the result before writing.
    kept_cover <- collect(
      semi_join(prepared_cover, listed_reports, by = "report_id")
    )

    arrow::write_parquet(kept_cover, parquet_file)
    dirname(parquet_file)
  }
  # Build the committees table and write it to
  # data-out/committees/committees.parquet.
  #
  # For every sboe_id the result combines:
  #   * the name and address columns from the cover row with the highest
  #     doc_order (doc_order is taken from the report list), and
  #   * the most frequently occurring combination of committee_type,
  #     fund_type and fund_name among that committee's cover rows.
  #
  # Arguments:
  #   path_out_cover        Passed to out_open_dataset_db() to open the
  #                         cover data.
  #   path_out_report_list  Passed to out_open_dataset_db() to open the
  #                         report list.
  #
  # Returns the directory that contains the written parquet file, as
  # computed by dirname().
  out_committees <- function(path_out_cover, path_out_report_list) {
    out <- path("data-out", "committees", "committees.parquet")
    dir_create(path_dir(out))
    cover <- out_open_dataset_db(path_out_cover)
    report_list <- out_open_dataset_db(path_out_report_list)

    committee_name_address <-
      cover |>
      inner_join(
        report_list |> select(report_id, doc_order),
        by = "report_id"
      ) |>
      group_by(sboe_id) |>
      # with_ties = FALSE guarantees a single row per sboe_id even when
      # several cover rows share the maximum doc_order; which of the tied
      # rows is kept is arbitrary.
      slice_max(doc_order, n = 1, with_ties = FALSE) |>
      ungroup() |>
      select(
        sboe_id, committee_name, report_id,
        street_1, street_2, city, state, zip_code
      ) |>
      # NOTE(review): add_address_lookup() is defined elsewhere; presumably
      # it adds address-derived columns keyed on the postal code -- confirm
      # that it does not change the number of rows.
      add_address_lookup(postal_code = zip_code)

    committee_type <-
      cover |>
      group_by(sboe_id) |>
      count(committee_type, fund_type, fund_name) |>
      # Without with_ties = FALSE, a committee whose top combinations occur
      # equally often would yield several rows here and be duplicated by the
      # join below; which of the tied combinations is kept is arbitrary.
      slice_max(n, n = 1, with_ties = FALSE) |>
      ungroup() |>
      select(-n)

    committees <-
      committee_name_address |>
      left_join(committee_type, by = "sboe_id") |>
      collect()

    arrow::write_parquet(committees, out)
    dirname(out)
  }