|
#' Write the cover records that belong to a listed report
#'
#' Opens the prepared cover dataset and the report list, keeps the cover rows
#' whose `report_id` also occurs in the report list, and writes them to
#' `cover.parquet` under the `data-out`/`cover` output location.
#'
#' @param path_data_prep_cover Passed to `prep_open_dataset_db()` to open the
#'   prepared cover dataset.
#' @param path_out_report_list Passed to `out_open_dataset_db()` to open the
#'   report list.
#' @return The directory component (`dirname()`) of the written parquet file.
out_cover <- function(
  path_data_prep_cover,
  path_out_report_list
) {
  # NOTE(review): logging helpers are defined elsewhere; presumably this
  # records which target is running -- confirm.
  lg_info_target(lg_get_logger())

  out_file <- path("data-out", "cover", "cover.parquet")
  dir_create(path_dir(out_file))

  reports <- out_open_dataset_db(path_out_report_list)
  prepared_cover <- prep_open_dataset_db(path_data_prep_cover)

  # Filtering join: keeps cover rows with a matching report_id and adds no
  # columns from the report list.
  listed_cover <- semi_join(prepared_cover, reports, by = "report_id")
  cover_local <- collect(listed_cover)

  arrow::write_parquet(cover_local, out_file)

  dirname(out_file)
}
-
#' Build one row per committee from the cover output
#'
#' For every `sboe_id`, takes name and address fields from a single "latest"
#' cover row and attaches the most frequently reported combination of
#' `committee_type`, `fund_type` and `fund_name`. The result is written to
#' `committees.parquet` under the `data-out`/`committees` output location.
#'
#' @param path_out_cover Passed to `out_open_dataset_db()` to open the cover
#'   output.
#' @param path_out_report_list Passed to `out_open_dataset_db()` to open the
#'   report list (supplies `year` and `doc_order` per `report_id`).
#' @return The directory component (`dirname()`) of the written parquet file.
out_committees <- function(path_out_cover, path_out_report_list) {
  out_file <- path("data-out", "committees", "committees.parquet")
  dir_create(path_dir(out_file))

  cover_tbl <- out_open_dataset_db(path_out_cover)
  reports_tbl <- out_open_dataset_db(path_out_report_list)

  # Only the ordering columns are needed from the report list.
  report_keys <- select(reports_tbl, report_id, year, doc_order)

  # Per sboe_id: narrow to the highest year, then the highest doc_order, then
  # exactly one row with the highest date_filed (with_ties = FALSE).
  latest_cover <-
    cover_tbl |>
    inner_join(report_keys, by = "report_id") |>
    group_by(sboe_id) |>
    slice_max(year, n = 1) |>
    slice_max(doc_order, n = 1) |>
    slice_max(date_filed, n = 1, with_ties = FALSE) |>
    ungroup()

  # NOTE(review): add_address_lookup() is defined elsewhere; its effect on the
  # columns is not visible here -- confirm before relying on the output schema.
  name_address <-
    latest_cover |>
    select(
      sboe_id, committee_name, report_id,
      street_1, street_2, city, state, zip_code
    ) |>
    add_address_lookup(postal_code = zip_code) |>
    collect()

  # Per sboe_id: the single most common (committee_type, fund_type, fund_name)
  # combination; with_ties = FALSE keeps one row when counts are equal.
  dominant_type <-
    cover_tbl |>
    group_by(sboe_id) |>
    count(committee_type, fund_type, fund_name) |>
    slice_max(n, n = 1, with_ties = FALSE) |>
    ungroup() |>
    select(-n) |>
    collect()

  # Both sides hold at most one row per sboe_id; the relationship check makes
  # the join fail loudly if that ever stops being true.
  committees <- left_join(
    name_address,
    dominant_type,
    by = join_by(sboe_id),
    relationship = "one-to-one"
  )

  arrow::write_parquet(committees, out_file)

  dirname(out_file)
}
|