| @@ -0,0 +1,11 @@ | |||
| #' Build the committee table from each committee's most recent report. | |||
| #' | |||
| #' @param cover_raw Cover table; must carry `report_id` and the columns selected below. | |||
| #' @param report_list Report listing with `sboe_id`, `report_id` and `end_date`; defaults to `tar_read(report_list)`. | |||
| #' @return The rows of `cover_raw` whose `report_id` belongs to a latest report, reduced to `sboe_id:committee_type`, `fund_type` and `fund_name`. | |||
| prepare_committees <- function(cover_raw, report_list = tar_read(report_list)) { | |||
| # Rows with the greatest end_date inside each sboe_id group; slice_max() keeps all ties by default. | |||
| newest_reports <- slice_max(report_list, end_date, by = "sboe_id") | |||
| # Filtering join: keeps cover rows that have a match, adds no columns from newest_reports. | |||
| matching_cover <- semi_join(cover_raw, newest_reports, by = "report_id") | |||
| select(matching_cover, sboe_id:committee_type, fund_type, fund_name) | |||
| } | |||
| #' Write the committee table to a Parquet file. | |||
| #' | |||
| #' @param committees Table handed to `arrow::write_parquet()`. | |||
| #' @param out_path Destination file; the default is a relative path, so it resolves against the current working directory. | |||
| #' @return Whatever `arrow::write_parquet()` returns. | |||
| write_committee_parquet <- function(committees, out_path = "../data/committees.parquet") { | |||
| arrow::write_parquet(x = committees, sink = out_path) | |||
| } | |||
| @@ -13,7 +13,7 @@ process_report_dates <- function(report_list_raw, cover_raw) { | |||
| select( | |||
| report_id, sboe_id, year, doc_name, amended, | |||
| contains("received_"), | |||
| matches("sboe_(start|end)_date") | |||
| matches("(sboe_)?(start|end)_date") | |||
| ) |> | |||
| left_join(cover_dates, by = "report_id") |> | |||
| mutate(across(matches("received|date"), na_if_obviously_wrong_date)) |> | |||
| @@ -1,14 +0,0 @@ | |||
| # Build the committee table from the on-disk cover dataset and write it out as a single Parquet file. | |||
| # report_list: report listing with sboe_id, report_id and end_date; defaults to tar_read(report_list). | |||
| write_committee_parquet <- function(report_list = tar_read(report_list)) { | |||
| cover_path <- here::here("../data/cover") | |||
| # Open the cover directory as an Arrow dataset, with sboe_id as the partitioning field. | |||
| cover <- arrow::open_dataset(cover_path, partitioning = "sboe_id") | |||
| # Rows with the greatest end_date inside each sboe_id group; slice_max() keeps all ties by default. | |||
| latest_report_by_committee <- report_list |> slice_max(end_date, by = "sboe_id") | |||
| # The semi_join runs against the dataset before collect(); presumably collect() then pulls the | |||
| # filtered rows into memory so select() works on a data frame — TODO confirm. | |||
| committees <- | |||
| cover |> | |||
| semi_join(latest_report_by_committee, by = "report_id") |> | |||
| collect() |> | |||
| select(sboe_id:committee_type, fund_type, fund_name) | |||
| # NOTE(review): the input path starts with "../" while this output path does not — confirm both resolve where intended. | |||
| arrow::write_parquet(committees, here::here("data/committees/part-0.parquet")) | |||
| } | |||
| @@ -82,5 +82,7 @@ list( | |||
| tar_target( | |||
| report_list, | |||
| process_report_list(report_list_raw, report_amended_score) | |||
| ) | |||
| ), | |||
| tar_target(committees, prepare_committees(cover_raw, report_list)) | |||
| ) | |||