# Created by use_targets().
# Follow the comments below to fill in this target script.
# Then follow the manual to check and run the pipeline:
#   https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline

# Load packages required to define the pipeline:
library(targets)

# Set target options:
tar_option_set(
  # Every package listed in the DESCRIPTION `Depends` field is attached
  # before a target runs.
  packages = strsplit(desc::desc_get_field("Depends"), ", ")[[1]],
  # For distributed computing in tar_make(), supply a {crew} controller
  # as discussed at https://books.ropensci.org/targets/crew.html.
  controller = crew::crew_controller_local(workers = 12),
  # Keep the pipeline running when a target errors (see ?tar_option_set).
  error = "null"
  # debug = "parquet_report_cover_path_e8fc956a"
)

# Run the R scripts in the R/ folder with your custom functions:
tar_source()

# Pipeline definition ----
list(
  # Every combination of `year` and `report` is scraped as its own branch.
  tar_target(year, 2016:2023),
  tar_target(report, c("MY", "YE", "Q1", "Q2", "Q3", "Q4")),
  tar_target(
    doc_list,
    get_report_by_year_scrape(year, report),
    pattern = cross(year, report)
  ),

  tar_target(
    committees,
    doc_list |>
      # this is the list of committees and years reported
      # if `n_*` changes, we have to go re-read that year
      group_by(committee_name, sboe_id, year) |>
      summarize(
        n_reports = n(),
        n_amended = sum(amended),
        n_docs = sum(!is.na(report_id)),
        # Return an ungrouped table so downstream verbs do not silently
        # operate per committee.
        .groups = "drop"
      ) |>
      arrange(sboe_id)
  ),

  # Documents that have a report id, keeping the most recently received
  # row(s) for each report.
  # NOTE(review): slice_max() keeps ties by default, so a report whose rows
  # share the same `received` value yields several rows (and therefore
  # several branches below) -- confirm whether `with_ties = FALSE` is wanted.
  tar_target(
    report_list,
    doc_list |>
      filter(!is.na(report_id)) |>
      mutate(received = coalesce(received_data, received_image)) |>
      slice_max(received, by = report_id)
  ),

  # Plain vectors pulled out of `report_list` so the file targets below can
  # branch over them element by element.
  tar_target(report_list_report_id, report_list$report_id),
  tar_target(report_list_sboe_id, report_list$sboe_id),
  tar_target(report_list_received, report_list$received),

  # One branch per report; each branch is tracked as a file target.
  tar_target(
    reports_raw_path,
    save_raw_report_all(
      report_list_report_id,
      report_list_sboe_id,
      report_list_received
    ),
    pattern = map(
      report_list_report_id,
      report_list_sboe_id,
      report_list_received
    ),
    format = "file_fast"
  ),

  # Gets the JSON version of the report's receipts via an internal API call,
  # that is processed into a standard table before saving as CSV.
  tar_target(
    report_receipts_csv_path,
    save_report_receipts_csv(
      report_list_report_id,
      report_list_sboe_id,
      report_list_received
    ),
    pattern = map(
      report_list_report_id,
      report_list_sboe_id,
      report_list_received
    ),
    format = "file_fast"
  ),

  # Gets the JSON version of the report's expenses via an internal API call,
  # that is processed into a standard table before saving as CSV.
  tar_target(
    report_expenditures_csv_path,
    save_report_expenditures_csv(
      report_list_report_id,
      report_list_sboe_id,
      report_list_received
    ),
    pattern = map(
      report_list_report_id,
      report_list_sboe_id,
      report_list_received
    ),
    format = "file_fast"
  ),

  # Directory listings ----
  # Each listing names the file target that presumably populates the
  # directories it scans (pairing inferred from the target names -- confirm
  # against the save_*() helpers). Mentioning the upstream target inside the
  # command is what lets {targets} detect the dependency: without it the
  # listing could run before any file exists and would never be refreshed.
  tar_target(
    dirs_all,
    {
      reports_raw_path
      fs::dir_ls(
        "data-raw/reports",
        glob = "**/all",
        recurse = TRUE,
        type = "directory"
      )
    }
  ),
  tar_target(
    dirs_receipts,
    {
      report_receipts_csv_path
      fs::dir_ls(
        "data-raw/reports",
        glob = "**/receipts",
        recurse = TRUE,
        type = "directory"
      )
    }
  ),
  tar_target(
    dirs_expenditures,
    {
      report_expenditures_csv_path
      fs::dir_ls(
        "data-raw/reports",
        glob = "**/expenditures",
        recurse = TRUE,
        type = "directory"
      )
    }
  )
  # tar_target(
  #   parquet_report_cover_path,
  #   write_reports_by_sboe_id(report_list_sboe_id),
  #   pattern = map(unique(report_list_sboe_id)),
  #   format = "file_fast"
  # )
)