|
- # Created by use_targets().
- # Follow the comments below to fill in this target script.
- # Then follow the manual to check and run the pipeline:
- # https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline
-
- # Load packages required to define the pipeline:
- library(targets)
-
- # Set target options:
- tar_option_set(
-   # Load every package listed under Depends in DESCRIPTION for each target.
-   # desc_get_deps() parses the field into one row per dependency, so version
-   # constraints, wrapped lines and the special "R" entry cannot end up in
-   # the package names (splitting the raw field on ", " would let them in).
-   packages = with(
-     desc::desc_get_deps(),
-     package[type == "Depends" & package != "R"]
-   ),
-   # For distributed computing in tar_make(), supply a {crew} controller
-   # as discussed at https://books.ropensci.org/targets/crew.html.
-   controller = crew::crew_controller_local(workers = 24),
-   # Per the {targets} docs, "null" lets an errored target return NULL so the
-   # remaining targets still run.
-   error = "null"
-   # debug = "parquet_report_cover_path_e8fc956a"
- )
-
- # Source the scripts in the R/ folder (custom functions for this pipeline):
- tar_source(files = "R")
-
- # Never executed when the pipeline runs. Run the body by hand to "transfer"
- # updates from this step to the "process" step: it writes the current
- # report_list target, sorted by report_id, to data-raw/report_list.csv.
- if (FALSE) {
-   write_csv(
-     arrange(tar_read(report_list), report_id),
-     "data-raw/report_list.csv"
-   )
- }
-
- # Pipeline definition: the list of targets evaluated by tar_make().
- list(
-   # Years and report codes to scrape; doc_list gets one dynamic branch per
-   # (year, report) combination.
-   tar_target(year, 2016:2023),
-   tar_target(report, c("MY", "YE", "Q1", "Q2", "Q3", "Q4")),
-   # invalidate the doc_list target to restart this pipeline
-   # tar_invalidate(doc_list)
-   tar_target(
-     doc_list,
-     get_report_by_year_scrape(year, report),
-     pattern = cross(year, report)
-   ),
-   tar_target(
-     committees,
-     doc_list |>
-       # this is the list of committees and years reported
-       # if `n_*` changes, we have to go re-read that year
-       group_by(committee_name, sboe_id, year) |>
-       summarize(
-         n_reports = n(),
-         n_amended = sum(amended),
-         n_docs = sum(!is.na(report_id)),
-         # Drop the leftover (committee_name, sboe_id) grouping so the target
-         # is a plain ungrouped table and summarize() stays silent about
-         # regrouping. Row order is unaffected.
-         .groups = "drop"
-       ) |>
-       arrange(sboe_id)
-   ),
-   # One row per report document to download, split into one group (and thus
-   # one downstream branch) per sboe_id.
-   tar_target(
-     report_list,
-     doc_list |>
-       # rows without a report_id have nothing to download
-       filter(!is.na(report_id)) |>
-       # prefer received_data, fall back to received_image
-       mutate(received = coalesce(received_data, received_image)) |>
-       # keep the most recently received entry for each report_id
-       # NOTE(review): slice_max() keeps ties by default, so a report_id can
-       # still appear more than once; confirm whether with_ties = FALSE is
-       # wanted here.
-       slice_max(received, by = report_id) |>
-       group_by(sboe_id) |>
-       tar_group(),
-     iteration = "group"
-   ),
-   # Calls save_raw_report_all() once per row of the branch's report_list
-   # group and collects the returned strings (presumably the saved paths --
-   # TODO confirm against the helper in R/).
-   tar_target(
-     reports_raw_path,
-     pmap_chr(report_list, \(sboe_id, report_id, received, ...) {
-       # `...` absorbs the remaining report_list columns passed by pmap
-       save_raw_report_all(report_id, sboe_id, received)
-     }),
-     pattern = map(report_list),
-     iteration = "vector"
-   ),
-   # Gets the JSON version of the report's receipts via an internal API call,
-   # which is processed into a standard table before being saved as CSV.
-   tar_target(
-     report_receipts_csv_path,
-     pmap_chr(report_list, \(sboe_id, report_id, received, ...) {
-       save_report_receipts_csv(report_id, sboe_id, received)
-     }),
-     pattern = map(report_list),
-     iteration = "vector"
-   ),
-   # Gets the JSON version of the report's expenses via an internal API call,
-   # which is processed into a standard table before being saved as CSV.
-   tar_target(
-     report_expenditures_csv_path,
-     pmap_chr(report_list, \(sboe_id, report_id, received, ...) {
-       save_report_expenditures_csv(report_id, sboe_id, received)
-     }),
-     pattern = map(report_list),
-     iteration = "vector"
-   )
- )
|