|
- # Created by use_targets().
- # Follow the comments below to fill in this target script.
- # Then follow the manual to check and run the pipeline:
- # https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline
-
- # Load packages required to define the pipeline:
- library(targets)
-
- # Set target options:
- # The packages attached for every target come from the `Depends` field of the
- # project DESCRIPTION. desc::desc_get_deps() parses the dependency fields into
- # one row per package, so version constraints such as `dplyr (>= 1.1.0)` and
- # entries wrapped over several lines cannot leak into the package names. The
- # `R (>= x.y)` pseudo-dependency is dropped because it is not a loadable
- # package.
- pipeline_deps <- desc::desc_get_deps()
- pipeline_packages <- pipeline_deps$package[
-   pipeline_deps$type == "Depends" & pipeline_deps$package != "R"
- ]
-
- tar_option_set(
-   packages = pipeline_packages,
-   # For distributed computing in tar_make(), supply a {crew} controller
-   # as discussed at https://books.ropensci.org/targets/crew.html.
-   controller = crew::crew_controller_local(workers = 24),
-   # debug = "path_out_addresses",
-   # cue = tar_cue(mode = "never"),
-   # A target that errors yields NULL instead of stopping the whole pipeline.
-   error = "null"
- )
-
- # Source every script under R/ so the custom functions used by the targets
- # below are defined before the pipeline is built:
- tar_source(files = "R")
-
- # Pipeline definition: the list returned here is the set of targets that
- # tar_make() builds. Paths are relative to the directory holding this script.
- list(
-   # Report list -----
-   tar_target(
-     path_report_list_csv,
-     "../data-raw/report_list.csv",
-     format = "file"
-   ),
-   tar_target(path_report_list_raw, prepare_report_list(path_report_list_csv)),
-   tar_target(report_list_raw, arrow::read_parquet(path_report_list_raw)),
-
-   # Raw report directories -----
-   tar_target(
-     dirs_all_src,
-     fs::dir_ls(
-       "../data-raw/reports",
-       glob = "**/all",
-       recurse = TRUE,
-       type = "directory"
-     ),
-     format = "file"
-   ),
-   # This comes from Will's answer in https://stackoverflow.com/a/70293576
-   # We're basically tricking targets into letting us branch over a file target:
-   # `*_src` tracks the directories as files, `*_names` holds the same paths as
-   # a plain vector to map over, and the branched target mentions `*_src` only
-   # to keep the dependency on the tracked files.
-   tar_target(dirs_all_names, dirs_all_src),
-   tar_target(
-     dirs_all,
-     {
-       dirs_all_src
-       dirs_all_names
-     },
-     pattern = map(dirs_all_names),
-     format = "file"
-   ),
-
-   tar_target(
-     dirs_receipts_src,
-     fs::dir_ls(
-       "../data-raw/reports",
-       glob = "**/receipts",
-       recurse = TRUE,
-       type = "directory"
-     ),
-     format = "file"
-   ),
-   tar_target(dirs_receipts_names, dirs_receipts_src),
-   tar_target(
-     dirs_receipts,
-     {
-       dirs_receipts_src
-       dirs_receipts_names
-     },
-     pattern = map(dirs_receipts_names),
-     format = "file"
-   ),
-
-   tar_target(
-     dirs_expenditures_src,
-     fs::dir_ls(
-       "../data-raw/reports",
-       glob = "**/expenditures",
-       recurse = TRUE,
-       type = "directory"
-     ),
-     format = "file"
-   ),
-   tar_target(dirs_expenditures_names, dirs_expenditures_src),
-   tar_target(
-     dirs_expenditures,
-     {
-       dirs_expenditures_src
-       dirs_expenditures_names
-     },
-     pattern = map(dirs_expenditures_names),
-     format = "file"
-   ),
-
-   # Per-directory parquet exports (one branch per directory) -----
-   tar_target(
-     paths_all_parquet,
-     write_prepared_report_export(dirs_all, report_list_raw),
-     pattern = map(dirs_all),
-     format = "file"
-   ),
-
-   tar_target(
-     path_receipts_parquet,
-     write_prepared_receipts_parquet(dirs_receipts, report_list_raw),
-     pattern = map(dirs_receipts),
-     format = "file"
-   ),
-
-   tar_target(
-     path_expenditures_parquet,
-     write_prepared_expenditures_parquet(dirs_expenditures, report_list_raw),
-     pattern = map(dirs_expenditures),
-     format = "file"
-   ),
-
-   # Prepared dataset directories -----
-   # Each command mentions `paths_all_parquet` before returning the directory
-   # path, so these targets are rebuilt after the export step changes.
-   tar_target(
-     path_data_prep_cover,
-     {
-       paths_all_parquet
-       "../data-prep/cover"
-     },
-     format = "file"
-   ),
-   tar_target(
-     path_data_prep_officers,
-     {
-       paths_all_parquet
-       "../data-prep/officers"
-     },
-     format = "file"
-   ),
-   tar_target(
-     path_data_prep_receipts,
-     {
-       paths_all_parquet
-       "../data-prep/receipts"
-     },
-     format = "file"
-   ),
-   tar_target(
-     path_data_prep_expenditures,
-     {
-       paths_all_parquet
-       "../data-prep/expenditures"
-     },
-     format = "file"
-   ),
-
-   tar_target(
-     cover_raw,
-     arrow::open_dataset(path_data_prep_cover, partitioning = "sboe_id") |>
-       dplyr::collect()
-   ),
-
-   # Report dates -----
-   tar_target(
-     report_dates,
-     process_report_dates(report_list_raw, cover_raw)
-   ),
-   tar_target(
-     path_report_dates,
-     {
-       out_path <- "../data-prep/report_dates/part-0.parquet"
-       fs::dir_create(fs::path_dir(out_path))
-       arrow::write_parquet(report_dates, out_path)
-       # arrow::write_parquet() returns its input data invisibly, so return the
-       # path explicitly; format = "file" then tracks the written file instead
-       # of storing a second copy of `report_dates`.
-       out_path
-     },
-     format = "file"
-   ),
-
-   tar_target(
-     report_amended_score,
-     calc_report_amended_score(report_dates)
-   ),
-
-   # Addresses -----
-   tar_target(
-     addresses_raw,
-     prep_collect_addresses_raw(
-       path_officers = path_data_prep_officers,
-       path_receipts = path_data_prep_receipts,
-       path_expenditures = path_data_prep_expenditures,
-       path_candidate_listing = path_candidate_listing,
-       path_voters = NULL # path_voters_parquet
-     ),
-     format = "parquet"
-   ),
-
-   tar_target(
-     path_addresses_db,
-     prepare_addresses_lookup_db(addresses_raw$address),
-     format = "file"
-   ),
-
-   # This report list uses the latest amended report -----
-   tar_target(
-     report_list,
-     process_report_list(report_list_raw, report_amended_score)
-   ),
-
-   tar_target(committees, prepare_committees(cover_raw, report_list)),
-
-   tar_target(
-     candidates,
-     prepare_candidates(path_data_prep_officers, report_list)
-   ),
-
-   # Outside data sources -----
-   tar_target(candidate_listing, get_candidate_listing(2016:2023)),
-   # NOTE(review): a format = "file" target must return the path(s) it wrote.
-   # Confirm that the write_parquet() resolved here does so; if it is
-   # arrow::write_parquet(), it returns the data instead and this target fails.
-   tar_target(
-     path_candidate_listing,
-     write_parquet(
-       candidate_listing,
-       "../data-prep/candidate_listing/part-0.parquet"
-     ),
-     format = "file"
-   ),
-
-   ## Voter registration records
-   # cue "never" keeps both voter targets from re-running on their own.
-   # NOTE(review): path_voters_txt has no format = "file", unlike the other
-   # path_* targets; confirm whether the downloaded file should be tracked.
-   tar_target(
-     path_voters_txt,
-     voter_statewide_download(),
-     cue = tar_cue("never")
-   ), #<< invalidate to get latest
-   tar_target(
-     path_voters_parquet,
-     voter_statewide_convert_parquet(path_voters_txt),
-     cue = tar_cue("never"),
-     format = "file"
-   ),
-
-   # Output ------------------------------------------------------------------
-   tar_target(
-     path_out_addresses,
-     # This needs to be run manually, otherwise it doesn't run in {targets}
-     # and throws an error: `bad value`
-     out_addresses(path_addresses_db, "data/addresses.parquet"),
-     cue = tar_cue("never"),
-     format = "file"
-   )
- )
|