# Created by use_targets().
# Follow the comments below to fill in this target script.
# Then follow the manual to check and run the pipeline:
#   https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline

# Load packages required to define the pipeline:
library(targets)

# Set target options:
tar_option_set(
  # Packages loaded for every target come from the DESCRIPTION `Depends` field.
  # NOTE(review): splitting on ", " assumes plain comma-space separated package
  # names with no version constraints (e.g. "R (>= 4.1)") -- confirm.
  packages = strsplit(desc::desc_get_field("Depends"), ", ")[[1]],
  # For distributed computing in tar_make(), supply a {crew} controller
  # as discussed at https://books.ropensci.org/targets/crew.html.
  controller = crew::crew_controller_local(workers = 24),
  # debug = "path_receipts_parquet_8d195f7e",
  # cue = tar_cue(mode = "never")
  # An errored target returns NULL and the rest of the pipeline keeps running.
  error = "null"
)

# Run the R scripts in the R/ folder with your custom functions:
tar_source()

# Replace the target list below with your own:
list(
  # Report list -----
  tar_target(
    path_report_list_csv,
    "../data-raw/report_list.csv",
    format = "file"
  ),
  tar_target(path_report_list_raw, prepare_report_list(path_report_list_csv)),
  tar_target(report_list_raw, arrow::read_parquet(path_report_list_raw)),

  # Raw report directories -----
  tar_target(
    dirs_all_src,
    fs::dir_ls(
      "../data-raw/reports",
      glob = "**/all",
      recurse = TRUE,
      type = "directory"
    ),
    format = "file"
  ),
  # This comes from Will's answer in https://stackoverflow.com/a/70293576
  # We're basically tricking targets into letting us branch over a file target
  tar_target(dirs_all_names, dirs_all_src),
  tar_target(
    dirs_all,
    {
      dirs_all_src
      dirs_all_names
    },
    pattern = map(dirs_all_names),
    format = "file"
  ),
  tar_target(
    dirs_receipts_src,
    fs::dir_ls(
      "../data-raw/reports",
      glob = "**/receipts",
      recurse = TRUE,
      type = "directory"
    ),
    format = "file"
  ),
  tar_target(dirs_receipts_names, dirs_receipts_src),
  tar_target(
    dirs_receipts,
    {
      dirs_receipts_src
      dirs_receipts_names
    },
    pattern = map(dirs_receipts_names),
    format = "file"
  ),
  tar_target(
    dirs_expenditures_src,
    fs::dir_ls(
      "../data-raw/reports",
      glob = "**/expenditures",
      recurse = TRUE,
      type = "directory"
    ),
    format = "file"
  ),
  tar_target(dirs_expenditures_names, dirs_expenditures_src),
  tar_target(
    dirs_expenditures,
    {
      dirs_expenditures_src
      dirs_expenditures_names
    },
    pattern = map(dirs_expenditures_names),
    format = "file"
  ),

  # Prepared parquet output, one dynamic branch per raw directory -----
  tar_target(
    paths_all_parquet,
    write_prepared_report_export(dirs_all, report_list_raw),
    pattern = map(dirs_all),
    format = "file"
  ),
  tar_target(
    path_receipts_parquet,
    write_prepared_receipts_parquet(dirs_receipts, report_list_raw),
    pattern = map(dirs_receipts),
    format = "file"
  ),
  tar_target(
    path_expenditures_parquet,
    write_prepared_expenditures_parquet(dirs_expenditures, report_list_raw),
    pattern = map(dirs_expenditures),
    format = "file"
  ),

  # Prepared dataset directories. Each command mentions `paths_all_parquet`
  # only to create a dependency edge; the tracked value is the directory path.
  # NOTE(review): the receipts and expenditures directories depend on
  # `paths_all_parquet`, while `path_receipts_parquet` and
  # `path_expenditures_parquet` have no downstream target in this file --
  # confirm this is intended rather than a copy-paste slip.
  tar_target(
    path_data_prep_cover,
    {
      paths_all_parquet
      "../data-prep/cover"
    },
    format = "file"
  ),
  tar_target(
    path_data_prep_officers,
    {
      paths_all_parquet
      "../data-prep/officers"
    },
    format = "file"
  ),
  tar_target(
    path_data_prep_receipts,
    {
      paths_all_parquet
      "../data-prep/receipts"
    },
    format = "file"
  ),
  tar_target(
    path_data_prep_expenditures,
    {
      paths_all_parquet
      "../data-prep/expenditures"
    },
    format = "file"
  ),

  tar_target(
    cover_raw,
    arrow::open_dataset(path_data_prep_cover, partitioning = "sboe_id") |>
      dplyr::collect()
  ),
  tar_target(
    report_dates,
    process_report_dates(report_list_raw, cover_raw)
  ),
  # arrow::write_parquet() returns its input data invisibly, not the path, so
  # the path is returned explicitly and tracked as a file like the other
  # `path_*` targets.
  tar_target(
    path_report_dates,
    {
      out_path <- "../data-prep/report_dates/part-0.parquet"
      fs::dir_create(fs::path_dir(out_path))
      arrow::write_parquet(report_dates, out_path)
      out_path
    },
    format = "file"
  ),
  tar_target(
    report_amended_score,
    calc_report_amended_score(report_dates)
  ),
  tar_target(
    addresses_raw,
    prep_collect_addresses_raw(
      path_officers = path_data_prep_officers,
      path_receipts = path_data_prep_receipts,
      path_expenditures = path_data_prep_expenditures,
      path_candidate_listing = path_candidate_listing,
      path_voters = NULL # path_voters_parquet
    ),
    format = "parquet"
  ),
  tar_target(
    path_addresses_db,
    prepare_addresses_lookup_db(addresses_raw$address)
  ),

  # This report list uses the latest amended report -----
  tar_target(
    report_list,
    process_report_list(report_list_raw, report_amended_score)
  ),
  tar_target(committees, prepare_committees(cover_raw, report_list)),
  tar_target(
    candidates,
    prepare_candidates(path_data_prep_officers, report_list)
  ),

  # Outside data sources -----
  tar_target(candidate_listing, get_candidate_listing(2016:2023)),
  # NOTE(review): `write_parquet()` is called without a namespace. If it
  # resolves to arrow::write_parquet(), this target holds the data frame
  # rather than the path -- confirm what `path_candidate_listing` contains.
  tar_target(
    path_candidate_listing,
    write_parquet(
      candidate_listing,
      "../data-prep/candidate_listing/part-0.parquet"
    )
  ),

  ## Voter registration records
  # Both voter targets use a "never" cue, so they do not rerun on their own.
  tar_target(
    path_voters_txt,
    voter_statewide_download(),
    cue = tar_cue(mode = "never")
  ), #<< invalidate to get latest
  tar_target(
    path_voters_parquet,
    voter_statewide_convert_parquet(path_voters_txt),
    cue = tar_cue(mode = "never")
  )
)