You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

90 lines
2.8KB

  1. # Created by use_targets().
  2. # Follow the comments below to fill in this target script.
  3. # Then follow the manual to check and run the pipeline:
  4. # https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline
  5. # Load packages required to define the pipeline:
  6. library(targets)
  # Set target options:
  tar_option_set(
    # Packages loaded for every target: the `Depends` entries of the project's
    # DESCRIPTION file. desc::desc_get_deps() parses the field properly, so
    # multi-line fields and version constraints such as "(>= 1.1.0)" are
    # handled, and the "R" pseudo-dependency is removed because it is not an
    # installable package.
    packages = local({
      deps <- desc::desc_get_deps()
      setdiff(deps$package[deps$type == "Depends"], "R")
    }),
    # For distributed computing in tar_make(), supply a {crew} controller
    # as discussed at https://books.ropensci.org/targets/crew.html.
    controller = crew::crew_controller_local(workers = 24),
    # With "null", a target that errors returns NULL and the rest of the
    # pipeline keeps running (see the tar_option_set() documentation).
    error = "null"
    # debug = "parquet_report_cover_path_e8fc956a"
  )
  # Source the scripts in the R/ folder so the custom functions they define
  # are available to the pipeline:
  tar_source()
  # Manual step: the `if (FALSE)` guard means this never runs as part of the
  # pipeline. Execute the body by hand to "transfer" updates from this step
  # to the "process" step. It reads the built `report_list` target, sorts it
  # by report_id, and writes it to data-raw/report_list.csv.
  if (FALSE) {
    write_csv(
      arrange(tar_read(report_list), report_id),
      "data-raw/report_list.csv"
    )
  }
  # Target list that defines the pipeline:
  list(
    # Values of `year` and `report`; doc_list creates one branch for every
    # (year, report) combination via cross().
    tar_target(year, 2016:2023),
    tar_target(report, c("MY", "YE", "Q1", "Q2", "Q3", "Q4")),
    tar_target(
      doc_list,
      get_report_by_year_scrape(year, report),
      pattern = cross(year, report)
    ),
    tar_target(
      committees,
      doc_list |>
        # this is the list of committees and years reported
        # if `n_*` changes, we have to go re-read that year
        group_by(committee_name, sboe_id, year) |>
        summarize(
          n_reports = n(),
          n_amended = sum(amended),
          n_docs = sum(!is.na(report_id)),
          # Drop all grouping so the stored target is a plain, ungrouped
          # table and summarize() does not emit its regrouping message.
          .groups = "drop"
        ) |>
        arrange(sboe_id)
    ),
    tar_target(
      report_list,
      doc_list |>
        filter(!is.na(report_id)) |>
        # Use received_data when present, otherwise received_image.
        mutate(received = coalesce(received_data, received_image)) |>
        # Keep exactly one row per report_id: the one with the largest
        # `received`. with_ties = FALSE prevents rows that share that value
        # from all being kept, which would make the save targets below
        # process the same report more than once.
        slice_max(received, by = report_id, with_ties = FALSE) |>
        # One tar_group (and so one downstream branch) per sboe_id.
        group_by(sboe_id) |>
        tar_group(),
      iteration = "group"
    ),
    # For each sboe_id group of report_list, calls save_raw_report_all() once
    # per row and collects the character results (presumably the saved file
    # paths, going by the target name -- confirm in R/).
    tar_target(
      reports_raw_path,
      pmap_chr(report_list, function(sboe_id, report_id, received, ...) {
        save_raw_report_all(report_id, sboe_id, received)
      }),
      pattern = map(report_list),
      iteration = "vector"
    ),
    # Gets the JSON version of the report's receipts via an internal API call,
    # that is processed into a standard table before saving as CSV.
    tar_target(
      report_receipts_csv_path,
      pmap_chr(report_list, function(sboe_id, report_id, received, ...) {
        save_report_receipts_csv(report_id, sboe_id, received)
      }),
      pattern = map(report_list),
      iteration = "vector"
    ),
    # Gets the JSON version of the report's expenses via an internal API call,
    # that is processed into a standard table before saving as CSV.
    tar_target(
      report_expenditures_csv_path,
      pmap_chr(report_list, function(sboe_id, report_id, received, ...) {
        save_report_expenditures_csv(report_id, sboe_id, received)
      }),
      pattern = map(report_list),
      iteration = "vector"
    )
  )