You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

119 lines
3.4KB

  1. # Created by use_targets().
  2. # Follow the comments below to fill in this target script.
  3. # Then follow the manual to check and run the pipeline:
  4. # https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline
  5. # Load packages required to define the pipeline:
  6. library(targets)
  # Set target options:
  tar_option_set(
    # Attach every package named under `Depends:` in the project DESCRIPTION
    # when a target runs. desc::desc_get_deps() parses the field into one row
    # per dependency, so version constraints such as "pkg (>= 1.0)", the
    # pseudo-dependency "R", and any comma/newline layout are handled; a plain
    # split of the raw field on ", " would pass those through as package names.
    packages = local({
      deps <- desc::desc_get_deps()
      setdiff(deps$package[deps$type == "Depends"], "R")
    }),
    # For distributed computing in tar_make(), supply a {crew} controller
    # as discussed at https://books.ropensci.org/targets/crew.html.
    controller = crew::crew_controller_local(workers = 12),
    # Keep the pipeline running when a target errors; per the targets
    # documentation the errored target returns NULL in that case.
    error = "null"
    # debug = "parquet_report_cover_path_e8fc956a"
  )
  # Run the R scripts in the R/ folder with your custom functions:
  tar_source()
  # Pipeline definition: scrape the report index, keep one row per report,
  # save each report's files, then list the directories written on disk.
  list(
    # Inputs that are crossed to form the scraping branches below.
    tar_target(year, 2016:2023),
    tar_target(report, c("MY", "YE", "Q1", "Q2", "Q3", "Q4")),
    # One dynamic branch per (year, report) combination.
    tar_target(
      doc_list,
      get_report_by_year_scrape(year, report),
      pattern = cross(year, report)
    ),
    tar_target(
      committees,
      doc_list |>
        # this is the list of committees and years reported
        # if `n_*` changes, we have to go re-read that year
        group_by(committee_name, sboe_id, year) |>
        summarize(
          n_reports = n(),
          n_amended = sum(amended),
          n_docs = sum(!is.na(report_id)),
          # Return a plain, ungrouped table instead of one that silently stays
          # grouped by committee_name and sboe_id.
          .groups = "drop"
        ) |>
        arrange(sboe_id)
    ),
    # Rows of doc_list that have a report_id, reduced to the most recently
    # received row for each report_id.
    tar_target(
      report_list,
      doc_list |>
        filter(!is.na(report_id)) |>
        mutate(received = coalesce(received_data, received_image)) |>
        # with_ties = FALSE guarantees exactly one row per report_id even when
        # several rows share the latest `received` value, so the download
        # targets below never process the same report twice.
        slice_max(received, by = report_id, with_ties = FALSE)
    ),
    # Columns of report_list exposed as separate targets so the download
    # targets can map() over them together, element by element.
    tar_target(report_list_report_id, report_list$report_id),
    tar_target(report_list_sboe_id, report_list$sboe_id),
    tar_target(report_list_received, report_list$received),
    # One branch per report; the command's return value is tracked as file
    # path(s) via format = "file_fast".
    tar_target(
      reports_raw_path,
      save_raw_report_all(
        report_list_report_id,
        report_list_sboe_id,
        report_list_received
      ),
      pattern = map(
        report_list_report_id,
        report_list_sboe_id,
        report_list_received
      ),
      format = "file_fast"
    ),
    # Gets the JSON version of the report's receipts via an internal API call,
    # that is processed into a standard table before saving as CSV.
    tar_target(
      report_receipts_csv_path,
      save_report_receipts_csv(
        report_list_report_id,
        report_list_sboe_id,
        report_list_received
      ),
      pattern = map(
        report_list_report_id,
        report_list_sboe_id,
        report_list_received
      ),
      format = "file_fast"
    ),
    # Gets the JSON version of the report's expenses via an internal API call,
    # that is processed into a standard table before saving as CSV.
    tar_target(
      report_expenditures_csv_path,
      save_report_expenditures_csv(
        report_list_report_id,
        report_list_sboe_id,
        report_list_received
      ),
      pattern = map(
        report_list_report_id,
        report_list_sboe_id,
        report_list_received
      ),
      format = "file_fast"
    ),
    # The three directory listings below read the file system directly.
    # targets only orders and invalidates targets through symbols it finds in
    # the command, so each listing names the download target it relies on;
    # without that reference the listing could run before any file exists and
    # would never be refreshed afterwards.
    tar_target(
      dirs_all,
      {
        # NOTE(review): assumes save_raw_report_all() is what populates the
        # `all` directories -- confirm against R/.
        reports_raw_path
        fs::dir_ls(
          "data-raw/reports",
          glob = "**/all",
          recurse = TRUE,
          type = "directory"
        )
      }
    ),
    tar_target(
      dirs_receipts,
      {
        # NOTE(review): assumes save_report_receipts_csv() is what populates
        # the `receipts` directories -- confirm against R/.
        report_receipts_csv_path
        fs::dir_ls(
          "data-raw/reports",
          glob = "**/receipts",
          recurse = TRUE,
          type = "directory"
        )
      }
    ),
    tar_target(
      dirs_expenditures,
      {
        # NOTE(review): assumes save_report_expenditures_csv() is what
        # populates the `expenditures` directories -- confirm against R/.
        report_expenditures_csv_path
        fs::dir_ls(
          "data-raw/reports",
          glob = "**/expenditures",
          recurse = TRUE,
          type = "directory"
        )
      }
    )
    # Disabled target, kept for reference.
    # NOTE(review): targets patterns appear to accept only target names inside
    # map()/cross(), so `unique(...)` would need its own upstream target
    # before this can be re-enabled -- confirm.
    # tar_target(
    # parquet_report_cover_path,
    # write_reports_by_sboe_id(report_list_sboe_id),
    # pattern = map(unique(report_list_sboe_id)),
    # format = "file_fast"
    # )
  )