Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

91 line
2.8KB

  # Created by use_targets().
  # Follow the comments below to fill in this target script.
  # Then follow the manual to check and run the pipeline:
  # https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline

  # Load packages required to define the pipeline:
  library(targets)

  # Set target options:
  tar_option_set(
    # Attach every package listed under `Depends` in the project's DESCRIPTION.
    # The parsed dependency table is used instead of splitting the raw field on
    # ", " so that version constraints such as "pkg (>= 1.0)", irregular
    # whitespace, an `R (>= x.y)` entry, or a missing `Depends` field cannot
    # produce invalid package names.
    packages = local({
      deps <- desc::desc_get_deps()
      setdiff(deps$package[deps$type == "Depends"], "R")
    }),
    # For distributed computing in tar_make(), supply a {crew} controller
    # as discussed at https://books.ropensci.org/targets/crew.html.
    controller = crew::crew_controller_local(workers = 24),
    # Error policy for targets; see the `error` argument in
    # ?targets::tar_option_set for the exact semantics of "null".
    error = "null"
    # debug = "parquet_report_cover_path_e8fc956a"
  )

  # Run the R scripts in the R/ folder with your custom functions:
  tar_source()
  # Manual step: run the body by hand to "transfer" updates from this step to
  # the "process" step. The `if (FALSE)` guard means it is never executed when
  # this script is sourced; it reads the `report_list` target, sorts it by
  # `report_id`, and writes it to data-raw/report_list.csv.
  if (FALSE) {
    write_csv(
      arrange(tar_read(report_list), report_id),
      "data-raw/report_list.csv"
    )
  }
  # Pipeline definition: the list of targets returned to {targets}.
  list(
    # Static inputs that drive the scrape.
    tar_target(year, 2016:2023),
    tar_target(report, c("MY", "YE", "Q1", "Q2", "Q3", "Q4")),

    # One dynamic branch per (year, report) combination.
    # invalidate the doc_list target to restart this pipeline
    # tar_invalidate(doc_list)
    tar_target(
      doc_list,
      get_report_by_year_scrape(year, report),
      pattern = cross(year, report)
    ),

    # Per-committee, per-year counts derived from doc_list.
    tar_target(
      committees,
      doc_list |>
        # this is the list of committees and years reported
        # if `n_*` changes, we have to go re-read that year
        group_by(committee_name, sboe_id, year) |>
        summarize(
          n_reports = n(),
          n_amended = sum(amended),
          n_docs = sum(!is.na(report_id)),
          # Return an ungrouped tibble; without this the result stays grouped
          # by committee_name and sboe_id and dplyr prints a regrouping message.
          .groups = "drop"
        ) |>
        arrange(sboe_id)
    ),

    # Rows of doc_list that have a report_id, reduced to the row(s) with the
    # largest `received` value per report_id, then grouped by sboe_id so that
    # downstream targets branch once per sboe_id group.
    # NOTE(review): slice_max() keeps ties by default, so a report_id can still
    # appear more than once when `received` values are equal -- confirm whether
    # that is intended.
    tar_target(
      report_list,
      doc_list |>
        filter(!is.na(report_id)) |>
        # Prefer received_data; fall back to received_image when it is NA.
        mutate(received = coalesce(received_data, received_image)) |>
        slice_max(received, by = report_id) |>
        group_by(sboe_id) |>
        tar_group(),
      iteration = "group"
    ),

    # Calls save_raw_report_all() for every row of the current report_list
    # group and collects the character results.
    # NOTE(review): the returned values look like file paths, but the target
    # does not set format = "file", so {targets} does not track the files
    # themselves -- confirm this is deliberate.
    tar_target(
      reports_raw_path,
      pmap_chr(report_list, function(sboe_id, report_id, received, ...) {
        save_raw_report_all(report_id, sboe_id, received)
      }),
      pattern = map(report_list),
      iteration = "vector"
    ),

    # Gets the JSON version of the report's receipts via an internal API call,
    # that is processed into a standard table before saving as CSV.
    tar_target(
      report_receipts_csv_path,
      pmap_chr(report_list, function(sboe_id, report_id, received, ...) {
        save_report_receipts_csv(report_id, sboe_id, received)
      }),
      pattern = map(report_list),
      iteration = "vector"
    ),

    # Gets the JSON version of the report's expenses via an internal API call,
    # that is processed into a standard table before saving as CSV.
    tar_target(
      report_expenditures_csv_path,
      pmap_chr(report_list, function(sboe_id, report_id, received, ...) {
        save_report_expenditures_csv(report_id, sboe_id, received)
      }),
      pattern = map(report_list),
      iteration = "vector"
    )
  )