Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

89 lines
2.7KB

  1. # Created by use_targets().
  2. # Follow the comments below to fill in this target script.
  3. # Then follow the manual to check and run the pipeline:
  4. # https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline
  5. # Load packages required to define the pipeline:
  6. library(targets)
  7. # Set target options:
  8. tar_option_set(
  9. packages = strsplit(desc::desc_get_field("Depends"), ", ")[[1]],
  10. # For distributed computing in tar_make(), supply a {crew} controller
  11. # as discussed at https://books.ropensci.org/targets/crew.html.
  12. controller = crew::crew_controller_local(workers = 24),
  13. error = "null"
  14. # debug = "parquet_report_cover_path_e8fc956a"
  15. )
  16. # Run the R scripts in the R/ folder with your custom functions:
  17. tar_source()
  18. # Replace the target list below with your own:
  19. list(
  # Reporting years to cover.
  tar_target(
    name = year,
    command = 2016:2023
  ),
  # Report codes requested for each year.
  # NOTE(review): presumably mid-year, year-end and the four quarters - confirm.
  tar_target(
    name = report,
    command = c("MY", "YE", "Q1", "Q2", "Q3", "Q4")
  ),
  # One dynamic branch per (year, report) combination; each branch calls
  # get_report_by_year_scrape(), which is defined outside this file.
  tar_target(
    name = doc_list,
    command = get_report_by_year_scrape(year, report),
    pattern = cross(year, report)
  ),
  # Per-committee, per-year summary of what was reported.
  tar_target(
    committees,
    doc_list |>
      # this is the list of committees and years reported
      # if `n_*` changes, we have to go re-read that year
      group_by(committee_name, sboe_id, year) |>
      summarize(
        n_reports = n(),
        n_amended = sum(amended),
        n_docs = sum(!is.na(report_id)),
        # Return a plain, ungrouped table; the default would keep the result
        # grouped by committee_name and sboe_id and print a regrouping message.
        .groups = "drop"
      ) |>
      arrange(sboe_id)
  ),
  # One row per report_id (the most recently received entry), grouped by
  # sboe_id so that downstream targets branch once per sboe_id.
  tar_target(
    report_list,
    doc_list |>
      # Only entries that actually have a report to fetch.
      filter(!is.na(report_id)) |>
      # Prefer received_data, falling back to received_image.
      mutate(received = coalesce(received_data, received_image)) |>
      # Keep exactly one row per report_id. `with_ties = FALSE` stops rows that
      # share the newest `received` value from all being kept, which would
      # make the downstream targets fetch the same report more than once.
      slice_max(received, by = report_id, with_ties = FALSE) |>
      group_by(sboe_id) |>
      tar_group(),
    iteration = "group"
  ),
  # For each group of report_list, call save_raw_report_all() once per row and
  # collect the character results (pmap_chr() requires one string per call).
  # save_raw_report_all() is defined outside this file.
  tar_target(
    name = reports_raw_path,
    command = pmap_chr(
      .l = report_list,
      # Argument names must match report_list's column names; any other
      # columns are absorbed by `...`.
      .f = \(sboe_id, report_id, received, ...) {
        save_raw_report_all(report_id, sboe_id, received)
      }
    ),
    pattern = map(report_list),
    iteration = "vector"
  ),
  # Gets the JSON version of the report's receipts via an internal API call,
  # which is processed into a standard table before being saved as CSV.
  # Runs once per row of each report_list group.
  tar_target(
    name = report_receipts_csv_path,
    command = pmap_chr(
      .l = report_list,
      # Argument names must match report_list's column names; any other
      # columns are absorbed by `...`.
      .f = \(sboe_id, report_id, received, ...) {
        save_report_receipts_csv(report_id, sboe_id, received)
      }
    ),
    pattern = map(report_list),
    iteration = "vector"
  ),
  # Gets the JSON version of the report's expenses via an internal API call,
  # which is processed into a standard table before being saved as CSV.
  # Runs once per row of each report_list group.
  tar_target(
    name = report_expenditures_csv_path,
    command = pmap_chr(
      .l = report_list,
      # Argument names must match report_list's column names; any other
      # columns are absorbed by `...`.
      .f = \(sboe_id, report_id, received, ...) {
        save_report_expenditures_csv(report_id, sboe_id, received)
      }
    ),
    pattern = map(report_list),
    iteration = "vector"
  )
  78. )
  79. if (FALSE) {
  80. tar_read(report_list) |>
  81. write_csv("data-raw/report_list.csv")
  82. }