You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

214 lines
5.9KB

  # Column specification for the exported search-results CSV.
  #
  # Returns a readr column spec in which the four date columns are parsed as
  # month/day/year dates and every other listed column is kept as character.
  # It is the `col_types` passed to readr::read_csv() in
  # get_report_by_year_export().
  spec_export_search_results <- function() {
    text <- readr::col_character()
    mdy_date <- readr::col_date(format = "%m/%d/%Y")

    readr::cols(
      `Committee Name` = text,
      `SBoE ID` = text,
      Year = text,
      `Doc Type` = text,
      `Doc Name` = text,
      Amend = text,
      `Received Image` = mdy_date,
      `Received Data` = mdy_date,
      `Start Date` = mdy_date,
      `End Date` = mdy_date,
      Image = text,
      Data = text
    )
  }
  # Column specification for the table scraped from the report-by-year page.
  #
  # Returns a readr column spec in which the four date fields are parsed as
  # month/day/year dates and every other listed field is kept as character.
  # It is the `col_types` passed to readr::type_convert() in
  # get_report_by_year_scrape().
  spec_report_by_year_scrape <- function() {
    text <- readr::col_character()
    mdy_date <- readr::col_date(format = "%m/%d/%Y")

    readr::cols(
      CommitteeName = text,
      SBoEID = text,
      DocumentType = text,
      ReportYear = text,
      ReportType = text,
      IsAmendment = text,
      ImageReceiptDate = mdy_date,
      DataImportDate = mdy_date,
      PeriodStartDate = mdy_date,
      PeriodEndDate = mdy_date,
      ImageType = text,
      DataType = text,
      DataLink = text,
      ImageLink = text
    )
  }
  # Fetch the exported search results for one year/report combination and
  # parse them into a tibble.
  #
  # Args:
  #   year, report: passed unchanged to req_report_by_year_export().
  #
  # Returns:
  #   The parsed CSV with snake_case column names ("SBoE ID" becomes
  #   `sboe_id`), `amend` turned into a logical (TRUE where the value is "Y"),
  #   `year` and `doc_name` moved to the front, grouped by (year, doc_name) and
  #   passed through targets::tar_group().
  get_report_by_year_export <- function(year, report) {
    csv_text <-
      req_report_by_year_export(year, report) |>
      req_perform() |>
      resp_body_string()

    # I() marks the string as literal CSV data. Without it readr interprets a
    # string that contains no newline as a file path.
    res <- readr::read_csv(
      I(csv_text),
      col_types = spec_export_search_results()
    )

    # Rename this column explicitly before snake-casing the rest; presumably
    # snake-casing "SBoE ID" directly would not produce `sboe_id`.
    names(res)[names(res) == "SBoE ID"] <- "sboe_id"
    names(res) <- snakecase::to_snake_case(names(res))

    res |>
      mutate(amend = amend == "Y") |>
      relocate(year, doc_name) |>
      group_by(year, doc_name) |>
      targets::tar_group()
  }
  # Fetch the report-by-year page and extract the table embedded in it as a
  # JavaScript `var data = [...]` assignment.
  #
  # Args:
  #   year, report: passed unchanged to req_report_by_year().
  #
  # Returns:
  #   NULL when the embedded array has no rows. Otherwise a tibble with renamed
  #   columns (see the select() call below), `amended` turned into a logical
  #   (TRUE where the value is "Y"), grouped by (year, doc_name) and passed
  #   through targets::tar_group().
  #
  # Raises:
  #   An error when the page does not contain exactly one `var data = [` line.
  get_report_by_year_scrape <- function(year, report) {
    page <-
      req_report_by_year(year, report) |>
      req_perform() |>
      resp_body_string()

    page_lines <- strsplit(page, "\r\n")[[1]]
    data_line <- page_lines[grepl("^\\s*var data = \\[", page_lines)]

    # Fail here with a readable message instead of letting the JSON parser
    # choke on an empty or multi-element input further down.
    if (length(data_line) != 1L) {
      stop(
        "Expected exactly one `var data = [` line in the report page, found ",
        length(data_line),
        ".",
        call. = FALSE
      )
    }

    # Drop the JavaScript assignment prefix so only the array literal is left.
    json <- sub("\\s*var data = ", "", data_line)
    reports <- jsonlite::fromJSON(json) |>
      as_tibble()
    if (nrow(reports) == 0) return(NULL)

    reports |>
      readr::type_convert(col_types = spec_report_by_year_scrape()) |>
      select(
        year = ReportYear,
        doc_name = ReportType,
        sboe_id = SBoEID,
        committee_name = CommitteeName,
        report_id = DataLink,
        doc_type = DocumentType,
        amended = IsAmendment,
        received_image = ImageReceiptDate,
        received_data = DataImportDate,
        start_date = PeriodStartDate,
        end_date = PeriodEndDate,
        img_link = ImageLink
      ) |>
      mutate(amended = amended == "Y") |>
      group_by(year, doc_name) |>
      targets::tar_group()
  }
  # Column specification for the "receipts" section of a report.
  #
  # Returns a readr column spec: `Date` is parsed as a month/day/year date,
  # `Amount` and `Sum To Date` as doubles, and every other listed column is
  # kept as character. get_report_section() uses it when `section` is
  # "receipts".
  spec_report_section_receipts <- function() {
    text <- readr::col_character()
    number <- readr::col_double()

    readr::cols(
      Date = readr::col_date(format = "%m/%d/%Y"),
      `Is Prior` = text,
      Name = text,
      `Street 1` = text,
      `Street 2` = text,
      City = text,
      State = text,
      `Full Zip` = text,
      `Country Name` = text,
      `Outside US Postal Code` = text,
      Profession = text,
      `Employers Name` = text,
      Purpose = text,
      `Receipt Type Desc` = text,
      `Account Abbr` = text,
      `Form Of Payment Desc` = text,
      Description = text,
      Amount = number,
      `Sum To Date` = number
    )
  }
  # Column specification for the "expenses" / "expenditures" section of a
  # report.
  #
  # Returns a readr column spec: `Date` is parsed as a month/day/year date,
  # `Amount`, `Amount1` and `Sum To Date` as doubles, and every other listed
  # column is kept as character. get_report_section() uses it when `section`
  # is "expenses" or "expenditures".
  #
  # All readr helpers are namespace-qualified, like the other spec_*()
  # functions, so this works whether or not readr is attached.
  spec_report_section_expenses <- function() {
    readr::cols(
      Date = readr::col_date(format = "%m/%d/%Y"),
      Name = readr::col_character(),
      `Street 1` = readr::col_character(),
      `Street 2` = readr::col_character(),
      City = readr::col_character(),
      State = readr::col_character(),
      `Full Zip` = readr::col_character(),
      `Country Name` = readr::col_character(),
      `Outside US Postal Code` = readr::col_character(),
      Profession = readr::col_character(),
      `Employer Name` = readr::col_character(),
      `Purpose Type Code` = readr::col_character(),
      Purpose = readr::col_character(),
      Candidate = readr::col_character(),
      `Office Sought` = readr::col_character(),
      Declaration = readr::col_character(),
      Amount = readr::col_double(),
      `Expenditure Type Desc` = readr::col_character(),
      `Account Abbr` = readr::col_character(),
      `Form Of Payment Desc` = readr::col_character(),
      Description = readr::col_character(),
      # NOTE(review): presumably targets a second, duplicate `Amount` column
      # in the download -- confirm the name readr assigns to it matches.
      Amount1 = readr::col_double(),
      `Sum To Date` = readr::col_double()
    )
  }
  # Download one section of a report and, for known sections, parse it into a
  # tibble.
  #
  # Args:
  #   report_id: passed to req_report_detail() and added as the first column
  #     of the parsed result.
  #   section: section name passed to req_report_detail(). "receipts",
  #     "expenses" and "expenditures" are parsed as CSV; any other value
  #     returns the response body as an unparsed string.
  #   sboe_id: optional; when not NULL it is added as a column right after
  #     `report_id`.
  #
  # Returns:
  #   NULL when the response body is empty, the raw body string for sections
  #   without a column spec, otherwise a tibble with snake_case column names.
  get_report_section <- function(
    report_id,
    section = "receipts",
    sboe_id = NULL
  ) {
    delay()
    resp <-
      req_report_detail(report_id, section) |>
      req_perform()
    if (identical(resp$body, raw(0))) return(NULL)
    body <- resp_body_string(resp)

    # "expenses" has an empty arm, so it falls through to "expenditures".
    spec <- switch(
      section,
      receipts = list(skip = 1, col_types = spec_report_section_receipts()),
      expenses = ,
      expenditures = list(skip = 1, col_types = spec_report_section_expenses()),
      NULL
    )
    if (is.null(spec)) return(body)

    # I() marks the string as literal CSV data. Without it readr interprets a
    # string that contains no newline as a file path.
    res <- readr::read_csv(
      I(body),
      col_types = spec$col_types,
      skip = spec$skip
    )
    names(res) <- snakecase::to_snake_case(names(res))

    # `!!` injects the argument values so they are not looked up as columns of
    # the same name.
    res <- mutate(res, report_id = !!report_id, .before = 1)
    if (!is.null(sboe_id)) {
      res <- mutate(res, sboe_id = !!sboe_id, .after = report_id)
    }
    res
  }
  # Download the "all" section of a report as a single string.
  #
  # Downloads are only permitted when the ALLOW_DOWNLOADS environment variable
  # is exactly "true"; otherwise the function stops with an error. An empty
  # response body is returned as "".
  get_raw_report_all <- function(report_id) {
    delay()

    downloads_allowed <- identical(Sys.getenv("ALLOW_DOWNLOADS"), "true")
    if (!downloads_allowed) {
      stop("Shouldn't be downloading reports now...")
    }

    resp <- req_perform(req_report_detail(report_id, "all"))
    if (identical(resp$body, raw(0))) {
      ""
    } else {
      resp_body_string(resp)
    }
  }
  # Save the raw "all" download of a report under
  # data-raw/reports/<sboe_id>/<report_id>_<received>.txt (relative to the
  # here::here() root).
  #
  # Returns the file path: visibly when the file already existed and nothing
  # was downloaded, invisibly after a fresh download.
  save_raw_report_all <- function(report_id, sboe_id, received) {
    report_dir <- here::here("data-raw", "reports", sboe_id)
    out_path <- path(report_dir, paste0(report_id, "_", received), ext = "txt")

    # Reports already on disk are never downloaded again.
    if (file_exists(out_path)) {
      return(out_path)
    }

    raw_text <- get_raw_report_all(report_id)
    dir_create(report_dir)
    brio::write_lines(raw_text, out_path)
    invisible(out_path)
  }