|
# Column specification for the CSV read in `get_report_by_year_export()`.
#
# The four date columns are parsed as month/day/year; every other listed
# column is kept as text.
spec_export_search_results <- function() {
  text <- readr::col_character()
  mdy <- readr::col_date(format = "%m/%d/%Y")

  readr::cols(
    `Committee Name` = text,
    `SBoE ID` = text,
    Year = text,
    `Doc Type` = text,
    `Doc Name` = text,
    Amend = text,
    `Received Image` = mdy,
    `Received Data` = mdy,
    `Start Date` = mdy,
    `End Date` = mdy,
    Image = text,
    Data = text
  )
}
-
# Column specification applied by `get_report_by_year_scrape()` when it
# re-types the table parsed from the page's embedded JSON.
#
# The receipt, import and period dates are parsed as month/day/year; every
# other listed column is kept as text.
spec_report_by_year_scrape <- function() {
  text <- readr::col_character()
  mdy <- readr::col_date(format = "%m/%d/%Y")

  readr::cols(
    CommitteeName = text,
    SBoEID = text,
    DocumentType = text,
    ReportYear = text,
    ReportType = text,
    IsAmendment = text,
    ImageReceiptDate = mdy,
    DataImportDate = mdy,
    PeriodStartDate = mdy,
    PeriodEndDate = mdy,
    ImageType = text,
    DataType = text,
    DataLink = text,
    ImageLink = text
  )
}
-
# Fetch the report-by-year CSV export for `year` and `report` and return it
# as a data frame with snake_case column names.
#
# `amend` is converted to a logical (`TRUE` where the export has "Y"), `year`
# and `doc_name` are moved to the front, and the rows are grouped by those
# two columns before being passed to `targets::tar_group()`.
get_report_by_year_export <- function(year, report) {
  csv_text <-
    req_report_by_year_export(year, report) |>
    req_perform() |>
    resp_body_string()

  # `I()` marks the string as literal data. Without it readr treats any
  # string that lacks a newline (e.g. a header-only response) as a file path.
  res <- readr::read_csv(
    I(csv_text),
    col_types = spec_export_search_results()
  )

  # Rename this column by hand before the generic conversion; presumably
  # the mixed-case "SBoE" does not convert to the desired `sboe_id`.
  names(res)[names(res) == "SBoE ID"] <- "sboe_id"
  names(res) <- snakecase::to_snake_case(names(res))

  res |>
    mutate(amend = amend == "Y") |>
    relocate(year, doc_name) |>
    group_by(year, doc_name) |>
    targets::tar_group()
}
-
# Scrape the report-by-year page for `year` and `report` and return the table
# embedded in its `var data = [...]` JavaScript line.
#
# Returns `NULL` when the embedded table has no rows. Otherwise the columns
# are typed with `spec_report_by_year_scrape()`, renamed to snake_case,
# `amended` is converted to a logical (`TRUE` where the page has "Y"), and the
# rows are grouped by `year` and `doc_name` for `targets::tar_group()`.
#
# Stops with an error when the page contains no `var data = [` line.
get_report_by_year_scrape <- function(year, report) {
  page <-
    req_report_by_year(year, report) |>
    req_perform() |>
    resp_body_string()

  # Accept both CRLF and bare LF line endings so the data line is found
  # regardless of how the server terminates lines.
  page_lines <- strsplit(page, "\r?\n")[[1]]
  data_line <- page_lines[grepl("^\\s*var data = \\[", page_lines)]

  if (length(data_line) == 0) {
    stop(
      "Could not find a `var data = [` line in the report-by-year page.",
      call. = FALSE
    )
  }

  # Drop the JavaScript assignment so only the JSON array remains.
  json <- sub("\\s*var data = ", "", data_line)

  tbl <- jsonlite::fromJSON(json) |>
    as_tibble()

  if (nrow(tbl) == 0) {
    return(NULL)
  }

  tbl |>
    readr::type_convert(col_types = spec_report_by_year_scrape()) |>
    select(
      year = ReportYear,
      doc_name = ReportType,
      sboe_id = SBoEID,
      committee_name = CommitteeName,
      report_id = DataLink,
      doc_type = DocumentType,
      amended = IsAmendment,
      received_image = ImageReceiptDate,
      received_data = DataImportDate,
      start_date = PeriodStartDate,
      end_date = PeriodEndDate,
      img_link = ImageLink
    ) |>
    mutate(amended = amended == "Y") |>
    group_by(year, doc_name) |>
    targets::tar_group()
}
-
# Column specification used by `get_report_section()` for the "receipts"
# section of a report.
#
# `Date` is parsed as month/day/year, the two money columns as doubles, and
# every other listed column is kept as text.
spec_report_section_receipts <- function() {
  text <- readr::col_character()
  money <- readr::col_double()

  readr::cols(
    Date = readr::col_date(format = "%m/%d/%Y"),
    `Is Prior` = text,
    Name = text,
    `Street 1` = text,
    `Street 2` = text,
    City = text,
    State = text,
    `Full Zip` = text,
    `Country Name` = text,
    `Outside US Postal Code` = text,
    Profession = text,
    `Employers Name` = text,
    Purpose = text,
    `Receipt Type Desc` = text,
    `Account Abbr` = text,
    `Form Of Payment Desc` = text,
    Description = text,
    Amount = money,
    `Sum To Date` = money
  )
}
-
# Column specification used by `get_report_section()` for the "expenses" /
# "expenditures" section of a report.
#
# `Date` is parsed as month/day/year, the amount columns as doubles, and every
# other listed column is kept as text.
#
# All collectors are namespace-qualified, like the other specs in this file,
# so the function does not depend on readr being attached.
spec_report_section_expenses <- function() {
  readr::cols(
    Date = readr::col_date(format = "%m/%d/%Y"),
    Name = readr::col_character(),
    `Street 1` = readr::col_character(),
    `Street 2` = readr::col_character(),
    City = readr::col_character(),
    State = readr::col_character(),
    `Full Zip` = readr::col_character(),
    `Country Name` = readr::col_character(),
    `Outside US Postal Code` = readr::col_character(),
    Profession = readr::col_character(),
    `Employer Name` = readr::col_character(),
    `Purpose Type Code` = readr::col_character(),
    Purpose = readr::col_character(),
    Candidate = readr::col_character(),
    `Office Sought` = readr::col_character(),
    Declaration = readr::col_character(),
    Amount = readr::col_double(),
    `Expenditure Type Desc` = readr::col_character(),
    `Account Abbr` = readr::col_character(),
    `Form Of Payment Desc` = readr::col_character(),
    Description = readr::col_character(),
    Amount1 = readr::col_double(),
    `Sum To Date` = readr::col_double()
  )
}
-
# Fetch one section of a report and return it as a data frame.
#
# Arguments:
#   report_id  Identifier passed to `req_report_detail()`; also added as the
#              first column of the result.
#   section    Section to request. "receipts", "expenses" and "expenditures"
#              are parsed as CSV; any other value returns the raw response
#              text unchanged.
#   sboe_id    Optional identifier; when supplied it is added as a column
#              immediately after `report_id`.
#
# Returns `NULL` when the response body is empty, a string for unparsed
# sections, and otherwise a data frame with snake_case column names.
get_report_section <- function(
  report_id,
  section = "receipts",
  sboe_id = NULL
) {
  delay()

  resp <-
    req_report_detail(report_id, section) |>
    req_perform()

  if (identical(resp$body, raw(0))) {
    return(NULL)
  }

  body <- resp_body_string(resp)

  # Sections that can be parsed: lines to skip before the header row and the
  # column specification to apply. "expenses" falls through to "expenditures".
  spec <- switch(
    section,
    receipts = list(skip = 1, col_types = spec_report_section_receipts()),
    expenses = ,
    expenditures = list(skip = 1, col_types = spec_report_section_expenses()),
    NULL
  )

  if (is.null(spec)) {
    return(body)
  }

  # `I()` marks the string as literal data. Without it readr treats any
  # string that lacks a newline as a file path.
  res <- readr::read_csv(
    I(body),
    col_types = spec$col_types,
    skip = spec$skip
  )

  names(res) <- snakecase::to_snake_case(names(res))

  # `!!` forces the function argument rather than any column of the same name.
  res <- mutate(res, report_id = !!report_id, .before = 1)

  if (!is.null(sboe_id)) {
    res <- mutate(res, sboe_id = !!sboe_id, .after = report_id)
  }

  res
}
-
# Download the "all" section of a report and return it as a single string.
#
# Stops unless the `ALLOW_DOWNLOADS` environment variable is exactly "true".
# Returns "" when the response body is empty.
get_raw_report_all <- function(report_id) {
  delay()

  downloads_allowed <- identical(Sys.getenv("ALLOW_DOWNLOADS"), "true")
  if (!downloads_allowed) {
    stop("Shouldn't be downloading reports now...")
  }

  resp <- req_perform(req_report_detail(report_id, "all"))

  if (identical(resp$body, raw(0))) {
    ""
  } else {
    resp_body_string(resp)
  }
}
-
# Save the raw text of a report under `data-raw/reports/<sboe_id>/` as
# `<report_id>_<received>.txt` and return the file path.
#
# If the file already exists nothing is downloaded and the path is returned
# visibly; after a fresh download the path is returned invisibly.
save_raw_report_all <- function(report_id, sboe_id, received) {
  report_dir <- here::here("data-raw", "reports", sboe_id)
  report_path <- path(
    report_dir,
    paste0(report_id, "_", received),
    ext = "txt"
  )

  # Reports already on disk are never downloaded again.
  if (file_exists(report_path)) {
    return(report_path)
  }

  # Download before creating the directory so a failed request leaves
  # nothing behind on disk.
  report_text <- get_raw_report_all(report_id)

  dir_create(report_dir)
  brio::write_lines(report_text, report_path)

  invisible(report_path)
}
|