Selaa lähdekoodia

get document list for all committees since 2016

* Finds all relevant reporting docs for all committees
* This data doesn't have the link to the actual report data,
  but it does give us SBOE ID, which will lead us to the right place.
main
Garrick Aden-Buie 2 vuotta sitten
commit
1f1bd52d9f
No known key found for this signature in database
9 muutettua tiedostoa jossa 251 lisäystä ja 0 poistoa
  1. +4
    -0
      0-time-log.csv
  2. +25
    -0
      DESCRIPTION
  3. +33
    -0
      R/get.R
  4. +54
    -0
      R/urls.R
  5. +26
    -0
      README.Rmd
  6. +29
    -0
      _targets.R
  7. +5
    -0
      _targets/.gitignore
  8. +65
    -0
      _targets/meta/meta
  9. +10
    -0
      run.R

+ 4
- 0
0-time-log.csv Näytä tiedosto

@@ -0,0 +1,4 @@
day,hours,tasks
2023-08-25,1,research and email
2023-08-28,1,project setup
2023-08-29,1,get document list for all reports and committees from 2016 on

+ 25
- 0
DESCRIPTION Näytä tiedosto

@@ -0,0 +1,25 @@
Type: Project
Package: nc-campaign-finance
Title: Collect North Carolina Campaign Finance Report Documents
Version: 0.0.0.9000
Authors@R:
person("Garrick", "Aden-Buie", , "garrick@adenbuie.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-7111-0077"))
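Description: Collects the list of campaign finance reporting documents filed by North Carolina committees from the State Board of Elections document search.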
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Depends:
dplyr,
glue,
httr2,
rlang,
rvest
Imports:
cli,
crew,
desc,
readr,
targets,
visNetwork

+ 33
- 0
R/get.R Näytä tiedosto

@@ -0,0 +1,33 @@
#' Column specification for the exported search-results CSV
#'
#' Declares how each column of the export is parsed: the four date columns
#' are read as month/day/year dates and every other listed column is kept as
#' character.
#'
#' @return A `readr::cols()` column specification.
spec_export_search_results <- function() {
  as_text <- readr::col_character()
  as_mdy_date <- readr::col_date(format = "%m/%d/%Y")

  readr::cols(
    `Committee Name` = as_text,
    `SBoE ID` = as_text,
    Year = as_text,
    `Doc Type` = as_text,
    `Doc Name` = as_text,
    Amend = as_text,
    `Received Image` = as_mdy_date,
    `Received Data` = as_mdy_date,
    `Start Date` = as_mdy_date,
    `End Date` = as_mdy_date,
    Image = as_text,
    Data = as_text
  )
}

#' Download and tidy the document list for one year and report type
#'
#' Performs the request built by `req_report_by_year_export()`, parses the
#' CSV response body with `spec_export_search_results()`, and tidies the
#' result.
#'
#' @param year Reporting year, passed through to
#'   `req_report_by_year_export()`.
#' @param report Report type(s), passed through to
#'   `req_report_by_year_export()`.
#' @return The parsed table with snake_case column names, a logical `amend`
#'   column (`TRUE` where the export has `"Y"`), `year` and `doc_name` moved
#'   to the front, grouped by those two columns and passed through
#'   `targets::tar_group()`.
get_report_by_year_export <- function(year, report) {
  res <-
    req_report_by_year_export(year, report) |>
    req_perform() |>
    resp_body_string() |>
    # I() marks the body as literal data; a bare string without a newline
    # would otherwise be interpreted by readr as a file path.
    I() |>
    readr::read_csv(col_types = spec_export_search_results())

  # Normalise headers such as "SBoE ID" and "Committee Name" to snake_case
  # with base R, so the pipeline does not need {snakecase}, which is not
  # declared in DESCRIPTION. Lower-casing first keeps "SBoE ID" as "sboe_id".
  clean_names <- gsub("[^a-z0-9]+", "_", tolower(names(res)))
  names(res) <- gsub("^_+|_+$", "", clean_names)

  res |>
    mutate(amend = amend == "Y") |>
    relocate(year, doc_name) |>
    group_by(year, doc_name) |>
    targets::tar_group()
}

+ 54
- 0
R/urls.R Näytä tiedosto

@@ -0,0 +1,54 @@
#' URL of the document search results endpoint
#'
#' @return A length-one character vector.
url_nc_cf_search_doc <- function() {
  endpoint <- "https://cf.ncsbe.gov/CFDocLkup/DocumentResult/"
  endpoint
}

#' URL of the search-results export endpoint
#'
#' @return A length-one character vector.
url_nc_cf_export_search_doc <- function() {
  endpoint <- "https://cf.ncsbe.gov/CFDocLkup/ExportSearchResults/"
  endpoint
}

#' Resolve report names or aliases to canonical report codes
#'
#' Matching is case-insensitive. Besides the canonical codes themselves, each
#' code is accepted without its `RP` prefix (`MYSA`, `QTR1`), without prefix
#' and `SA` suffix (`MY`, `YE`), and quarterly codes in the short form `Q1`
#' to `Q4`.
#'
#' @param report Character vector of report names or aliases.
#' @param collapse If `TRUE`, return a single string of single-quoted codes
#'   separated by `", "`; if `FALSE`, return the canonical codes as a vector.
#' @return A character vector: length one when `collapse = TRUE`, otherwise
#'   one canonical code per element of `report`.
match_report_type <- function(report, collapse = TRUE) {
  # `report` and `reports` are interpolated by name in the error message
  # below, so these two names must stay as they are.
  reports <- c("RPMYSA", "RPYESA", "RPQTR1", "RPQTR2", "RPQTR3", "RPQTR4")

  # Alias table: values are the accepted spellings, names the canonical code.
  aliases <- c(
    reports,
    sub("^RP", "", reports),
    gsub("^RP|SA$", "", reports),
    sub("RPQTR", "Q", reports)
  )
  names(aliases) <- rep(reports, times = 4)

  report <- toupper(report)
  canonical <- names(aliases)[match(report, aliases)]

  unmatched <- is.na(canonical)
  if (any(unmatched)) {
    report <- report[unmatched]
    cli::cli_abort(c("Invalid report {.val {report}}", i = "Valid: {.val {reports}}"))
  }

  if (collapse) {
    toString(paste0("'", canonical, "'"))
  } else {
    canonical
  }
}

#' Build the request for the document search results endpoint
#'
#' Only constructs the request; nothing is sent here.
#'
#' @param year Value for the `year` query parameter.
#' @param report Report names or aliases, resolved by `match_report_type()`.
#'   Defaults to all six report types.
#' @return The request object, with `year` and `reports` query parameters set.
req_report_by_year <- function(
  year,
  report = c("RPMYSA", "RPYESA", "RPQTR1", "RPQTR2", "RPQTR3", "RPQTR4")
) {
  # Resolve first so an invalid report aborts before a request is built.
  quoted_codes <- match_report_type(report)

  url_nc_cf_search_doc() |>
    request() |>
    req_url_query(year = year, reports = quoted_codes)
}

#' Build the request for the search-results export endpoint
#'
#' Only constructs the request; nothing is sent here.
#'
#' @param year Value for the `year` query parameter.
#' @param report Report names or aliases, resolved by `match_report_type()`.
#'   Defaults to all six report types.
#' @return The request object, with `year` and `reports` query parameters set.
req_report_by_year_export <- function(
  year,
  report = c("RPMYSA", "RPYESA", "RPQTR1", "RPQTR2", "RPQTR3", "RPQTR4")
) {
  # Resolve first so an invalid report aborts before a request is built.
  quoted_codes <- match_report_type(report)

  url_nc_cf_export_search_doc() |>
    request() |>
    req_url_query(year = year, reports = quoted_codes)
}

+ 26
- 0
README.Rmd Näytä tiedosto

@@ -0,0 +1,26 @@


https://cf.ncsbe.gov/CFDocLkup/DocumentResult/?year=2022&reports=%27RPQTR4%27

In election years (2008, 2012, 2016, 2020):
`https://cf.ncsbe.gov/CFDocLkup/DocumentResult/?year={year}&reports='RPQTR{quarter}'`

In non-election years:
```
https://cf.ncsbe.gov/CFDocLkup/DocumentResult/?year=2023&reports=%27RPMYSA%27%2C%20%27RPYESA%27
https://cf.ncsbe.gov/CFDocLkup/DocumentResult/?year=2023&reports=%27RPQTR1%27%2C%20%27RPQTR4%27%2C%20%27RPQTR2%27%2C%20%27RPQTR3%27
```


- `RPMYSA` - Mid-Year Semi-Annual Report
- `RPYESA` - Year-End Semi-Annual Report
- `RPQTR1` - 1st Quarter Report
- `RPQTR2` - 2nd Quarter Report
- `RPQTR3` - 3rd Quarter Report
- `RPQTR4` - 4th Quarter Report

```{r}
# Example: build the search-results URL for the four quarterly reports of 2022.
year <- 2022
# Report codes are single-quoted and comma-separated, as in the URLs above.
reports <- "'RPQTR1', 'RPQTR4', 'RPQTR2', 'RPQTR3'"
# glue interpolates the values as-is; nothing is percent-encoded here.
url <- glue::glue("https://cf.ncsbe.gov/CFDocLkup/DocumentResult/?year={year}&reports={reports}")
```

+ 29
- 0
_targets.R Näytä tiedosto

@@ -0,0 +1,29 @@
# Pipeline definition for the {targets} workflow.
# See the manual for how to inspect and run the pipeline:
# https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline

# Load packages required to define the pipeline:
library(targets)

# Set target options:
tar_option_set(
  # Attach every package listed under `Depends` in DESCRIPTION. The field is
  # parsed with desc_get_deps() rather than split on ", ", so the result does
  # not depend on how the field is wrapped and still works if an entry gains
  # a version constraint. `R` itself is not a package and is dropped.
  packages = local({
    deps <- desc::desc_get_deps()
    deps$package[deps$type == "Depends" & deps$package != "R"]
  }),
  # For distributed computing in tar_make(), supply a {crew} controller
  # as discussed at https://books.ropensci.org/targets/crew.html.
  controller = crew::crew_controller_local(workers = 2)
)

# Run the R scripts in the R/ folder with the project's functions:
tar_source()

# Targets: one `doc_list` branch per combination of year and report type.
list(
  tar_target(year, 2016:2023),
  tar_target(report, c("MY", "YE", "Q1", "Q2", "Q3", "Q4")),
  tar_target(
    doc_list,
    get_report_by_year_export(year, report),
    pattern = cross(year, report)
  )
)

+ 5
- 0
_targets/.gitignore Näytä tiedosto

@@ -0,0 +1,5 @@
*
!.gitignore
!meta
meta/*
!meta/meta

+ 65
- 0
_targets/meta/meta Näytä tiedosto

@@ -0,0 +1,65 @@
name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iteration|parent|children|seconds|warnings|error
doc_list|pattern|11d7f7725087279a|5fff0c5cda12b47a||463924853||||1031328|rds|local|vector||doc_list_9ec26390*doc_list_1383f472*doc_list_8c36acd3*doc_list_09ceab10*doc_list_003f7bd5*doc_list_a895c4bc*doc_list_e0bf2658*doc_list_684a0d03*doc_list_e79bac86*doc_list_03c24dc0*doc_list_05132216*doc_list_6ca5d8cf*doc_list_4aeb55d8*doc_list_1c665de9*doc_list_99d9270e*doc_list_9145eb35*doc_list_2af55433*doc_list_b17a1e54*doc_list_b5035e3f*doc_list_1ff6dafe*doc_list_b1f403fa*doc_list_6486d272*doc_list_1fb98e3e*doc_list_352387b1*doc_list_93f6437d*doc_list_4b2c7d67*doc_list_e8e99f77*doc_list_3b598963*doc_list_b6cefb21*doc_list_b9281fad*doc_list_4657e8ba*doc_list_3cd506a1*doc_list_30d835c0*doc_list_d80d73d4*doc_list_fb41e2ef*doc_list_c0a62d0f*doc_list_7c58427e*doc_list_5bfe5f1e*doc_list_9448724e*doc_list_f311f56a*doc_list_854a24d4*doc_list_9b378f94*doc_list_a48a02a2*doc_list_ddb03379*doc_list_b043e6ad*doc_list_efcdcda6*doc_list_e63b4dbd*doc_list_aeb57b3f|10.195||
doc_list_003f7bd5|branch|5393af35d5cef006|5fff0c5cda12b47a|a471fb45114fb391|-1766338929||t19598.5465776s|67617550c1d67fe8|32280|rds|local|vector|doc_list||0.2||
doc_list_03c24dc0|branch|2f6399649328f55c|5fff0c5cda12b47a|d4224f5dcf37fa96|43002501||t19598.5465388786s|bf8f9b5758e72d99|619|rds|local|vector|doc_list||0.119||
doc_list_05132216|branch|174c1f5446e2b7a3|5fff0c5cda12b47a|488059842aa99362|223551675||t19598.5465386126s|daf9ee54e690ed50|710|rds|local|vector|doc_list||0.156||
doc_list_09ceab10|branch|3f01ca60f862f846|5fff0c5cda12b47a|8451d539ac0772b4|-1717594545||t19598.5465743909s|c48c4bfb02d7a61a|31919|rds|local|vector|doc_list||0.207||
doc_list_1383f472|branch|85b5d08fe66d80d8|5fff0c5cda12b47a|b12bd0ba6f51b50b|-766063971||t19598.5465122587s|28e9bfdd4b348557|4090|rds|local|vector|doc_list||0.121||
doc_list_1c665de9|branch|9bf0ab68e6231630|5fff0c5cda12b47a|c9eae3a8f0b20101|696679256||t19598.5465511297s|71d6b7d0af4ddf55|6976|rds|local|vector|doc_list||0.163||
doc_list_1fb98e3e|branch|543a3c48b12e7c03|5fff0c5cda12b47a|ef438ad2ea59d867|-233810849||t19598.5465601077s|17fd65357607abf9|576|rds|local|vector|doc_list||0.167||
doc_list_1ff6dafe|branch|20890d6be5ed1236|5fff0c5cda12b47a|f607079189e33e1c|2030600982||t19598.5465647203s|7dd36e47b399d745|58013|rds|local|vector|doc_list||0.335||
doc_list_2af55433|branch|ff150e1507549db2|5fff0c5cda12b47a|64ced2ebfe91a908|1413526563||t19598.5465816085s|fa8c9a1c442466dd|44653|rds|local|vector|doc_list||0.235||
doc_list_30d835c0|branch|65ea1621de6865a8|5fff0c5cda12b47a|093255b44616004e|386659605||t19598.5465578568s|303824524f4bfcf3|795|rds|local|vector|doc_list||0.18||
doc_list_352387b1|branch|32945eae74669771|5fff0c5cda12b47a|4eea22f83e56aaca|-1346795928||t19598.5465481945s|813a0d7697333517|548|rds|local|vector|doc_list||0.137||
doc_list_3b598963|branch|34ae92f2124f1d99|5fff0c5cda12b47a|21b77ebab17672f1|440503025||t19598.5465711649s|8152d5aa37a43435|44504|rds|local|vector|doc_list||0.238||
doc_list_3cd506a1|branch|ff7ee99fd9b3d7a4|5fff0c5cda12b47a|dd7803b67113fba0|1990120871||t19598.5465289902s|5b24ceab1f1cf884|53283|rds|local|vector|doc_list||0.286||
doc_list_4657e8ba|branch|ac1937394724ce5b|5fff0c5cda12b47a|8ea85ae04fe2ba43|1661077530||t19598.5465431564s|4333aa119d717231|46609|rds|local|vector|doc_list||0.314||
doc_list_4aeb55d8|branch|830ef1b1a9a3bca7|5fff0c5cda12b47a|d49fe72d2d09f2b2|-1666801946||t19598.5465779803s|24c1a52e282ead6c|10668|rds|local|vector|doc_list||0.18||
doc_list_4b2c7d67|branch|2049a6bbe14f745d|5fff0c5cda12b47a|54a4c25e238ba005|1578416857||t19598.5465453808s|06896b2fdc8a8fa3|7770|rds|local|vector|doc_list||0.127||
doc_list_51d9c6f1|branch|88c63b452a8bc81f|5fff0c5cda12b47a|df13504dc8f954c4|-1528472383||t19598.5459229849s|543785243dfaf17e|595|rds|local|vector|doc_list||0.543||
doc_list_5bfe5f1e|branch|f1e134d86f46f5cd|5fff0c5cda12b47a|9eb5ea5d73d8c288|1778491039||t19598.5465362753s|9f8f37a744291d4f|8981|rds|local|vector|doc_list||0.175||
doc_list_6486d272|branch|b8c536ed8590cea0|5fff0c5cda12b47a|a4c004e3a282030f|-1136217831||t19598.546560033s|5e74fa54212725ba|660|rds|local|vector|doc_list||0.15||
doc_list_684a0d03|branch|c595250887dd1eb0|5fff0c5cda12b47a|a4555b1dd6f8068e|1647460837||t19598.5465486874s|e7293d92215f8478|38724|rds|local|vector|doc_list||0.197||
doc_list_6ca5d8cf|branch|7ec05d2071aa8680|5fff0c5cda12b47a|8d8eb5fde9aa844f|505032805||t19598.5465172499s|b768230cdbc96fc9|735|rds|local|vector|doc_list||0.143||
doc_list_7c58427e|branch|ad264c405810798c|5fff0c5cda12b47a|fbd9718e2f98d219|-1271473549||t19598.546536142s|33f00d23e03f2bd9|9593|rds|local|vector|doc_list||0.176||
doc_list_832ae65a|branch|b73378ea726d4360|5fff0c5cda12b47a|28ce9fee9a73740b|-1672096894||t19598.5459260208s|a97bf76e690c0a49|388|rds|local|vector|doc_list||0.11||
doc_list_854a24d4|branch|683d75296de0da22|5fff0c5cda12b47a|f9dd2496fe26e4bd|628903604||t19598.5465548102s|607a8111535ec4ff|42417|rds|local|vector|doc_list||0.218||
doc_list_8c36acd3|branch|ed39df3fa873af41|5fff0c5cda12b47a|c1cd98511053360b|1779557373||t19598.5465510508s|5d9e4582ff981eed|32615|rds|local|vector|doc_list||0.213||
doc_list_9145eb35|branch|b56799a5bc04e335|5fff0c5cda12b47a|be50be49a10c1bce|-1789737407||t19598.5465333926s|5db3b595b84517c7|39341|rds|local|vector|doc_list||0.274||
doc_list_93f6437d|branch|e1f63eb4ca1fdb02|5fff0c5cda12b47a|86c133a0608529a3|-386702910||t19598.5465197949s|38c3737cdc43a978|9123|rds|local|vector|doc_list||0.151||
doc_list_9448724e|branch|37c82642924bc316|5fff0c5cda12b47a|ebdbfb1c9cb3d2ae|2113732381||t19598.546584962s|6f74b906868b9e98|46636|rds|local|vector|doc_list||0.228||
doc_list_953cffe7|branch|26fb539981a3b7df|5fff0c5cda12b47a|2be5ced6d55ef889|-1565868935||t19598.5459248532s|4dce41727c3315e3|372|rds|local|vector|doc_list||0.139||
doc_list_97384f0d|branch|ae35b37121050df9|5fff0c5cda12b47a|12dc56cb8d922bd3|1182980770||t19598.5447344621s|0a1588e865a9fcad|666|rds|local|vector|doc_list||0.506||
doc_list_99d9270e|branch|a32b4170f98192c5|5fff0c5cda12b47a|0e1f9718284f7786|-127136218||t19598.5465331888s|4f9a96c42e6b45fc|42443|rds|local|vector|doc_list||0.238||
doc_list_9b378f94|branch|c7f865f4c3da3e21|5fff0c5cda12b47a|71902d255a3a9529|-2001761740||t19598.546555076s|79c701ff07e89494|38726|rds|local|vector|doc_list||0.282||
doc_list_9ec26390|branch|c5b44771e6f545a5|5fff0c5cda12b47a|a8086bd2a8998c59|-1743376540||t19598.5465098524s|b2ddc3ddd5a3145c|4933|rds|local|vector|doc_list||0.521||
doc_list_a48a02a2|branch|706acdba9d62d6d9|5fff0c5cda12b47a|aec315e10bcd440b|-318378921||t19598.5465756523s|13b0fa92889e4f8c|38140|rds|local|vector|doc_list||0.275||
doc_list_a895c4bc|branch|444e220d3d310d8e|5fff0c5cda12b47a|fcdc76118bdb7311|-935614933||t19598.5465445598s|937a1072bf2dfddc|29647|rds|local|vector|doc_list||0.204||
doc_list_aeb57b3f|branch|b73378ea726d4360|5fff0c5cda12b47a|84023cdd2a8e8dab|-767690114||t19598.5465292668s|a97bf76e690c0a49|388|rds|local|vector|doc_list||0.117||
doc_list_b043e6ad|branch|ae35b37121050df9|5fff0c5cda12b47a|026658c04244985e|-537327322||t19598.5465716839s|0a1588e865a9fcad|666|rds|local|vector|doc_list||0.132||
doc_list_b17a1e54|branch|392554bb826fa884|5fff0c5cda12b47a|2c158b1f3beed3e2|-845837830||t19598.5465274048s|33ac606aba91eced|39564|rds|local|vector|doc_list||0.229||
doc_list_b1f403fa|branch|ec7e2ecc62295369|5fff0c5cda12b47a|4aa461dd331c9f7c|511764391||t19598.5465129304s|2f298f6926ddcf14|844|rds|local|vector|doc_list||0.13||
doc_list_b5035e3f|branch|65a2f9c4833d371c|5fff0c5cda12b47a|fd9afec9a3b7473c|-1287796578||t19598.5465635796s|38bbab2465a1e203|45646|rds|local|vector|doc_list||0.262||
doc_list_b6cefb21|branch|71b5a28e548fb201|5fff0c5cda12b47a|7cf6b8fabe203d71|-1372754113||t19598.5465837305s|258e735f20b71f81|44922|rds|local|vector|doc_list||0.232||
doc_list_b9281fad|branch|460e33cc5f17b7d5|5fff0c5cda12b47a|106680725823e536|-1981039170||t19598.5465417956s|6b3cb49b933c8071|42585|rds|local|vector|doc_list||0.219||
doc_list_ba8eb44d|branch|706acdba9d62d6d9|5fff0c5cda12b47a|e10988b88252464f|-1369452465||t19598.5438525558s|13b0fa92889e4f8c|38140|rds|local|vector|doc_list||0.778||
doc_list_c0a62d0f|branch|ede336cd98c8b19e|5fff0c5cda12b47a|90221b3a22668e6c|1720736103||t19598.5465699591s|469b8095d0b903f7|865|rds|local|vector|doc_list||0.132||
doc_list_cc9774d2|branch|ea90c341613d51b8|5fff0c5cda12b47a|45fedd645df2742e|-476693472||t19598.5438503249s|9fdc1e0d0bbc131d|417|rds|local|vector|doc_list||0.653||
doc_list_d80d73d4|branch|d2b051c434bc96cd|5fff0c5cda12b47a|946045fe5e6539ba|141630882||t19598.5465579271s|ab40b86a407614d0|804|rds|local|vector|doc_list||0.156||
doc_list_ddb03379|branch|ea90c341613d51b8|5fff0c5cda12b47a|e4dbc37b3f9b842b|139637007||t19598.5465152173s|9fdc1e0d0bbc131d|417|rds|local|vector|doc_list||0.103||
doc_list_e0bf2658|branch|8b94cc2aee330e1b|5fff0c5cda12b47a|04a1b961a0f08fc2|189293869||t19598.5465678857s|548ca660b94e54f8|34721|rds|local|vector|doc_list||0.228||
doc_list_e63b4dbd|branch|26fb539981a3b7df|5fff0c5cda12b47a|7e847a22249e293c|-1154434156||t19598.5465801073s|4dce41727c3315e3|372|rds|local|vector|doc_list||0.151||
doc_list_e79bac86|branch|f505b9ad656db688|5fff0c5cda12b47a|e5bb056a20152dcf|34563957||t19598.5465246397s|8ae12254da0710ff|803|rds|local|vector|doc_list||0.13||
doc_list_e8e99f77|branch|853ca48d6004bbf2|5fff0c5cda12b47a|293911aaa63fc166|-159130530||t19598.5465675178s|cba19b749dd5ba29|47733|rds|local|vector|doc_list||0.247||
doc_list_efcdcda6|branch|88c63b452a8bc81f|5fff0c5cda12b47a|fb80f5d8c58351a3|-1618718411||t19598.546546392s|543785243dfaf17e|595|rds|local|vector|doc_list||0.138||
doc_list_f311f56a|branch|1b77a57b31fdfe63|5fff0c5cda12b47a|d58729cd72a92efc|-1152540171||t19598.5465235689s|afe17df42d7af49d|42774|rds|local|vector|doc_list||0.272||
doc_list_fb41e2ef|branch|825ab038db830230|5fff0c5cda12b47a|a9ab4ae91920e919|1281216406||t19598.546522976s|27763cdafaa76b0a|902|rds|local|vector|doc_list||0.707||
get_report_by_year_export|function|dcee4ae72b66d802|||||||||||||||
match_report_type|function|2c3458cc75c4002e|||||||||||||||
report|stem|8a155243430d384c|d91acccfe042f5e9|a3dad144c40657ed|-237041703||t19598.5459143258s|79365c26e374407f|71|rds|local|vector||report_e10bcc09*report_8619abab*report_7e3b8410*report_4b100feb*report_29a0b592*report_109efe87|0.149||
req_report_by_year|function|69dec6cd371b2e22|||||||||||||||
req_report_by_year_export|function|5a4d7d97d5beec03|||||||||||||||
spec_export_search_results|function|405cdc17d81883c1|||||||||||||||
url_nc_cf_export_search_doc|function|529be13474f99caf|||||||||||||||
url_nc_cf_search_doc|function|375430504434ae9c|||||||||||||||
year|stem|5700b3289c040758|63e40418178dba89|787f005495551c49|-1645897793||t19598.5465014597s|bdb532178c146fbe|104|rds|local|vector||year_f90769be*year_6549f23e*year_ead54fb0*year_c3c42a21*year_42c0dfe9*year_14a7fccb*year_71778769*year_69cc6f61|0.145||

+ 10
- 0
run.R Näytä tiedosto

@@ -0,0 +1,10 @@
#!/usr/bin/env Rscript

# Helper script to run the pipeline.
# The uncommented call below is the one that is executed; the commented-out
# calls are alternative ways to execute the pipeline.
# See https://books.ropensci.org/targets/hpc.html
# to learn about the options.

targets::tar_make()
# targets::tar_make_clustermq(workers = 2) # nolint
# targets::tar_make_future(workers = 2) # nolint

Loading…
Peruuta
Tallenna