|
#' Download and unpack the statewide voter registration archive
#'
#' Fetches `ncvoter_Statewide.zip` from the NCSBE S3 bucket into `output_dir`
#' and extracts it in that same directory.
#'
#' @param output_dir Directory that receives the archive and its extracted
#'   contents. It is created when it does not exist yet.
#' @return Invisibly, the archive path with its extension replaced by `txt`.
#'   The extracted file list is not inspected, so this assumes the archive
#'   contains a text file sharing the archive's base name.
voter_statewide_download <- function(
    output_dir = here::here("../data-raw/voters")) {
  url <- "https://s3.amazonaws.com/dl.ncsbe.gov/data/ncvoter_Statewide.zip"
  fs::dir_create(output_dir)
  path <- fs::path(output_dir, fs::path_file(url))

  # download.file() honours getOption("timeout"), 60 seconds by default,
  # which is easily exceeded by a big archive. Raise it for this call only;
  # the previous value is restored when the function exits.
  withr::local_options(list(timeout = max(3600, getOption("timeout", 60))))

  # mode = "wb" makes the binary transfer explicit on every platform.
  status <- download.file(url, path, mode = "wb")
  if (!identical(as.integer(status), 0L)) {
    stop("Download of ", url, " failed with status ", status, call. = FALSE)
  }

  # Extract next to the archive without touching the working directory.
  zip::unzip(path, exdir = output_dir)

  invisible(fs::path_ext_set(path, "txt"))
}
-
#' Convert the statewide voter text file to Parquet
#'
#' Reads the tab-separated file at `path` using the column specification
#' from `voter_statewide_spec()` and writes it as a Parquet file.
#'
#' @param path Path to the tab-separated statewide voter file.
#' @return Invisibly, the path of the Parquet file that was written
#'   (`data-out/voters/voters.parquet`, relative to the working directory).
voter_statewide_convert_parquet <- function(path) {
  path <- fs::path_norm(path)
  path_out <- fs::path("data-out", "voters", "voters.parquet")
  fs::dir_create(fs::path_dir(path_out))

  x <- readr::read_tsv(path, col_types = voter_statewide_spec())

  # Round-trip the free-text address columns through iconv() as UTF-8.
  # NOTE(review): presumably this scrubs byte sequences that are not valid
  # UTF-8 before writing; iconv() yields NA for strings it cannot convert.
  for (col in c("res_street_address", "mail_addr1")) {
    x[[col]] <- iconv(x[[col]], "UTF-8", "UTF-8")
  }

  arrow::write_parquet(x, path_out)

  invisible(path_out)
}
-
#' Column specification for the statewide voter file
#'
#' Builds the readr column specification used to parse the statewide voter
#' text file. Coded columns are read as factors whose levels come from the
#' lookup tables defined below; most descriptive columns are read as
#' character.
#'
#' @return A readr column specification created with `readr::cols()`.
voter_statewide_spec <- function() {
  # Shared collector for every state-abbreviation column: the 50 states
  # from `state.abb` plus the additional codes listed here.
  col_state_abbr <- readr::col_factor(
    levels = c(
      state.abb,
      "AP", "DC", "GU", "MP", "NO", "OC", "PR", "UN", "VI"
    )
  )

  # Lookup tables: names are the codes, values are the descriptions.
  codes_status <- c(
    "A" = "ACTIVE",
    "D" = "DENIED",
    "I" = "INACTIVE",
    "R" = "REMOVED",
    "S" = "TEMPORARY"
  )

  # For race, ethnicity and gender only the codes (names) are used as
  # factor levels below; the descriptions are kept for reference.
  codes_race <- c(
    "A" = "ASIAN",
    "B" = "BLACK or AFRICAN AMERICAN",
    "I" = "AMERICAN INDIAN or ALASKA NATIVE",
    "M" = "TWO or MORE RACES ",
    "O" = "OTHER",
    "P" = "NATIVE HAWAIIAN or PACIFIC ISLANDER",
    "U" = "UNDESIGNATED",
    "W" = "WHITE"
  )

  codes_ethnic <- c(
    "HL" = "HISPANIC or LATINO",
    "NL" = "NOT HISPANIC or NOT LATINO",
    "UN" = "UNDESIGNATED"
  )

  codes_gender <- c(
    "F" = "FEMALE",
    "M" = "MALE",
    "U" = "UNDESIGNATED"
  )

  codes_reason <- c(
    "AV" = "VERIFIED",
    "IN" = "CONFIRMATION NOT RETURNED",
    "RD" = "DECEASED",
    "IU" = "CONFIRMATION RETURNED UNDELIVERABLE",
    "DU" = "VERIFICATION RETURNED UNDELIVERABLE",
    "RM" = "REMOVED AFTER 2 FED GENERAL ELECTIONS IN INACTIVE STATUS",
    "RL" = "MOVED FROM COUNTY",
    "RS" = "MOVED FROM STATE",
    "A2" = "CONFIRMATION PENDING",
    "AP" = "VERIFICATION PENDING",
    "DI" = "UNAVAILABLE ESSENTIAL INFORMATION",
    "RF" = "FELONY CONVICTION",
    "RH" = "MOVED WITHIN STATE",
    "RQ" = "REQUEST FROM VOTER",
    "SO" = "OVERSEAS CITIZEN",
    "SM" = "MILITARY",
    "RT" = "TEMPORARY REGISTRANT",
    "RA" = "ADMINISTRATIVE",
    "A1" = "UNVERIFIED"
  )

  # Every collector is namespace-qualified so the specification can be
  # built without readr being attached.
  readr::cols(
    county_id = readr::col_character(),
    county_desc = readr::col_character(),
    voter_reg_num = readr::col_character(),
    ncid = readr::col_character(),
    last_name = readr::col_character(),
    first_name = readr::col_character(),
    middle_name = readr::col_character(),
    name_suffix_lbl = readr::col_character(),
    status_cd = readr::col_factor(names(codes_status), ordered = TRUE),
    voter_status_desc = readr::col_factor(
      unname(codes_status),
      ordered = TRUE
    ),
    reason_cd = readr::col_factor(names(codes_reason)),
    voter_status_reason_desc = readr::col_factor(unname(codes_reason)),
    res_street_address = readr::col_character(),
    res_city_desc = readr::col_character(),
    state_cd = col_state_abbr,
    zip_code = readr::col_character(),
    mail_addr1 = readr::col_character(),
    mail_addr2 = readr::col_character(),
    mail_addr3 = readr::col_character(),
    mail_addr4 = readr::col_character(),
    mail_city = readr::col_character(),
    mail_state = col_state_abbr,
    mail_zipcode = readr::col_character(),
    full_phone_number = readr::col_character(),
    confidential_ind = readr::col_character(),
    registr_dt = readr::col_date(format = "%m/%d/%Y"),
    race_code = readr::col_factor(names(codes_race)),
    ethnic_code = readr::col_factor(names(codes_ethnic)),
    party_cd = readr::col_factor(),
    gender_code = readr::col_factor(names(codes_gender)),
    birth_year = readr::col_integer(),
    age_at_year_end = readr::col_integer(),
    birth_state = col_state_abbr,
    drivers_lic = readr::col_character(),
    precinct_abbrv = readr::col_factor(),
    precinct_desc = readr::col_character(),
    municipality_abbrv = readr::col_factor(),
    municipality_desc = readr::col_character(),
    ward_abbrv = readr::col_factor(),
    ward_desc = readr::col_character(),
    cong_dist_abbrv = readr::col_factor(),
    super_court_abbrv = readr::col_factor(),
    judic_dist_abbrv = readr::col_factor(),
    nc_senate_abbrv = readr::col_factor(),
    nc_house_abbrv = readr::col_factor(),
    county_commiss_abbrv = readr::col_factor(),
    county_commiss_desc = readr::col_character(),
    township_abbrv = readr::col_factor(),
    township_desc = readr::col_character(),
    school_dist_abbrv = readr::col_factor(),
    school_dist_desc = readr::col_character(),
    fire_dist_abbrv = readr::col_factor(),
    fire_dist_desc = readr::col_character(),
    water_dist_abbrv = readr::col_factor(),
    water_dist_desc = readr::col_character(),
    sewer_dist_abbrv = readr::col_factor(),
    sewer_dist_desc = readr::col_character(),
    sanit_dist_abbrv = readr::col_factor(),
    sanit_dist_desc = readr::col_character(),
    rescue_dist_abbrv = readr::col_factor(),
    rescue_dist_desc = readr::col_character(),
    munic_dist_abbrv = readr::col_factor(),
    munic_dist_desc = readr::col_character(),
    dist_1_abbrv = readr::col_factor(),
    dist_1_desc = readr::col_character(),
    vtd_abbrv = readr::col_factor(),
    vtd_desc = readr::col_character()
  )
}
-
#' List the objects under the voter snapshot prefix of the NCSBE bucket
#'
#' Requests the S3 bucket listing for the `data/Snapshots/` prefix, keeps
#' its `Contents` entries and returns them as one row per entry.
#'
#' @return A tibble with the listing fields (column names normalised by
#'   `janitor::clean_names()`), where `size` is converted with
#'   `rlang::as_bytes()` and `url` is the download URL built from `key`.
voter_snapshot_list <- function() {
  url <- "https://s3.amazonaws.com/dl.ncsbe.gov/?delimiter=/&prefix=data/Snapshots/"

  listing <-
    httr2::request(url) |>
    httr2::req_perform() |>
    httr2::resp_body_xml() |>
    xml2::as_list()

  # NOTE(review): only a single response is read; if the bucket ever
  # returns a truncated (paginated) listing, later entries are not fetched.
  entries <- listing$ListBucketResult
  entries <- entries[names(entries) == "Contents"]

  # Flatten each entry to a named character vector, then stack as rows.
  res <- dplyr::bind_rows(lapply(entries, unlist))
  res <- janitor::clean_names(res)

  # Parse sizes as double: as.integer() turns anything of 2 GiB or more
  # into NA, and archive sizes can exceed the 32-bit integer range.
  res$size <- rlang::as_bytes(as.numeric(res$size))
  res$url <- paste0("https://s3.amazonaws.com/dl.ncsbe.gov/", res$key)
  res
}
-
#' Download the voter snapshot archive for a given year
#'
#' Looks up the snapshot listing for keys containing `VR_Snapshot_<year>`
#' and downloads the match into `output_dir`. When several keys match, the
#' one that sorts last by `key` is used.
#'
#' @param year Year (or longer date prefix) appended to `VR_Snapshot_` to
#'   build the pattern matched against the listing keys.
#' @param output_dir Directory that receives the downloaded file. It is
#'   created when it does not exist yet.
#' @return `NULL` when no snapshot matches; otherwise the value returned by
#'   `download.file()`.
voter_snapshot_download <- function(
    year,
    output_dir = here::here("data-raw/voting")) {
  pattern <- paste0("VR_Snapshot_", year)

  listing <-
    voter_snapshot_list() |>
    dplyr::filter(stringr::str_detect(key, pattern))

  if (nrow(listing) == 0L) {
    return(NULL)
  }
  if (nrow(listing) > 1L) {
    # with_ties = FALSE guarantees exactly one row, so download.file()
    # always receives a single URL and a single destination.
    listing <- dplyr::slice_max(listing, key, n = 1, with_ties = FALSE)
  }

  fs::dir_create(output_dir)

  # download.file() honours getOption("timeout"), 60 seconds by default,
  # which is easily exceeded by a big archive. Raise it for this call only.
  withr::local_options(list(timeout = max(3600, getOption("timeout", 60))))

  # mode = "wb" makes the binary transfer explicit on every platform.
  download.file(
    listing$url,
    fs::path(output_dir, fs::path_file(listing$key)),
    mode = "wb"
  )
}
|