You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

193 lines
5.6KB

  #' Download and extract the statewide NC voter registration archive
  #'
  #' Fetches `ncvoter_Statewide.zip` from the NCSBE S3 bucket into `output_dir`
  #' and unzips it in that same directory.
  #'
  #' @param output_dir Directory that receives the archive and its contents.
  #'   It is created when it does not exist.
  #' @return Invisibly, the archive path with its extension replaced by `txt`.
  #'   NOTE(review): this presumes the archive contains a text file sharing the
  #'   archive's base name; confirm against the actual zip contents.
  voter_statewide_download <- function(
    output_dir = here::here("../data-raw/voters")
  ) {
    url <- "https://s3.amazonaws.com/dl.ncsbe.gov/data/ncvoter_Statewide.zip"
    fs::dir_create(output_dir)
    path <- fs::path(output_dir, fs::path_file(url))

    # R's default 60 second timeout is documented as too short for large
    # downloads; raise it for this call only and restore it on exit.
    old_opts <- options(timeout = max(3600, getOption("timeout", 60)))
    on.exit(options(old_opts), add = TRUE)

    # "wb" keeps the zip byte-exact regardless of platform defaults.
    status <- download.file(url, path, mode = "wb")
    if (!identical(as.integer(status), 0L)) {
      stop("Download failed with status ", status, ": ", url, call. = FALSE)
    }

    # Extract next to the archive without touching the working directory.
    zip::unzip(path, exdir = output_dir)
    invisible(fs::path_ext_set(path, "txt"))
  }
  #' Convert the statewide voter text file to Parquet
  #'
  #' Reads the tab-separated file at `path` with the column types returned by
  #' `voter_statewide_spec()` and writes the result as a Parquet file in the
  #' same directory.
  #'
  #' @param path Path to the tab-separated voter file.
  #' @return Invisibly, the path of the Parquet file: the directory of `path`,
  #'   the lower-cased file name, and a `parquet` extension.
  voter_statewide_convert_parquet <- function(path) {
    path <- fs::path_norm(path)

    # Lower-case only the file name. Lower-casing the whole path would also
    # rewrite directory names, which points at a different (possibly missing)
    # directory on case-sensitive file systems.
    file_out <- fs::path_ext_set(tolower(fs::path_file(path)), "parquet")
    path_out <- fs::path_norm(fs::path(fs::path_dir(path), file_out))

    x <- readr::read_tsv(path, col_types = voter_statewide_spec())
    arrow::write_parquet(x, path_out)
    invisible(path_out)
  }
  #' Column specification for the statewide voter file
  #'
  #' Builds the readr column specification that
  #' `voter_statewide_convert_parquet()` passes to `readr::read_tsv()`.
  #' All readr constructors are namespace-qualified so the function does not
  #' depend on readr being attached.
  #'
  #' @return A readr column specification created by `readr::cols()`.
  voter_statewide_spec <- function() {
    # Shared collector for the three state columns: the 50 state abbreviations
    # from base R plus the extra codes listed below.
    col_state_abbr <- readr::col_factor(
      levels = c(
        state.abb,
        "AP", "DC", "GU", "MP", "NO", "OC", "PR", "UN", "VI"
      )
    )

    # Lookup tables. Names supply the factor levels of the code columns;
    # for status and reason the values also supply the levels of the matching
    # description columns.
    codes_status <- c(
      "A" = "ACTIVE",
      "D" = "DENIED",
      "I" = "INACTIVE",
      "R" = "REMOVED",
      "S" = "TEMPORARY"
    )
    codes_race <- c(
      "A" = "ASIAN",
      "B" = "BLACK or AFRICAN AMERICAN",
      "I" = "AMERICAN INDIAN or ALASKA NATIVE",
      "M" = "TWO or MORE RACES ",
      "O" = "OTHER",
      "P" = "NATIVE HAWAIIAN or PACIFIC ISLANDER",
      "U" = "UNDESIGNATED",
      "W" = "WHITE"
    )
    codes_ethnic <- c(
      "HL" = "HISPANIC or LATINO",
      "NL" = "NOT HISPANIC or NOT LATINO",
      "UN" = "UNDESIGNATED"
    )
    codes_gender <- c(
      "F" = "FEMALE",
      "M" = "MALE",
      "U" = "UNDESIGNATED"
    )
    codes_reason <- c(
      "AV" = "VERIFIED",
      "IN" = "CONFIRMATION NOT RETURNED",
      "RD" = "DECEASED",
      "IU" = "CONFIRMATION RETURNED UNDELIVERABLE",
      "DU" = "VERIFICATION RETURNED UNDELIVERABLE",
      "RM" = "REMOVED AFTER 2 FED GENERAL ELECTIONS IN INACTIVE STATUS",
      "RL" = "MOVED FROM COUNTY",
      "RS" = "MOVED FROM STATE",
      "A2" = "CONFIRMATION PENDING",
      "AP" = "VERIFICATION PENDING",
      "DI" = "UNAVAILABLE ESSENTIAL INFORMATION",
      "RF" = "FELONY CONVICTION",
      "RH" = "MOVED WITHIN STATE",
      "RQ" = "REQUEST FROM VOTER",
      "SO" = "OVERSEAS CITIZEN",
      "SM" = "MILITARY",
      "RT" = "TEMPORARY REGISTRANT",
      "RA" = "ADMINISTRATIVE",
      "A1" = "UNVERIFIED"
    )

    readr::cols(
      county_id = readr::col_character(),
      county_desc = readr::col_character(),
      voter_reg_num = readr::col_character(),
      ncid = readr::col_character(),
      last_name = readr::col_character(),
      first_name = readr::col_character(),
      middle_name = readr::col_character(),
      name_suffix_lbl = readr::col_character(),
      status_cd = readr::col_factor(names(codes_status), ordered = TRUE),
      voter_status_desc = readr::col_factor(
        unname(codes_status),
        ordered = TRUE
      ),
      reason_cd = readr::col_factor(names(codes_reason)),
      voter_status_reason_desc = readr::col_factor(unname(codes_reason)),
      res_street_address = readr::col_character(),
      res_city_desc = readr::col_character(),
      state_cd = col_state_abbr,
      zip_code = readr::col_character(),
      mail_addr1 = readr::col_character(),
      mail_addr2 = readr::col_character(),
      mail_addr3 = readr::col_character(),
      mail_addr4 = readr::col_character(),
      mail_city = readr::col_character(),
      mail_state = col_state_abbr,
      mail_zipcode = readr::col_character(),
      full_phone_number = readr::col_character(),
      confidential_ind = readr::col_character(),
      registr_dt = readr::col_date(format = "%m/%d/%Y"),
      race_code = readr::col_factor(names(codes_race)),
      ethnic_code = readr::col_factor(names(codes_ethnic)),
      party_cd = readr::col_factor(),
      gender_code = readr::col_factor(names(codes_gender)),
      birth_year = readr::col_integer(),
      age_at_year_end = readr::col_integer(),
      birth_state = col_state_abbr,
      drivers_lic = readr::col_character(),
      precinct_abbrv = readr::col_factor(),
      precinct_desc = readr::col_character(),
      municipality_abbrv = readr::col_factor(),
      municipality_desc = readr::col_character(),
      ward_abbrv = readr::col_factor(),
      ward_desc = readr::col_character(),
      cong_dist_abbrv = readr::col_factor(),
      super_court_abbrv = readr::col_factor(),
      judic_dist_abbrv = readr::col_factor(),
      nc_senate_abbrv = readr::col_factor(),
      nc_house_abbrv = readr::col_factor(),
      county_commiss_abbrv = readr::col_factor(),
      county_commiss_desc = readr::col_character(),
      township_abbrv = readr::col_factor(),
      township_desc = readr::col_character(),
      school_dist_abbrv = readr::col_factor(),
      school_dist_desc = readr::col_character(),
      fire_dist_abbrv = readr::col_factor(),
      fire_dist_desc = readr::col_character(),
      water_dist_abbrv = readr::col_factor(),
      water_dist_desc = readr::col_character(),
      sewer_dist_abbrv = readr::col_factor(),
      sewer_dist_desc = readr::col_character(),
      sanit_dist_abbrv = readr::col_factor(),
      sanit_dist_desc = readr::col_character(),
      rescue_dist_abbrv = readr::col_factor(),
      rescue_dist_desc = readr::col_character(),
      munic_dist_abbrv = readr::col_factor(),
      munic_dist_desc = readr::col_character(),
      dist_1_abbrv = readr::col_factor(),
      dist_1_desc = readr::col_character(),
      vtd_abbrv = readr::col_factor(),
      vtd_desc = readr::col_character()
    )
  }
  #' List the voter registration snapshot files in the NCSBE bucket
  #'
  #' Requests the S3 bucket listing for the `data/Snapshots/` prefix, keeps the
  #' `Contents` entries of the XML response and returns them as one row each.
  #'
  #' @return A data frame with one row per `Contents` entry. Column names come
  #'   from the XML child elements, cleaned by `janitor::clean_names()`. `size`
  #'   is converted with `rlang::as_bytes()` and `url` is the bucket URL
  #'   followed by `key`.
  voter_snapshot_list <- function() {
    bucket <- "https://s3.amazonaws.com/dl.ncsbe.gov/"
    url <- paste0(bucket, "?delimiter=/&prefix=data/Snapshots/")

    # NOTE(review): S3 listings are paginated; only the first response is read
    # here. Confirm the prefix never holds more keys than one page returns.
    listing <-
      httr2::request(url) |>
      httr2::req_perform() |>
      httr2::resp_body_xml() |>
      xml2::as_list()

    entries <- listing$ListBucketResult
    entries <- entries[names(entries) == "Contents"]
    # Flatten each entry into a named character vector, i.e. one row.
    entries <- lapply(entries, unlist)

    res <- dplyr::bind_rows(entries)
    res <- janitor::clean_names(res)
    # as.numeric, not as.integer: sizes above 2^31 - 1 bytes would become NA.
    res$size <- rlang::as_bytes(as.numeric(res$size))
    res$url <- paste0(bucket, res$key)
    res
  }
  #' Download the voter registration snapshot for one year
  #'
  #' Looks up the bucket listing via `voter_snapshot_list()`, keeps the keys
  #' containing `VR_Snapshot_<year>` and downloads the matching file into
  #' `output_dir`. When several keys match, the one that sorts last is used.
  #'
  #' @param year A single value identifying the snapshot; it is pasted after
  #'   `VR_Snapshot_` and matched literally against the listing keys.
  #' @param output_dir Directory that receives the download. It is created when
  #'   it does not exist.
  #' @return `NULL` when no key matches `year`; otherwise, invisibly, the status
  #'   value returned by `download.file()`.
  voter_snapshot_download <- function(
    year,
    output_dir = here::here("data-raw/voting")
  ) {
    stopifnot(length(year) == 1L)

    listing <- voter_snapshot_list()
    pattern <- paste0("VR_Snapshot_", year)
    # Literal match: `year` is data, not a regular expression.
    listing <- listing[grepl(pattern, listing$key, fixed = TRUE), , drop = FALSE]

    if (nrow(listing) == 0L) {
      return(NULL)
    }
    if (nrow(listing) > 1L) {
      # with_ties = FALSE guarantees exactly one row for download.file().
      listing <- dplyr::slice_max(listing, key, n = 1, with_ties = FALSE)
    }

    fs::dir_create(output_dir)

    # R's default 60 second timeout is documented as too short for large
    # downloads; raise it for this call only and restore it on exit.
    old_opts <- options(timeout = max(3600, getOption("timeout", 60)))
    on.exit(options(old_opts), add = TRUE)

    # "wb" keeps the archive byte-exact regardless of platform defaults.
    status <- download.file(
      listing$url,
      fs::path(output_dir, fs::path_file(listing$key)),
      mode = "wb"
    )
    invisible(status)
  }