You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

194 lines
5.6KB

  #' Download and unpack the statewide voter registration archive
  #'
  #' Fetches `ncvoter_Statewide.zip` from the `dl.ncsbe.gov` S3 bucket into
  #' `output_dir` and extracts the archive in that same directory.
  #'
  #' @param output_dir Directory that receives the zip archive and its
  #'   extracted contents. Created if it does not already exist.
  #' @return Invisibly, the archive path with its extension replaced by `txt`.
  #'   NOTE(review): this assumes the archive contains a text file sharing the
  #'   archive's base name; the contents are not inspected here.
  voter_statewide_download <- function(
    output_dir = here::here("../data-raw/voters")
  ) {
    url <- "https://s3.amazonaws.com/dl.ncsbe.gov/data/ncvoter_Statewide.zip"
    fs::dir_create(output_dir)
    zip_path <- fs::path(output_dir, fs::path_file(url))

    # download.file() aborts after getOption("timeout") seconds (60 by
    # default), which the R documentation notes is often too short for large
    # files. Raise it for this call only and restore the previous value on exit.
    old_opts <- options(timeout = max(3600, getOption("timeout", 60)))
    on.exit(options(old_opts), add = TRUE)

    # mode = "wb" requests a binary transfer explicitly on every platform.
    status <- utils::download.file(url, zip_path, mode = "wb")
    if (!identical(as.integer(status), 0L)) {
      stop("Download failed with status ", status, ": ", url, call. = FALSE)
    }

    # Extract next to the archive without changing the working directory.
    zip::unzip(zip_path, exdir = output_dir)

    invisible(fs::path_ext_set(zip_path, "txt"))
  }
  #' Convert the statewide voter text file to Parquet
  #'
  #' Reads the tab-separated file at `path` with the column specification
  #' returned by `voter_statewide_spec()` and writes the result to
  #' `data-out/voters/voters.parquet`, relative to the current working
  #' directory. The output directory is created if needed.
  #'
  #' @param path Path to the tab-separated voter file.
  #' @return Invisibly, the path of the Parquet file that was written.
  voter_statewide_convert_parquet <- function(path) {
    path <- fs::path_norm(path)

    # All fs helpers are namespace-qualified: the argument is itself called
    # `path`, so an unqualified `path(...)` call only resolves to the fs
    # function when fs happens to be attached.
    path_out <- fs::path("data-out", "voters", "voters.parquet")
    fs::dir_create(fs::path_dir(path_out))

    voters <- readr::read_tsv(path, col_types = voter_statewide_spec())
    arrow::write_parquet(voters, path_out)

    invisible(path_out)
  }
  #' Column specification for the statewide voter file
  #'
  #' Builds the readr column specification passed as `col_types` when the
  #' statewide voter file is read. Coded columns are parsed as factors with
  #' explicit levels taken from the lookup vectors defined below; the
  #' registration date is parsed as `%m/%d/%Y`; birth year and age are
  #' integers; the remaining columns are character or free-level factors.
  #'
  #' All readr functions are namespace-qualified so the function does not
  #' depend on readr being attached.
  #'
  #' @return A readr column specification (the result of `readr::cols()`).
  voter_statewide_spec <- function() {
    # Shared collector for every state-abbreviation column: the 50 states from
    # `state.abb` plus the extra codes listed here.
    col_state_abbr <- readr::col_factor(
      levels = c(
        state.abb,
        "AP", "DC", "GU", "MP", "NO", "OC", "PR", "UN", "VI"
      )
    )

    # Lookup vectors (code = description). The names supply factor levels for
    # the code columns; the values supply levels for the matching description
    # columns where one is declared below.
    codes_status <- c(
      "A" = "ACTIVE",
      "D" = "DENIED",
      "I" = "INACTIVE",
      "R" = "REMOVED",
      "S" = "TEMPORARY"
    )
    codes_race <- c(
      "A" = "ASIAN",
      "B" = "BLACK or AFRICAN AMERICAN",
      "I" = "AMERICAN INDIAN or ALASKA NATIVE",
      "M" = "TWO or MORE RACES ",
      "O" = "OTHER",
      "P" = "NATIVE HAWAIIAN or PACIFIC ISLANDER",
      "U" = "UNDESIGNATED",
      "W" = "WHITE"
    )
    codes_ethnic <- c(
      "HL" = "HISPANIC or LATINO",
      "NL" = "NOT HISPANIC or NOT LATINO",
      "UN" = "UNDESIGNATED"
    )
    codes_gender <- c(
      "F" = "FEMALE",
      "M" = "MALE",
      "U" = "UNDESIGNATED"
    )
    codes_reason <- c(
      "AV" = "VERIFIED",
      "IN" = "CONFIRMATION NOT RETURNED",
      "RD" = "DECEASED",
      "IU" = "CONFIRMATION RETURNED UNDELIVERABLE",
      "DU" = "VERIFICATION RETURNED UNDELIVERABLE",
      "RM" = "REMOVED AFTER 2 FED GENERAL ELECTIONS IN INACTIVE STATUS",
      "RL" = "MOVED FROM COUNTY",
      "RS" = "MOVED FROM STATE",
      "A2" = "CONFIRMATION PENDING",
      "AP" = "VERIFICATION PENDING",
      "DI" = "UNAVAILABLE ESSENTIAL INFORMATION",
      "RF" = "FELONY CONVICTION",
      "RH" = "MOVED WITHIN STATE",
      "RQ" = "REQUEST FROM VOTER",
      "SO" = "OVERSEAS CITIZEN",
      "SM" = "MILITARY",
      "RT" = "TEMPORARY REGISTRANT",
      "RA" = "ADMINISTRATIVE",
      "A1" = "UNVERIFIED"
    )

    readr::cols(
      # Identity
      county_id = readr::col_character(),
      county_desc = readr::col_character(),
      voter_reg_num = readr::col_character(),
      ncid = readr::col_character(),
      last_name = readr::col_character(),
      first_name = readr::col_character(),
      middle_name = readr::col_character(),
      name_suffix_lbl = readr::col_character(),

      # Registration status and reason
      status_cd = readr::col_factor(names(codes_status), ordered = TRUE),
      voter_status_desc = readr::col_factor(
        unname(codes_status),
        ordered = TRUE
      ),
      reason_cd = readr::col_factor(names(codes_reason)),
      voter_status_reason_desc = readr::col_factor(unname(codes_reason)),

      # Residential address
      res_street_address = readr::col_character(),
      res_city_desc = readr::col_character(),
      state_cd = col_state_abbr,
      zip_code = readr::col_character(),

      # Mailing address
      mail_addr1 = readr::col_character(),
      mail_addr2 = readr::col_character(),
      mail_addr3 = readr::col_character(),
      mail_addr4 = readr::col_character(),
      mail_city = readr::col_character(),
      mail_state = col_state_abbr,
      mail_zipcode = readr::col_character(),

      # Contact, registration date, and demographics
      full_phone_number = readr::col_character(),
      confidential_ind = readr::col_character(),
      registr_dt = readr::col_date(format = "%m/%d/%Y"),
      race_code = readr::col_factor(names(codes_race)),
      ethnic_code = readr::col_factor(names(codes_ethnic)),
      party_cd = readr::col_factor(),
      gender_code = readr::col_factor(names(codes_gender)),
      birth_year = readr::col_integer(),
      age_at_year_end = readr::col_integer(),
      birth_state = col_state_abbr,
      drivers_lic = readr::col_character(),

      # Precinct and districts: abbreviations are factors whose levels are
      # discovered from the data, descriptions are plain character.
      precinct_abbrv = readr::col_factor(),
      precinct_desc = readr::col_character(),
      municipality_abbrv = readr::col_factor(),
      municipality_desc = readr::col_character(),
      ward_abbrv = readr::col_factor(),
      ward_desc = readr::col_character(),
      cong_dist_abbrv = readr::col_factor(),
      super_court_abbrv = readr::col_factor(),
      judic_dist_abbrv = readr::col_factor(),
      nc_senate_abbrv = readr::col_factor(),
      nc_house_abbrv = readr::col_factor(),
      county_commiss_abbrv = readr::col_factor(),
      county_commiss_desc = readr::col_character(),
      township_abbrv = readr::col_factor(),
      township_desc = readr::col_character(),
      school_dist_abbrv = readr::col_factor(),
      school_dist_desc = readr::col_character(),
      fire_dist_abbrv = readr::col_factor(),
      fire_dist_desc = readr::col_character(),
      water_dist_abbrv = readr::col_factor(),
      water_dist_desc = readr::col_character(),
      sewer_dist_abbrv = readr::col_factor(),
      sewer_dist_desc = readr::col_character(),
      sanit_dist_abbrv = readr::col_factor(),
      sanit_dist_desc = readr::col_character(),
      rescue_dist_abbrv = readr::col_factor(),
      rescue_dist_desc = readr::col_character(),
      munic_dist_abbrv = readr::col_factor(),
      munic_dist_desc = readr::col_character(),
      dist_1_abbrv = readr::col_factor(),
      dist_1_desc = readr::col_character(),
      vtd_abbrv = readr::col_factor(),
      vtd_desc = readr::col_character()
    )
  }
  #' List the voter snapshot objects in the S3 bucket
  #'
  #' Requests the bucket listing for the `data/Snapshots/` prefix, keeps the
  #' `Contents` entries of the XML response, and returns them as one row per
  #' entry with cleaned (snake_case) column names.
  #'
  #' NOTE(review): only a single listing request is made; if the service
  #' truncates long listings, later pages are not fetched here.
  #'
  #' @return A data frame with one row per listed object. `size` is converted
  #'   to an `rlang` bytes vector and a `url` column is added by prefixing each
  #'   `key` with the bucket URL.
  voter_snapshot_list <- function() {
    url <- "https://s3.amazonaws.com/dl.ncsbe.gov/?delimiter=/&prefix=data/Snapshots/"
    listing <-
      httr2::request(url) |>
      httr2::req_perform() |>
      httr2::resp_body_xml() |>
      xml2::as_list()

    # `[[` instead of `$` so the element name is matched exactly.
    listing <- listing[["ListBucketResult"]]
    entries <- listing[names(listing) == "Contents"]

    # Flatten each entry to a named character vector; bind_rows() then turns
    # every vector into one row.
    rows <- lapply(entries, unlist)
    res <- dplyr::bind_rows(rows)
    res <- janitor::clean_names(res)

    # as.numeric(), not as.integer(): sizes of 2^31 bytes or more do not fit
    # in an R integer and would silently become NA.
    res$size <- rlang::as_bytes(as.numeric(res$size))
    res$url <- paste0("https://s3.amazonaws.com/dl.ncsbe.gov/", res$key)
    res
  }
  #' Download the voter snapshot archive for a given year
  #'
  #' Looks up the snapshot listing, keeps entries whose key matches
  #' `VR_Snapshot_<year>`, and downloads the match into `output_dir`. When
  #' several keys match, the one that sorts last is used.
  #'
  #' @param year Year (or longer date prefix) appended to `VR_Snapshot_` to
  #'   form the pattern matched against the listing keys.
  #' @param output_dir Directory that receives the downloaded file. Created if
  #'   it does not already exist.
  #' @return `NULL` when no listing entry matches; otherwise the value of
  #'   `download.file()` (invisibly).
  voter_snapshot_download <- function(
    year,
    output_dir = here::here("data-raw/voting")
  ) {
    pattern <- paste0("VR_Snapshot_", year)

    # The listing helper defined in this file is voter_snapshot_list().
    listing <-
      voter_snapshot_list() |>
      dplyr::filter(grepl(pattern, key))

    if (nrow(listing) == 0L) {
      return(NULL)
    }
    if (nrow(listing) > 1L) {
      # Several snapshots match: keep the key that sorts last.
      listing <- dplyr::slice_max(listing, key, n = 1)
    }

    fs::dir_create(output_dir)

    # download.file() aborts after getOption("timeout") seconds (60 by
    # default), which the R documentation notes is often too short for large
    # files. Raise it for this call only and restore the previous value on exit.
    old_opts <- options(timeout = max(3600, getOption("timeout", 60)))
    on.exit(options(old_opts), add = TRUE)

    # mode = "wb" requests a binary transfer explicitly on every platform.
    utils::download.file(
      listing$url,
      fs::path(output_dir, fs::path_file(listing$key)),
      mode = "wb"
    )
  }