Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

197 linhas
5.7KB

  #' Download and unpack the statewide NC voter registration archive
  #'
  #' Fetches `ncvoter_Statewide.zip` from the NCSBE S3 bucket into
  #' `output_dir` and extracts the archive in that same directory.
  #'
  #' @param output_dir Directory that receives the zip file and its extracted
  #'   contents. It is created when missing.
  #' @return Invisibly, the path of the downloaded archive with its extension
  #'   replaced by `txt`. The archive members are not inspected, so this
  #'   assumes the zip holds a text file with the same base name.
  voter_statewide_download <- function(
      output_dir = here::here("../data-raw/voters")) {
    url <- "https://s3.amazonaws.com/dl.ncsbe.gov/data/ncvoter_Statewide.zip"
    fs::dir_create(output_dir)
    path <- fs::path(output_dir, fs::path_file(url))

    # download.file() honours the `timeout` option, whose 60 s default is
    # documented as insufficient for large files. Raise it for the duration
    # of this call only; a larger user-supplied value is kept.
    withr::local_options(list(timeout = max(3600, getOption("timeout", 60))))

    # Write in binary mode explicitly so the archive is never altered by
    # text-mode translation, whatever the platform default is.
    status <- download.file(url, path, mode = "wb")
    if (!identical(as.integer(status), 0L)) {
      stop("Download failed with status ", status, ": ", url, call. = FALSE)
    }

    # Extract relative to `output_dir` so the members land next to the zip.
    withr::with_dir(output_dir, {
      zip::unzip(fs::path_file(url))
    })

    invisible(fs::path_ext_set(path, "txt"))
  }
  #' Convert the statewide voter text file to Parquet
  #'
  #' Reads the tab-separated file at `path` using the column specification
  #' from `voter_statewide_spec()` and writes it to
  #' `data-out/voters/voters.parquet`, relative to the working directory.
  #'
  #' @param path Path to the tab-separated statewide voter file.
  #' @return Invisibly, the path of the Parquet file that was written.
  voter_statewide_convert_parquet <- function(path) {
    path <- fs::path_norm(path)

    # Namespace-qualify the fs helpers: the bare names only resolve when fs
    # is attached, and `path` is also the name of the local variable above.
    path_out <- fs::path("data-out", "voters", "voters.parquet")
    fs::dir_create(fs::path_dir(path_out))

    x <- readr::read_tsv(path, col_types = voter_statewide_spec())

    # UTF-8 -> UTF-8 round trip: with iconv()'s default `sub = NA`, any value
    # that is not valid UTF-8 becomes NA.
    # NOTE(review): presumably needed so the Parquet writer accepts these
    # two address columns - confirm.
    for (col in c("res_street_address", "mail_addr1")) {
      x[[col]] <- iconv(x[[col]], "UTF-8", "UTF-8")
    }

    arrow::write_parquet(x, path_out)
    invisible(path_out)
  }
  #' Column specification for the statewide voter file
  #'
  #' Builds the readr column specification passed as `col_types` when the
  #' statewide voter file is read. All readr helpers are namespace-qualified
  #' so the function works without readr being attached.
  #'
  #' @return A readr column specification (the result of `readr::cols()`).
  voter_statewide_spec <- function() {
    # Shared collector for every state-abbreviation column: the 50 states
    # from `state.abb` plus the extra codes listed below.
    col_state_abbr <- readr::col_factor(
      levels = c(
        state.abb,
        "AP", "DC", "GU", "MP", "NO", "OC", "PR", "UN", "VI"
      )
    )

    # Code -> description lookups. For status and reason both the names
    # (codes) and the values (descriptions) become factor levels; for race,
    # ethnicity and gender only the names are used below.
    codes_status <- c(
      "A" = "ACTIVE",
      "D" = "DENIED",
      "I" = "INACTIVE",
      "R" = "REMOVED",
      "S" = "TEMPORARY"
    )
    codes_race <- c(
      "A" = "ASIAN",
      "B" = "BLACK or AFRICAN AMERICAN",
      "I" = "AMERICAN INDIAN or ALASKA NATIVE",
      "M" = "TWO or MORE RACES ",
      "O" = "OTHER",
      "P" = "NATIVE HAWAIIAN or PACIFIC ISLANDER",
      "U" = "UNDESIGNATED",
      "W" = "WHITE"
    )
    codes_ethnic <- c(
      "HL" = "HISPANIC or LATINO",
      "NL" = "NOT HISPANIC or NOT LATINO",
      "UN" = "UNDESIGNATED"
    )
    codes_gender <- c(
      "F" = "FEMALE",
      "M" = "MALE",
      "U" = "UNDESIGNATED"
    )
    codes_reason <- c(
      "AV" = "VERIFIED",
      "IN" = "CONFIRMATION NOT RETURNED",
      "RD" = "DECEASED",
      "IU" = "CONFIRMATION RETURNED UNDELIVERABLE",
      "DU" = "VERIFICATION RETURNED UNDELIVERABLE",
      "RM" = "REMOVED AFTER 2 FED GENERAL ELECTIONS IN INACTIVE STATUS",
      "RL" = "MOVED FROM COUNTY",
      "RS" = "MOVED FROM STATE",
      "A2" = "CONFIRMATION PENDING",
      "AP" = "VERIFICATION PENDING",
      "DI" = "UNAVAILABLE ESSENTIAL INFORMATION",
      "RF" = "FELONY CONVICTION",
      "RH" = "MOVED WITHIN STATE",
      "RQ" = "REQUEST FROM VOTER",
      "SO" = "OVERSEAS CITIZEN",
      "SM" = "MILITARY",
      "RT" = "TEMPORARY REGISTRANT",
      "RA" = "ADMINISTRATIVE",
      "A1" = "UNVERIFIED"
    )

    # `col_factor()` without levels takes its levels from the data.
    readr::cols(
      county_id = readr::col_character(),
      county_desc = readr::col_character(),
      voter_reg_num = readr::col_character(),
      ncid = readr::col_character(),
      last_name = readr::col_character(),
      first_name = readr::col_character(),
      middle_name = readr::col_character(),
      name_suffix_lbl = readr::col_character(),
      status_cd = readr::col_factor(names(codes_status), ordered = TRUE),
      voter_status_desc = readr::col_factor(
        unname(codes_status),
        ordered = TRUE
      ),
      reason_cd = readr::col_factor(names(codes_reason)),
      voter_status_reason_desc = readr::col_factor(unname(codes_reason)),
      res_street_address = readr::col_character(),
      res_city_desc = readr::col_character(),
      state_cd = col_state_abbr,
      zip_code = readr::col_character(),
      mail_addr1 = readr::col_character(),
      mail_addr2 = readr::col_character(),
      mail_addr3 = readr::col_character(),
      mail_addr4 = readr::col_character(),
      mail_city = readr::col_character(),
      mail_state = col_state_abbr,
      mail_zipcode = readr::col_character(),
      full_phone_number = readr::col_character(),
      confidential_ind = readr::col_character(),
      registr_dt = readr::col_date(format = "%m/%d/%Y"),
      race_code = readr::col_factor(names(codes_race)),
      ethnic_code = readr::col_factor(names(codes_ethnic)),
      party_cd = readr::col_factor(),
      gender_code = readr::col_factor(names(codes_gender)),
      birth_year = readr::col_integer(),
      age_at_year_end = readr::col_integer(),
      birth_state = col_state_abbr,
      drivers_lic = readr::col_character(),
      precinct_abbrv = readr::col_factor(),
      precinct_desc = readr::col_character(),
      municipality_abbrv = readr::col_factor(),
      municipality_desc = readr::col_character(),
      ward_abbrv = readr::col_factor(),
      ward_desc = readr::col_character(),
      cong_dist_abbrv = readr::col_factor(),
      super_court_abbrv = readr::col_factor(),
      judic_dist_abbrv = readr::col_factor(),
      nc_senate_abbrv = readr::col_factor(),
      nc_house_abbrv = readr::col_factor(),
      county_commiss_abbrv = readr::col_factor(),
      county_commiss_desc = readr::col_character(),
      township_abbrv = readr::col_factor(),
      township_desc = readr::col_character(),
      school_dist_abbrv = readr::col_factor(),
      school_dist_desc = readr::col_character(),
      fire_dist_abbrv = readr::col_factor(),
      fire_dist_desc = readr::col_character(),
      water_dist_abbrv = readr::col_factor(),
      water_dist_desc = readr::col_character(),
      sewer_dist_abbrv = readr::col_factor(),
      sewer_dist_desc = readr::col_character(),
      sanit_dist_abbrv = readr::col_factor(),
      sanit_dist_desc = readr::col_character(),
      rescue_dist_abbrv = readr::col_factor(),
      rescue_dist_desc = readr::col_character(),
      munic_dist_abbrv = readr::col_factor(),
      munic_dist_desc = readr::col_character(),
      dist_1_abbrv = readr::col_factor(),
      dist_1_desc = readr::col_character(),
      vtd_abbrv = readr::col_factor(),
      vtd_desc = readr::col_character()
    )
  }
  #' List the voter snapshot files available in the NCSBE S3 bucket
  #'
  #' Requests the bucket listing for the `data/Snapshots/` prefix and turns
  #' each `Contents` entry of the XML response into one row.
  #'
  #' @return A data frame with one row per listed object. Column names are
  #'   the XML field names passed through `janitor::clean_names()`; `size` is
  #'   converted to an rlang bytes vector and `url` holds the download URL
  #'   built from `key`.
  voter_snapshot_list <- function() {
    # NOTE(review): only a single listing request is made; if the bucket
    # paginates its response, later pages are not fetched - confirm.
    url <- "https://s3.amazonaws.com/dl.ncsbe.gov/?delimiter=/&prefix=data/Snapshots/"
    res <-
      httr2::request(url) |>
      httr2::req_perform() |>
      httr2::resp_body_xml() |>
      xml2::as_list()

    # Keep only the per-object entries and flatten each one into a named
    # character vector so bind_rows() treats it as a row.
    res <- res$ListBucketResult
    res <- res[names(res) == "Contents"]
    res <- lapply(res, unlist)
    res <- dplyr::bind_rows(res)
    res <- janitor::clean_names(res)

    # Sizes are parsed as doubles: as.integer() yields NA for anything at or
    # above 2^31 bytes, which would lose the size of files of 2 GiB or more.
    res$size <- rlang::as_bytes(as.numeric(res$size))
    res$url <- paste0("https://s3.amazonaws.com/dl.ncsbe.gov/", res$key)
    res
  }
  #' Download the voter snapshot archive for a given year
  #'
  #' Looks up the snapshot listing for keys matching `VR_Snapshot_<year>` and
  #' downloads the matching file into `output_dir`. When several keys match,
  #' the one that sorts last by `key` is used.
  #'
  #' @param year Year (or longer date prefix) appended to `"VR_Snapshot_"` to
  #'   form the pattern matched against the listing keys.
  #' @param output_dir Directory that receives the file; created if missing.
  #' @return `NULL` when no snapshot matches; otherwise, invisibly, the status
  #'   code returned by `download.file()`.
  voter_snapshot_download <- function(
      year,
      output_dir = here::here("data-raw/voting")) {
    # Use the listing helper defined in this file and qualify str_detect()
    # so the lookup does not depend on stringr being attached.
    listing <-
      voter_snapshot_list() |>
      dplyr::filter(stringr::str_detect(key, paste0("VR_Snapshot_", year)))

    if (nrow(listing) == 0) {
      return(NULL)
    }
    if (nrow(listing) > 1) {
      listing <- dplyr::slice_max(listing, key, n = 1)
    }

    fs::dir_create(output_dir)

    # download.file() honours the `timeout` option, whose 60 s default is
    # documented as insufficient for large files. Raise it for the duration
    # of this call only; a larger user-supplied value is kept.
    withr::local_options(list(timeout = max(3600, getOption("timeout", 60))))

    status <- download.file(
      listing$url,
      fs::path(output_dir, fs::path_file(listing$key)),
      mode = "wb"
    )
    if (!identical(as.integer(status), 0L)) {
      stop(
        "Download failed with status ", status, ": ", listing$url,
        call. = FALSE
      )
    }

    invisible(status)
  }