|
#' Geocode not-yet-seen addresses into the SQLite lookup database
#'
#' Creates the database via `prepare_addresses_create_db()` when the file at
#' `path_address_db` does not exist, then geocodes every address that is not
#' already present in the `resolved` table using
#' `tidygeocoder::geo(method = "census", full_results = TRUE)` and upserts the
#' results into that table, keyed on `address`.
#'
#' @param addresses Vector of addresses. Values are trimmed with `trimws()`
#'   and de-duplicated; `NA` and empty strings are skipped.
#' @param path_address_db Path to the SQLite file holding the `resolved`
#'   table.
#' @param batch_size Maximum number of addresses passed to a single
#'   `tidygeocoder::geo()` call. Defaults to 5000.
#'
#' @return `path_address_db`.
prepare_addresses_lookup_db <- function(
  addresses,
  path_address_db = "../data-prep/address_lookup.sqlite",
  batch_size = 5000L
) {
  stopifnot(
    is.numeric(batch_size),
    length(batch_size) == 1L,
    !is.na(batch_size),
    batch_size >= 1
  )

  if (!fs::file_exists(path_address_db)) {
    prepare_addresses_create_db(path_address_db)
  }

  con <- DBI::dbConnect(RSQLite::SQLite(), path_address_db)
  withr::defer(DBI::dbDisconnect(con))
  db <- dplyr::tbl(con, "resolved")

  seen <- db |> dplyr::pull(address)

  # Normalise unconditionally: trimming and de-duplication must also happen
  # when the table is still empty, otherwise repeated addresses would reach
  # the upsert as duplicate keys. setdiff() also removes duplicates.
  addresses <- trimws(addresses)
  addresses <- addresses[!is.na(addresses) & nzchar(addresses)]
  addresses <- setdiff(addresses, seen)

  if (length(addresses) == 0L) {
    cli::cli_alert_success("All addresses have been geocoded")
    return(path_address_db)
  }

  cli::cli_inform("Geocoding {length(addresses)} addresses")

  # Assign each address to a batch by its index so that every element,
  # including a single address or a lone trailing one, lands in a batch.
  batch_id <- ceiling(seq_along(addresses) / batch_size)
  batches <- split(addresses, batch_id)

  for (batch in batches) {
    resolved <-
      tidygeocoder::geo(
        batch,
        method = "census",
        full_results = TRUE
      ) |>
      dplyr::select(-input_address)

    # Written per batch, so results of completed batches are already stored
    # if a later batch fails.
    dplyr::rows_upsert(
      db,
      resolved,
      by = "address",
      in_place = TRUE,
      copy = TRUE
    )
  }

  path_address_db
}
-
#' Create the `resolved` table of the address lookup database
#'
#' Connects to the SQLite database at `path_address_db` and executes a
#' `CREATE TABLE` statement for the `resolved` table. The `address` column is
#' declared `UNIQUE`.
#'
#' @param path_address_db Path to the SQLite database file.
#'
#' @return `path_address_db`, invisibly.
prepare_addresses_create_db <- function(path_address_db) {
  sql <- "CREATE TABLE `resolved` (
    `address` TEXT UNIQUE,
    `lat` REAL,
    `long` REAL,
    `id` INTEGER,
    `match_indicator` TEXT,
    `match_type` TEXT,
    `matched_address` TEXT,
    `tiger_line_id` INTEGER,
    `tiger_side` TEXT
  )"

  con <- DBI::dbConnect(RSQLite::SQLite(), path_address_db)
  # Register the disconnect right away so the connection is released even
  # when the CREATE TABLE statement fails.
  withr::defer(DBI::dbDisconnect(con))

  DBI::dbExecute(con, sql)

  invisible(path_address_db)
}
|