# garrick/nc-campaign-finance


			
# Migrating to Snowflake ----

# Send the data to the S3 bucket
#+ bash
# cd data-out
# find . -type f -name "*.parquet" -exec aws s3 cp {} s3://dd-ncsboe-cf/ \;

# Generate SQL for snowflake ----

library(tidyverse)
library(dbplyr)
pkgload::load_all()

# Every parquet file below data-out/, searched recursively.
files <- fs::dir_ls(path = "data-out", glob = "*.parquet", recurse = TRUE)

# Connection from a helper that is not defined in this script (presumably
# provided by the package loaded via pkgload::load_all() -- confirm).
con <- duckdb_global_con()

# For each parquet file: read it, keep only the first row, turn factor columns
# into character columns, and copy that sample into DuckDB as a table named
# after the file (extension removed). Each element is the value returned by
# copy_to().
data <- map(files, function(parquet_path) {
  sample_row <- slice_head(arrow::read_parquet(parquet_path), n = 1)
  sample_row <- mutate(sample_row, across(where(is.factor), as.character))
  copy_to(
    con,
    sample_row,
    name = fs::path_ext_remove(fs::path_file(parquet_path))
  )
})

# map() keeps the file paths as names; replace each one with the name of the
# directory that directly contains the file.
data <- set_names(
  data,
  nm = function(paths) fs::path_file(fs::path_dir(paths))
)


# Collect the `sql` column of duckdb_tables() from the connection (presumably
# the CREATE TABLE statement of each table registered above -- confirm against
# the DuckDB documentation).
sql <- con |>
  db_collect(sql = sql("SELECT sql FROM duckdb_tables()"))

# Print each collected statement so it can be copied out by hand.
sql |>
  glue::glue_data("\n{sql}\n")

# https://docs.snowflake.com/en/user-guide/data-load-s3
# copy into cities
#  from (select $1:continent::varchar,
#               $1:country:name::varchar,
#               $1:country:city::variant
#       from @sf_tut_stage/cities.parquet);

# Build one Snowflake `COPY INTO` statement per parquet file and place the
# combined script on the clipboard.
#
# * file_name  -- file name with extension, referenced on the stage
# * table_name -- file name without extension, used as the target table
# * colnames   -- the table's columns rendered as `$1:<column>`, one per line
#
# Columns are matched to files by position: `data` was produced by mapping
# over `files`, so element i of `data` describes file i. Looking tables up by
# name (`data[[table_name]]`) is fragile because `data` is named after each
# file's parent directory; whenever that differs from the file stem, or when
# several files share a directory, the lookup returns NULL or the wrong table
# and the generated SELECT list is silently empty or incorrect.
tibble(
  file_name = fs::path_file(files),
  table_name = fs::path_ext_remove(fs::path_file(files)),
  colnames = map_chr(
    unname(data),
    \(tbl) paste(sprintf("$1:%s", colnames(tbl)), collapse = ",\n         ")
  )
) |>
  glue::glue_data("
COPY INTO {table_name}
FROM (
  SELECT {colnames}
  FROM @FINANCE_PARQUET/{file_name} (FILE_FORMAT => \"PARQUETFORMAT\")
);
") |>
  glue::glue_collapse(sep = "\n\n") |>
  clipr::write_clip()