2年前 · 5e1312d55b
--- a/process/inst/deliver.R
+++ b/process/inst/deliver.R
@@ -0,0 +1,60 @@
 # Migrating to Snowflake ----

 # Send the data to the S3 bucket
 #+ bash
 # cd data-out
 # find . -type f -name "*.parquet" -exec aws s3 cp {} s3://dd-ncsboe-cf/ \;

 # Generate SQL for snowflake ----

 library(tidyverse)
 library(dbplyr)
 pkgload::load_all()

 # Every parquet file below data-out/, searched recursively.
 files <- fs::dir_ls("data-out", recurse = TRUE, glob = "*.parquet")

 # Connection the sample tables are written to.
 # NOTE(review): duckdb_global_con() is presumably provided by the package
 # loaded through pkgload::load_all() -- confirm.
 con <- duckdb_global_con()

 # For each parquet file, copy its first row to `con` as a table named after
 # the file (extension removed). Factor columns are converted to character
 # before the copy. Each element of `data` is whatever copy_to() returns.
 data <- map(files, \(parquet_path) {
   table_name <- fs::path_ext_remove(fs::path_file(parquet_path))
   first_row <- slice_head(arrow::read_parquet(parquet_path), n = 1)
   first_row <- mutate(first_row, across(where(is.factor), as.character))
   copy_to(first_row, dest = con, table_name)
 })

 # Rename each element after the directory that contains its source file.
 data <- set_names(data, nm = \(paths) fs::path_file(fs::path_dir(paths)))


 # Collect the `sql` column that DuckDB's duckdb_tables() reports for the
 # tables visible on `con` (one row per table).
 sql <- con |>
   db_collect(sql("SELECT sql FROM duckdb_tables()"))

 # Render each collected statement through glue; at top level the result is
 # printed so it can be copied into Snowflake.
 sql |>
   glue::glue_data("\n{sql}\n")

 # https://docs.snowflake.com/en/user-guide/data-load-s3
 # copy into cities
 #  from (select $1:continent::varchar,
 #               $1:country:name::varchar,
 #               $1:country:city::variant
 #       from @sf_tut_stage/cities.parquet);

 # Build one Snowflake COPY INTO statement per parquet file, join them with
 # blank lines, and place the resulting script on the clipboard.
 #
 # `data` was produced by mapping over `files`, so the two are aligned by
 # position and are paired positionally here. Looking tables up by name is
 # unsafe: `data` is named after each file's parent directory, while
 # `table_name` is the file name without its extension. Whenever those differ
 # the lookup returns NULL and the statement silently gets an empty SELECT
 # list.
 tibble(
   file_name = fs::path_file(files),
   table_name = fs::path_ext_remove(fs::path_file(files)),
   # One "$1:<column>" reference per column; the separator's padding lines
   # continuation rows up under the first column of the SELECT.
   colnames = map_chr(
     unname(data),
     \(tbl) paste(sprintf("$1:%s", colnames(tbl)), collapse = ",\n         ")
   )
 ) |>
   glue::glue_data("
 COPY INTO {table_name}
 FROM (
  SELECT {colnames}
  FROM @FINANCE_PARQUET/{file_name} (FILE_FORMAT => \"PARQUETFORMAT\")
 );
 ") |>
   glue::glue_collapse(sep = "\n\n") |>
   clipr::write_clip()