|
- # Migrating to Snowflake ----
-
- # Send the data to the S3 bucket
- #+ bash
- # cd data-out
- # find . -type f -name "*.parquet" -exec aws s3 cp {} s3://dd-ncsboe-cf/ \;
-
- # Generate SQL for snowflake ----
-
- library(tidyverse)
- library(dbplyr)
- pkgload::load_all()
-
- # Locate every parquet export under data-out/, searching subdirectories too.
- files <- fs::dir_ls("data-out", recurse = TRUE, glob = "*.parquet")
-
- con <- duckdb_global_con()
-
- # Copy a one-row sample of each parquet file into the DuckDB connection as a
- # table named after the file (extension dropped). Only the first row is kept
- # and factor columns are converted to character before the copy.
- #
- # The resulting list is keyed by that same table name. The COPY INTO
- # generation further down looks entries up with `data[[table_name]]`, where
- # `table_name` is the file name without its extension; keying the list by the
- # parent directory name would return NULL (and an empty SELECT list) whenever
- # a directory and the file inside it are named differently.
- data <-
-   map(files, \(path) {
-     table_name <- as.character(fs::path_ext_remove(fs::path_file(path)))
-
-     arrow::read_parquet(path) |>
-       slice_head(n = 1) |>
-       mutate(across(where(is.factor), as.character)) |>
-       copy_to(dest = con, name = table_name)
-   }) |>
-   # set_names() applies the function to the names map() carried over from
-   # `files` (presumably the file paths, as returned by fs::dir_ls()).
-   set_names(
-     nm = \(path) as.character(fs::path_ext_remove(fs::path_file(path)))
-   )
-
-
- # Collect the `sql` column of DuckDB's duckdb_tables() table function into a
- # local data frame (presumably the DDL of each registered table -- confirm
- # against the DuckDB docs).
- sql <- con |>
-   db_collect(sql("SELECT sql FROM duckdb_tables()"))
-
- # Render one string per collected row from its `sql` value; at top level the
- # result is printed to the console.
- sql |>
-   glue::glue_data("\n{sql}\n")
-
- # https://docs.snowflake.com/en/user-guide/data-load-s3
- # copy into cities
- # from (select $1:continent::varchar,
- # $1:country:name::varchar,
- # $1:country:city::variant
- # from @sf_tut_stage/cities.parquet);
-
- # Build one COPY INTO statement per parquet file, join the statements with a
- # blank line between them, and write the result to the clipboard.
- #   file_name  - base name of the parquet file, referenced under the stage
- #   table_name - file name without its extension; used as the COPY INTO
- #                target and as the key into `data`
- #   colnames   - that table's column names, each written as `$1:<column>`
- tibble(
-   file_name = fs::path_file(files),
-   table_name = fs::path_ext_remove(fs::path_file(files)),
-   colnames = map_chr(table_name, \(tbl) {
-     selectors <- sprintf("$1:%s", colnames(data[[tbl]]))
-     paste(selectors, collapse = ",\n ")
-   })
- ) |>
-   glue::glue_data("
- COPY INTO {table_name}
- FROM (
- SELECT {colnames}
- FROM @FINANCE_PARQUET/{file_name} (FILE_FORMAT => \"PARQUETFORMAT\")
- );
- ") |>
-   glue::glue_collapse(sep = "\n\n") |>
-   clipr::write_clip()
|