# Migrating to Snowflake ----

# Send the data to the S3 bucket
#+ bash
# cd data-out
# find . -type f -name "*.parquet" -exec aws s3 cp {} s3://dd-ncsboe-cf/ \;

# Generate SQL for snowflake ----
#
# Purpose: for every parquet file under data-out/, register a one-row sample
# in DuckDB (to obtain a table definition and the column names), show the
# resulting CREATE TABLE statements, and put one Snowflake COPY INTO statement
# per file on the clipboard.
library(tidyverse)
library(dbplyr)
pkgload::load_all()

# Every parquet file below data-out/, searched recursively.
files <- fs::dir_ls("data-out", recurse = TRUE, glob = "*.parquet")

# One table per file, named after the file stem. This single vector is used
# for the DuckDB table name, the key in `data`, and the `table_name` column
# below, so the later `data[[table_name]]` lookup can never miss. (Keying
# `data` by the parent directory name instead would make the lookup return
# NULL whenever a directory and its file are named differently, producing a
# COPY statement with an empty SELECT list.)
table_names <- as.character(fs::path_ext_remove(fs::path_file(files)))

# Two files with the same stem would map to the same table; fail early with a
# readable message instead of part-way through the copy loop.
duplicated_names <- unique(table_names[duplicated(table_names)])
if (length(duplicated_names) > 0) {
  stop(
    "Duplicate parquet file names under data-out: ",
    paste(duplicated_names, collapse = ", "),
    call. = FALSE
  )
}

# Project-provided DuckDB connection (defined in the package loaded above).
con <- duckdb_global_con()

# Copy the first row of each file into DuckDB. One row is enough because only
# the schema is needed. Factor columns are converted to character before the
# copy.
data <- map2(files, table_names, \(path, table_name) {
  arrow::read_parquet(path) |>
    slice_head(n = 1) |>
    mutate(across(where(is.factor), as.character)) |>
    copy_to(dest = con, table_name)
}) |>
  set_names(table_names)

# CREATE TABLE statements as reported by DuckDB.
# NOTE(review): the query is not filtered to the tables created above, so any
# other table on this connection is presumably listed too -- confirm.
sql <- db_collect(
  con,
  sql("SELECT sql FROM duckdb_tables()")
)

# Top-level expression: shown when the script is run interactively.
glue::glue_data(sql, "\n{sql}\n")

# https://docs.snowflake.com/en/user-guide/data-load-s3
# copy into cities
#   from (select $1:continent::varchar,
#                $1:country:name::varchar,
#                $1:country:city::variant
#         from @sf_tut_stage/cities.parquet);

# Build one COPY INTO statement per parquet file, selecting every column as
# `$1:<column>`, join the statements with a blank line and write the result to
# the clipboard.
# NOTE(review): the stage path uses the bare file name, which assumes the
# files sit at the root of the stage -- confirm against the upload step.
tibble(
  file_name = fs::path_file(files),
  table_name = table_names
) |>
  mutate(
    colnames = map_chr(
      table_name,
      \(x) paste(sprintf("$1:%s", colnames(data[[x]])), collapse = ",\n ")
    )
  ) |>
  glue::glue_data(
    " COPY INTO {table_name} FROM ( SELECT {colnames} FROM @FINANCE_PARQUET/{file_name} (FILE_FORMAT => \"PARQUETFORMAT\") ); "
  ) |>
  glue::glue_collapse(sep = "\n\n") |>
  clipr::write_clip()