# Migrating to Snowflake ----
# Send the data to the S3 bucket
#+ bash
# cd data-out
# find . -type f -name "*.parquet" -exec aws s3 cp {} s3://dd-ncsboe-cf/ \;
# Generate SQL for Snowflake ----
library(tidyverse)
library(dbplyr)
pkgload::load_all()

# Every Parquet file under data-out/, searched recursively.
files <- fs::dir_ls("data-out", recurse = TRUE, glob = "*.parquet")
con <- duckdb_global_con()

# Table name for a Parquet path: the file name without its extension.
# as.character() drops the fs_path class so the result can be used directly
# as a plain names vector.
parquet_table_name <- function(path) {
  as.character(fs::path_ext_remove(fs::path_file(path)))
}

# For each file, copy a one-row sample into `con` as a table named after the
# file, so that the table's schema can be read back from DuckDB afterwards.
# Factor columns are converted to character before the copy (presumably so
# they are created as text columns rather than enums -- TODO confirm).
#
# The resulting list is keyed by the same table name that is passed to
# copy_to(). It was previously keyed by the parent directory name, which does
# not match the file-stem lookup (`data[[table_name]]`) used when the
# COPY INTO statements are built further down, leaving the column list empty
# whenever directory and file stem differ.
data <-
  map(files, \(path) {
    arrow::read_parquet(path) |>
      slice_head(n = 1) |>
      mutate(across(where(is.factor), as.character)) |>
      copy_to(dest = con, name = parquet_table_name(path))
  }) |>
  set_names(nm = parquet_table_name)
# Collect the `sql` column of duckdb_tables() for the tables on `con`
# (per the DuckDB docs this column holds each table's definition as SQL).
ddl_query <- sql("SELECT sql FROM duckdb_tables()")
sql <- db_collect(con, ddl_query)

# Print one definition per row of the collected result.
sql |>
  glue::glue_data("\n{sql}\n")
# Loading Parquet from S3, see:
# https://docs.snowflake.com/en/user-guide/data-load-s3
# Example from the Snowflake docs:
# copy into cities
#   from (select $1:continent::varchar,
#                $1:country:name::varchar,
#                $1:country:city::variant
#         from @sf_tut_stage/cities.parquet);
# Build one COPY INTO statement per Parquet file and put the combined script
# on the clipboard.
#
# `file_name` is the staged file's name; `table_name` is the same name without
# its extension and is also the key used to look the sample table up in `data`.
copy_targets <- tibble(
  file_name = fs::path_file(files),
  table_name = fs::path_ext_remove(fs::path_file(files))
)

copy_targets |>
  mutate(
    # One `$1:<column>` reference per column of the sample table, joined into
    # the body of the SELECT list.
    colnames = map_chr(table_name, \(tbl) {
      column_refs <- sprintf("$1:%s", colnames(data[[tbl]]))
      paste(column_refs, collapse = ",\n ")
    })
  ) |>
  glue::glue_data("
COPY INTO {table_name}
FROM (
SELECT {colnames}
FROM @FINANCE_PARQUET/{file_name} (FILE_FORMAT => \"PARQUETFORMAT\")
);
") |>
  # Separate the statements with a blank line, then copy to the clipboard.
  glue::glue_collapse(sep = "\n\n") |>
  clipr::write_clip()