| @@ -0,0 +1,60 @@ | |||
| # Migrating to Snowflake ---- | |||
| # Send the data to the S3 bucket | |||
| #+ bash | |||
| # cd data-out | |||
| # find . -type f -name "*.parquet" -exec aws s3 cp {} s3://dd-ncsboe-cf/ \; | |||
# Generate SQL for Snowflake ----
| library(tidyverse) | |||
| library(dbplyr) | |||
| pkgload::load_all() | |||
# Locate every parquet export under data-out/ (searched recursively).
files <- fs::dir_ls("data-out", recurse = TRUE, glob = "*.parquet")
con <- duckdb_global_con()

# Copy a one-row sample of each parquet file into DuckDB. The table is named
# after the file stem (file name without its extension). Only the first row
# is kept, so the copied tables carry the column layout rather than the data.
data <-
  files |>
  map(\(path) {
    arrow::read_parquet(path) |>
      slice_head(n = 1) |>
      # Factor columns are converted to character before the copy.
      mutate(across(where(is.factor), as.character)) |>
      copy_to(dest = con, fs::path_ext_remove(fs::path_file(path)))
  }) |>
  # Key the list by file stem: the same key that names the DuckDB table above
  # and that later code uses to look tables up by table name. Keying by the
  # parent directory only works when every file lives in a directory that
  # shares its name; otherwise the lookup returns NULL and the generated
  # column list is silently empty.
  set_names(
    nm = \(paths) as.character(fs::path_ext_remove(fs::path_file(paths)))
  )
# Fetch the `sql` column of DuckDB's duckdb_tables() catalog function
# (presumably the CREATE TABLE statement of every table copied above --
# confirm against the DuckDB docs).
sql <-
  con |>
  db_collect(sql("SELECT sql FROM duckdb_tables()"))

# Print one statement per row so it can be copied into Snowflake.
sql |>
  glue::glue_data("\n{sql}\n")
| # https://docs.snowflake.com/en/user-guide/data-load-s3 | |||
| # copy into cities | |||
| # from (select $1:continent::varchar, | |||
| # $1:country:name::varchar, | |||
| # $1:country:city::variant | |||
| # from @sf_tut_stage/cities.parquet); | |||
# Build one COPY INTO statement per parquet file and place the whole script
# on the clipboard.
#   file_name  - bare file name, used as the path inside the stage
#   table_name - file name without extension, used as the target table
#   colnames   - "$1:<column>" selectors for every column of the sampled table
tibble(
  file_name = fs::path_file(files),
  table_name = fs::path_ext_remove(file_name),
  colnames = map_chr(table_name, \(tbl) {
    selectors <- sprintf("$1:%s", colnames(data[[tbl]]))
    paste(selectors, collapse = ",\n ")
  })
) |>
  # glue strips the common leading indentation and the blank first/last lines
  # of the template, so the indentation here does not reach the output.
  glue::glue_data("
    COPY INTO {table_name}
    FROM (
    SELECT {colnames}
    FROM @FINANCE_PARQUET/{file_name} (FILE_FORMAT => \"PARQUETFORMAT\")
    );
    ") |>
  glue::glue_collapse(sep = "\n\n") |>
  clipr::write_clip()