ソースを参照

add delivery script

main
コミット
5e1312d55b
この署名に対応する既知のキーがデータベースに存在しません
1個のファイルの変更60行の追加0行の削除
  1. +60
    -0
      process/inst/deliver.R

+ 60
- 0
process/inst/deliver.R ファイルの表示

@@ -0,0 +1,60 @@
# Migrating to Snowflake ----

# Send the data to the S3 bucket
#+ bash
# cd data-out
# find . -type f -name "*.parquet" -exec aws s3 cp {} s3://dd-ncsboe-cf/ \;

# Generate SQL for snowflake ----

library(tidyverse)
library(dbplyr)
pkgload::load_all()

# Every Parquet file under data-out/ (searched recursively) becomes one table.
files <- fs::dir_ls("data-out", recurse = TRUE, glob = "*.parquet")

con <- duckdb_global_con()

# A table is named after its file, minus the extension. The COPY INTO section
# further down derives its lookup key the same way, so `data` is keyed by
# table name (not by the parent directory, which need not match the file name).
table_names <- as.character(fs::path_ext_remove(fs::path_file(files)))

# Two files with the same base name in different directories would map to the
# same table; fail early with a readable message.
dup_names <- unique(table_names[duplicated(table_names)])
if (length(dup_names) > 0) {
  stop(
    "Parquet files share a table name: ",
    paste(dup_names, collapse = ", "),
    call. = FALSE
  )
}

# Only the first row of each file is copied: the tables are used below for
# their CREATE TABLE statement and their column names, not for their contents.
data <-
  map2(files, table_names, \(path, table_name) {
    sample_row <- arrow::read_parquet(path) |>
      slice_head(n = 1) |>
      # NOTE(review): presumably so DuckDB emits a plain text column rather
      # than an enum type in the generated DDL -- confirm.
      mutate(across(where(is.factor), as.character))

    copy_to(con, sample_row, name = table_name)
  }) |>
  set_names(table_names)


# Fetch the `sql` column of duckdb_tables() for the tables on the connection.
# The result is stored as `table_ddl` so that the sql() helper called on the
# next lines is not shadowed by a data frame of the same name.
table_ddl <- db_collect(
  con,
  sql("SELECT sql FROM duckdb_tables()")
)

# Print explicitly: a bare top-level expression is echoed when the script is
# run interactively or through Rscript, but not when it is source()d.
print(glue::glue_data(table_ddl, "\n{sql}\n"))

# https://docs.snowflake.com/en/user-guide/data-load-s3
# copy into cities
# from (select $1:continent::varchar,
# $1:country:name::varchar,
# $1:country:city::variant
# from @sf_tut_stage/cities.parquet);

# Build one Snowflake COPY INTO statement per Parquet file and put the
# combined script on the clipboard.
#
# `data` was produced by mapping over `files`, so the two line up by position.
# Pairing them positionally avoids a by-name lookup, which silently yields an
# empty SELECT list whenever a name is missing from `data`.
stopifnot(length(data) == length(files))

tibble(
  file_name = fs::path_file(files),
  table_name = fs::path_ext_remove(fs::path_file(files))
) |>
  mutate(
    # One "$1:<column>" reference per column of the table, one per line.
    colnames = map_chr(
      unname(data),
      \(tab) paste(sprintf("$1:%s", colnames(tab)), collapse = ",\n ")
    )
  ) |>
  # NOTE(review): Snowflake's staged-file query docs show the named file
  # format as a single-quoted string; confirm the double-quoted form below is
  # accepted.
  glue::glue_data("
COPY INTO {table_name}
FROM (
SELECT {colnames}
FROM @FINANCE_PARQUET/{file_name} (FILE_FORMAT => \"PARQUETFORMAT\")
);
") |>
  glue::glue_collapse(sep = "\n\n") |>
  clipr::write_clip()

読み込み中…
キャンセル
保存