Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

61 lines
1.4KB

  1. # Migrating to Snowflake ----
  2. # Send the data to the S3 bucket
  3. #+ bash
  4. # cd data-out
  5. # find . -type f -name "*.parquet" -exec aws s3 cp {} s3://dd-ncsboe-cf/ \;
  6. # Generate SQL for snowflake ----
  7. library(tidyverse)
  8. library(dbplyr)
  9. pkgload::load_all()
  # Locate every parquet export under data-out. Each file becomes one table
  # whose name is the file name without its extension.
  files <- fs::dir_ls("data-out", recurse = TRUE, glob = "*.parquet")
  table_names <- as.character(fs::path_ext_remove(fs::path_file(files)))

  # Table names key both the DuckDB tables created below and the `data` list,
  # so they must be unique. Checking up front reports every clash at once,
  # before any table has been created.
  if (anyDuplicated(table_names) > 0) {
    stop(
      "Duplicate parquet file names under data-out: ",
      paste(unique(table_names[duplicated(table_names)]), collapse = ", "),
      call. = FALSE
    )
  }

  con <- duckdb_global_con()

  # Copy a one-row sample of each file into DuckDB. Only the schema is used
  # later (the DDL query and the column names), so one row is enough.
  data <-
    map(files, \(x) {
      arrow::read_parquet(x) |>
        slice_head(n = 1) |>
        # NOTE(review): presumably factors are cast to character so DuckDB
        # emits a text type rather than an ENUM in the DDL -- confirm.
        mutate(across(where(is.factor), as.character)) |>
        copy_to(dest = con, fs::path_ext_remove(fs::path_file(x)))
    }) |>
    # Name each entry by its table name (not its parent directory) so that
    # lookups by table name, e.g. `data[[table_name]]`, always resolve.
    set_names(nm = table_names)
  # Fetch the `sql` column of duckdb_tables() (one row per table known to the
  # connection) and render each value as its own glue string.
  sql <- con |>
    db_collect(sql("SELECT sql FROM duckdb_tables()"))

  glue::glue_data(sql, "\n{sql}\n")
  25. # https://docs.snowflake.com/en/user-guide/data-load-s3
  26. # copy into cities
  27. # from (select $1:continent::varchar,
  28. # $1:country:name::varchar,
  29. # $1:country:city::variant
  30. # from @sf_tut_stage/cities.parquet);
  # Build one COPY INTO statement per parquet file, selecting every column of
  # the matching `data` entry as `$1:<column>`, then join the statements with
  # blank lines and place the result on the clipboard.
  tibble(
    file_name = fs::path_file(files),
    table_name = fs::path_ext_remove(fs::path_file(files))
  ) |>
    mutate(
      colnames = map_chr(
        table_name,
        \(x) {
          tbl_sample <- data[[x]]
          # Fail loudly: a missing entry would otherwise produce `SELECT `
          # with no columns, i.e. invalid SQL copied silently to the
          # clipboard.
          if (is.null(tbl_sample)) {
            stop(
              "No entry named '", x, "' in `data`; ",
              "cannot build its column list.",
              call. = FALSE
            )
          }
          paste(sprintf("$1:%s", colnames(tbl_sample)), collapse = ",\n ")
        }
      )
    ) |>
    # glue strips the uniform indentation of the template lines below.
    glue::glue_data("
      COPY INTO {table_name}
      FROM (
      SELECT {colnames}
      FROM @FINANCE_PARQUET/{file_name} (FILE_FORMAT => \"PARQUETFORMAT\")
      );
      ") |>
    glue::glue_collapse(sep = "\n\n") |>
    clipr::write_clip()