Pārlūkot izejas kodu

add snowflake queries

main
Garrick Aden-Buie pirms 2 gadiem
vecāks
revīzija
0f340597d4
Šim parakstam datu bāzē netika atrasta zināma atslēga
3 mainīti faili ar 327 papildinājumiem un 0 dzēšanām
  1. +16
    -0
      process/inst/snowflake/01-create-tables.sql
  2. +6
    -0
      process/inst/snowflake/02-create-external-stage.sql
  3. +305
    -0
      process/inst/snowflake/03-copy-from-s3.sql

+ 16
- 0
process/inst/snowflake/01-create-tables.sql Parādīt failu

@@ -0,0 +1,16 @@
-- (Re)creates the voters table; CREATE OR REPLACE drops any existing table
-- of that name first. Every column is VARCHAR except registr_dt (DATE) and
-- birth_year / age_at_year_end (INTEGER).
CREATE OR REPLACE TABLE voters (
    county_id VARCHAR,
    county_desc VARCHAR,
    voter_reg_num VARCHAR,
    ncid VARCHAR,
    last_name VARCHAR,
    first_name VARCHAR,
    middle_name VARCHAR,
    name_suffix_lbl VARCHAR,
    status_cd VARCHAR,
    voter_status_desc VARCHAR,
    reason_cd VARCHAR,
    voter_status_reason_desc VARCHAR,
    res_street_address VARCHAR,
    res_city_desc VARCHAR,
    state_cd VARCHAR,
    zip_code VARCHAR,
    mail_addr1 VARCHAR,
    mail_addr2 VARCHAR,
    mail_addr3 VARCHAR,
    mail_addr4 VARCHAR,
    mail_city VARCHAR,
    mail_state VARCHAR,
    mail_zipcode VARCHAR,
    full_phone_number VARCHAR,
    confidential_ind VARCHAR,
    registr_dt DATE,
    race_code VARCHAR,
    ethnic_code VARCHAR,
    party_cd VARCHAR,
    gender_code VARCHAR,
    birth_year INTEGER,
    age_at_year_end INTEGER,
    birth_state VARCHAR,
    drivers_lic VARCHAR,
    precinct_abbrv VARCHAR,
    precinct_desc VARCHAR,
    municipality_abbrv VARCHAR,
    municipality_desc VARCHAR,
    ward_abbrv VARCHAR,
    ward_desc VARCHAR,
    cong_dist_abbrv VARCHAR,
    super_court_abbrv VARCHAR,
    judic_dist_abbrv VARCHAR,
    nc_senate_abbrv VARCHAR,
    nc_house_abbrv VARCHAR,
    county_commiss_abbrv VARCHAR,
    county_commiss_desc VARCHAR,
    township_abbrv VARCHAR,
    township_desc VARCHAR,
    school_dist_abbrv VARCHAR,
    school_dist_desc VARCHAR,
    fire_dist_abbrv VARCHAR,
    fire_dist_desc VARCHAR,
    water_dist_abbrv VARCHAR,
    water_dist_desc VARCHAR,
    sewer_dist_abbrv VARCHAR,
    sewer_dist_desc VARCHAR,
    sanit_dist_abbrv VARCHAR,
    sanit_dist_desc VARCHAR,
    rescue_dist_abbrv VARCHAR,
    rescue_dist_desc VARCHAR,
    munic_dist_abbrv VARCHAR,
    munic_dist_desc VARCHAR,
    dist_1_abbrv VARCHAR,
    dist_1_desc VARCHAR,
    vtd_abbrv VARCHAR,
    vtd_desc VARCHAR
);
-- (Re)creates the reports table; CREATE OR REPLACE drops any existing table
-- of that name first. "year" is a double-quoted identifier, so it is stored
-- in lowercase and must be quoted when referenced.
CREATE OR REPLACE TABLE reports (
    "year" INTEGER,
    doc_name VARCHAR,
    doc_order INTEGER,
    sboe_id VARCHAR,
    report_id INTEGER,
    amended BOOLEAN,
    image_id VARCHAR,
    received_image DATE,
    received_data DATE,
    start_date DATE,
    end_date DATE,
    sboe_start_date DATE,
    sboe_end_date DATE,
    cover_start_date DATE,
    cover_end_date DATE,
    cover_date_filed DATE
);
-- (Re)creates the receipts_payer table; CREATE OR REPLACE drops any existing
-- table of that name first. Unlike expenses_payee, there is no street_2 column.
CREATE OR REPLACE TABLE receipts_payer (
    payer_id DOUBLE,
    org_name VARCHAR,
    is_org BOOLEAN,
    is_us BOOLEAN,
    profession VARCHAR,
    employers_name VARCHAR,
    street_1 VARCHAR,
    city VARCHAR,
    state VARCHAR,
    full_zip VARCHAR,
    country_name VARCHAR,
    address_lookup VARCHAR
);
-- (Re)creates the receipts table; CREATE OR REPLACE drops any existing table
-- of that name first.
-- NOTE(review): amount and sum_to_date are DOUBLE (floating point); confirm
-- that is acceptable for these figures before relying on exact sums.
CREATE OR REPLACE TABLE receipts (
    sboe_id VARCHAR,
    report_id INTEGER,
    payer_id DOUBLE,
    group_id INTEGER,
    occur_date DATE,
    amount DOUBLE,
    sum_to_date DOUBLE,
    is_aggregated BOOLEAN,
    receipt_type_desc VARCHAR,
    receipt_type_code VARCHAR,
    is_donation BOOLEAN,
    account_abbr VARCHAR,
    form_of_payment_desc VARCHAR,
    is_prior BOOLEAN
);
-- (Re)creates the officers table; CREATE OR REPLACE drops any existing table
-- of that name first. "type" and "name" are double-quoted identifiers, so
-- they are stored in lowercase and must be quoted when referenced.
CREATE OR REPLACE TABLE officers (
    sboe_id VARCHAR,
    report_id INTEGER,
    "type" VARCHAR,
    "name" VARCHAR,
    address_lookup VARCHAR,
    phone VARCHAR
);
-- (Re)creates the expenses_payee table; CREATE OR REPLACE drops any existing
-- table of that name first.
CREATE OR REPLACE TABLE expenses_payee (
    payee_id DOUBLE,
    org_name VARCHAR,
    is_org BOOLEAN,
    is_us BOOLEAN,
    profession VARCHAR,
    employers_name VARCHAR,
    street_1 VARCHAR,
    street_2 VARCHAR,
    city VARCHAR,
    state VARCHAR,
    full_zip VARCHAR,
    country_name VARCHAR,
    address_lookup VARCHAR
);
-- (Re)creates the expenses table; CREATE OR REPLACE drops any existing table
-- of that name first.
-- NOTE(review): amount and sum_to_date are DOUBLE (floating point); confirm
-- that is acceptable for these figures before relying on exact sums.
CREATE OR REPLACE TABLE expenses (
    sboe_id VARCHAR,
    report_id INTEGER,
    payee_id DOUBLE,
    occur_date DATE,
    amount DOUBLE,
    sum_to_date DOUBLE,
    is_aggregated BOOLEAN,
    purpose_type_code VARCHAR,
    purpose VARCHAR,
    expenditure_type_desc VARCHAR,
    account_abbr VARCHAR,
    form_of_payment_desc VARCHAR
);
-- (Re)creates the cl_name_on_ballot table; CREATE OR REPLACE drops any
-- existing table of that name first.
CREATE OR REPLACE TABLE cl_name_on_ballot (
    candidate_id INTEGER,
    election_dt DATE,
    name_on_ballot VARCHAR,
    first_name VARCHAR,
    middle_name VARCHAR,
    last_name VARCHAR,
    name_suffix_lbl VARCHAR
);
-- (Re)creates the cl_elections table; CREATE OR REPLACE drops any existing
-- table of that name first.
CREATE OR REPLACE TABLE cl_elections (
    election_dt DATE,
    county_name VARCHAR,
    contest_name VARCHAR,
    candidate_id INTEGER,
    first_name VARCHAR,
    middle_name VARCHAR,
    last_name VARCHAR,
    name_suffix_lbl VARCHAR
);
-- (Re)creates the cl_contact table; CREATE OR REPLACE drops any existing
-- table of that name first.
CREATE OR REPLACE TABLE cl_contact (
    candidate_id INTEGER,
    election_dt DATE,
    street VARCHAR,
    city VARCHAR,
    state VARCHAR,
    zip_code VARCHAR,
    phone VARCHAR,
    email VARCHAR,
    address_lookup VARCHAR
);
-- (Re)creates the cl_candidates table; CREATE OR REPLACE drops any existing
-- table of that name first.
CREATE OR REPLACE TABLE cl_candidates (
    candidate_id INTEGER,
    name_on_ballot VARCHAR,
    first_name VARCHAR,
    middle_name VARCHAR,
    last_name VARCHAR,
    name_suffix_lbl VARCHAR,
    party_last VARCHAR,
    party_most VARCHAR,
    contest_n INTEGER,
    contest_first DATE,
    contest_latest DATE,
    street VARCHAR,
    city VARCHAR,
    state VARCHAR,
    zip_code VARCHAR,
    phone VARCHAR,
    email VARCHAR,
    address_lookup VARCHAR
);
-- (Re)creates the addresses table; CREATE OR REPLACE drops any existing
-- table of that name first.
CREATE OR REPLACE TABLE addresses (
    address_lookup VARCHAR,
    address_resolved VARCHAR,
    lat DOUBLE,
    long DOUBLE,
    match_indicator VARCHAR,
    match_type VARCHAR,
    tiger_line_id INTEGER,
    tiger_side VARCHAR
);
-- (Re)creates the cover table; CREATE OR REPLACE drops any existing table
-- of that name first.
CREATE OR REPLACE TABLE cover (
    report_id INTEGER,
    sboe_id VARCHAR,
    committee_name VARCHAR,
    street_1 VARCHAR,
    street_2 VARCHAR,
    city VARCHAR,
    state VARCHAR,
    zip_code VARCHAR,
    country VARCHAR,
    postal_code VARCHAR,
    committee_type VARCHAR,
    report_type VARCHAR,
    fund_type VARCHAR,
    fund_name VARCHAR,
    date_from DATE,
    date_to DATE,
    date_filed DATE
);
-- (Re)creates the committees table; CREATE OR REPLACE drops any existing
-- table of that name first.
CREATE OR REPLACE TABLE committees (
    sboe_id VARCHAR,
    committee_name VARCHAR,
    report_id INTEGER,
    street_1 VARCHAR,
    street_2 VARCHAR,
    city VARCHAR,
    state VARCHAR,
    zip_code VARCHAR,
    address_lookup VARCHAR,
    committee_type VARCHAR,
    fund_type VARCHAR,
    fund_name VARCHAR
);
-- (Re)creates the committee_candidate table, which pairs an sboe_id with a
-- candidate_id; CREATE OR REPLACE drops any existing table of that name first.
CREATE OR REPLACE TABLE committee_candidate (
    sboe_id VARCHAR,
    candidate_id INTEGER
);
-- (Re)creates the cl_party table; CREATE OR REPLACE drops any existing table
-- of that name first.
CREATE OR REPLACE TABLE cl_party (
    candidate_id INTEGER,
    election_dt DATE,
    party_candidate VARCHAR
);

+ 6
- 0
process/inst/snowflake/02-create-external-stage.sql Parādīt failu

@@ -0,0 +1,6 @@
-- Named file format for Parquet files; referenced by name from the stage
-- definition and from the COPY statements that read staged files.
CREATE OR REPLACE FILE FORMAT parquetformat
    TYPE = 'PARQUET';

-- External stage pointing at the S3 bucket that the COPY statements read
-- their *.parquet files from.
--   URL         : bucket location of the staged files.
--   DIRECTORY   : enables the stage's directory table (the parameter was
--                 previously misspelled as DIRECOTRY).
--   FILE_FORMAT : default format for files read through this stage.
-- NOTE(review): no CREDENTIALS or STORAGE_INTEGRATION is specified, so this
-- presumably relies on the bucket being publicly readable -- confirm.
CREATE OR REPLACE STAGE FINANCE_PARQUET
    URL = 's3://dd-ncsboe-cf'
    DIRECTORY = (ENABLE = TRUE)
    FILE_FORMAT = parquetformat;

+ 305
- 0
process/inst/snowflake/03-copy-from-s3.sql Parādīt failu

@@ -0,0 +1,305 @@
-- CREATE OR REPLACE FILE FORMAT parquetformat TYPE = parquet;

-- Load addresses.parquet from the FINANCE_PARQUET stage into addresses.
-- No target column list is given, so the select list maps to the table's
-- columns by position; keep the order in sync with the table definition.
COPY INTO addresses
FROM (
    SELECT
        $1:address_lookup,
        $1:address_resolved,
        $1:lat,
        $1:long,
        $1:match_indicator,
        $1:match_type,
        $1:tiger_line_id,
        $1:tiger_side
    FROM @FINANCE_PARQUET/addresses.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load cl_candidates.parquet from the FINANCE_PARQUET stage into cl_candidates.
-- No target column list is given, so the select list maps to the table's
-- columns by position; keep the order in sync with the table definition.
COPY INTO cl_candidates
FROM (
    SELECT
        $1:candidate_id,
        $1:name_on_ballot,
        $1:first_name,
        $1:middle_name,
        $1:last_name,
        $1:name_suffix_lbl,
        $1:party_last,
        $1:party_most,
        $1:contest_n,
        $1:contest_first,
        $1:contest_latest,
        $1:street,
        $1:city,
        $1:state,
        $1:zip_code,
        $1:phone,
        $1:email,
        $1:address_lookup
    FROM @FINANCE_PARQUET/cl_candidates.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load cl_contact.parquet from the FINANCE_PARQUET stage into cl_contact.
-- No target column list is given, so the select list maps to the table's
-- columns by position; keep the order in sync with the table definition.
COPY INTO cl_contact
FROM (
    SELECT
        $1:candidate_id,
        $1:election_dt,
        $1:street,
        $1:city,
        $1:state,
        $1:zip_code,
        $1:phone,
        $1:email,
        $1:address_lookup
    FROM @FINANCE_PARQUET/cl_contact.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load cl_elections.parquet from the FINANCE_PARQUET stage into cl_elections.
-- No target column list is given, so the select list maps to the table's
-- columns by position; keep the order in sync with the table definition.
COPY INTO cl_elections
FROM (
    SELECT
        $1:election_dt,
        $1:county_name,
        $1:contest_name,
        $1:candidate_id,
        $1:first_name,
        $1:middle_name,
        $1:last_name,
        $1:name_suffix_lbl
    FROM @FINANCE_PARQUET/cl_elections.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load cl_name_on_ballot.parquet from the FINANCE_PARQUET stage into
-- cl_name_on_ballot. No target column list is given, so the select list maps
-- to the table's columns by position; keep the order in sync with the table
-- definition.
COPY INTO cl_name_on_ballot
FROM (
    SELECT
        $1:candidate_id,
        $1:election_dt,
        $1:name_on_ballot,
        $1:first_name,
        $1:middle_name,
        $1:last_name,
        $1:name_suffix_lbl
    FROM @FINANCE_PARQUET/cl_name_on_ballot.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load cl_party.parquet from the FINANCE_PARQUET stage into cl_party.
-- No target column list is given, so the select list maps to the table's
-- columns by position; keep the order in sync with the table definition.
COPY INTO cl_party
FROM (
    SELECT
        $1:candidate_id,
        $1:election_dt,
        $1:party_candidate
    FROM @FINANCE_PARQUET/cl_party.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load committee_candidate.parquet from the FINANCE_PARQUET stage into
-- committee_candidate. No target column list is given, so the select list
-- maps to the table's columns by position.
COPY INTO committee_candidate
FROM (
    SELECT
        $1:sboe_id,
        $1:candidate_id
    FROM @FINANCE_PARQUET/committee_candidate.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load committees.parquet from the FINANCE_PARQUET stage into committees.
-- No target column list is given, so the select list maps to the table's
-- columns by position; keep the order in sync with the table definition.
COPY INTO committees
FROM (
    SELECT
        $1:sboe_id,
        $1:committee_name,
        $1:report_id,
        $1:street_1,
        $1:street_2,
        $1:city,
        $1:state,
        $1:zip_code,
        $1:address_lookup,
        $1:committee_type,
        $1:fund_type,
        $1:fund_name
    FROM @FINANCE_PARQUET/committees.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load cover.parquet from the FINANCE_PARQUET stage into cover.
-- No target column list is given, so the select list maps to the table's
-- columns by position; keep the order in sync with the table definition.
COPY INTO cover
FROM (
    SELECT
        $1:report_id,
        $1:sboe_id,
        $1:committee_name,
        $1:street_1,
        $1:street_2,
        $1:city,
        $1:state,
        $1:zip_code,
        $1:country,
        $1:postal_code,
        $1:committee_type,
        $1:report_type,
        $1:fund_type,
        $1:fund_name,
        $1:date_from,
        $1:date_to,
        $1:date_filed
    FROM @FINANCE_PARQUET/cover.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load expenses.parquet from the FINANCE_PARQUET stage into expenses.
-- No target column list is given, so the select list maps to the table's
-- columns by position; keep the order in sync with the table definition.
COPY INTO expenses
FROM (
    SELECT
        $1:sboe_id,
        $1:report_id,
        $1:payee_id,
        $1:occur_date,
        $1:amount,
        $1:sum_to_date,
        $1:is_aggregated,
        $1:purpose_type_code,
        $1:purpose,
        $1:expenditure_type_desc,
        $1:account_abbr,
        $1:form_of_payment_desc
    FROM @FINANCE_PARQUET/expenses.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load expenses_payee.parquet from the FINANCE_PARQUET stage into
-- expenses_payee. No target column list is given, so the select list maps to
-- the table's columns by position; keep the order in sync with the table
-- definition.
COPY INTO expenses_payee
FROM (
    SELECT
        $1:payee_id,
        $1:org_name,
        $1:is_org,
        $1:is_us,
        $1:profession,
        $1:employers_name,
        $1:street_1,
        $1:street_2,
        $1:city,
        $1:state,
        $1:full_zip,
        $1:country_name,
        $1:address_lookup
    FROM @FINANCE_PARQUET/expenses_payee.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load officers.parquet from the FINANCE_PARQUET stage into officers.
-- No target column list is given, so the select list maps to the table's
-- columns by position; $1:type and $1:name fill the third and fourth columns.
COPY INTO officers
FROM (
    SELECT
        $1:sboe_id,
        $1:report_id,
        $1:type,
        $1:name,
        $1:address_lookup,
        $1:phone
    FROM @FINANCE_PARQUET/officers.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load receipts.parquet from the FINANCE_PARQUET stage into receipts.
-- No target column list is given, so the select list maps to the table's
-- columns by position; keep the order in sync with the table definition.
COPY INTO receipts
FROM (
    SELECT
        $1:sboe_id,
        $1:report_id,
        $1:payer_id,
        $1:group_id,
        $1:occur_date,
        $1:amount,
        $1:sum_to_date,
        $1:is_aggregated,
        $1:receipt_type_desc,
        $1:receipt_type_code,
        $1:is_donation,
        $1:account_abbr,
        $1:form_of_payment_desc,
        $1:is_prior
    FROM @FINANCE_PARQUET/receipts.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load receipts_payer.parquet from the FINANCE_PARQUET stage into
-- receipts_payer. No target column list is given, so the select list maps to
-- the table's columns by position; keep the order in sync with the table
-- definition.
COPY INTO receipts_payer
FROM (
    SELECT
        $1:payer_id,
        $1:org_name,
        $1:is_org,
        $1:is_us,
        $1:profession,
        $1:employers_name,
        $1:street_1,
        $1:city,
        $1:state,
        $1:full_zip,
        $1:country_name,
        $1:address_lookup
    FROM @FINANCE_PARQUET/receipts_payer.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load reports.parquet from the FINANCE_PARQUET stage into reports.
-- No target column list is given, so the select list maps to the table's
-- columns by position; $1:year fills the first column.
COPY INTO reports
FROM (
    SELECT
        $1:year,
        $1:doc_name,
        $1:doc_order,
        $1:sboe_id,
        $1:report_id,
        $1:amended,
        $1:image_id,
        $1:received_image,
        $1:received_data,
        $1:start_date,
        $1:end_date,
        $1:sboe_start_date,
        $1:sboe_end_date,
        $1:cover_start_date,
        $1:cover_end_date,
        $1:cover_date_filed
    FROM @FINANCE_PARQUET/reports.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

-- Load voters.parquet from the FINANCE_PARQUET stage into voters.
-- No target column list is given, so the select list maps to the table's
-- columns by position; keep the order in sync with the table definition.
COPY INTO voters
FROM (
    SELECT
        $1:county_id,
        $1:county_desc,
        $1:voter_reg_num,
        $1:ncid,
        $1:last_name,
        $1:first_name,
        $1:middle_name,
        $1:name_suffix_lbl,
        $1:status_cd,
        $1:voter_status_desc,
        $1:reason_cd,
        $1:voter_status_reason_desc,
        $1:res_street_address,
        $1:res_city_desc,
        $1:state_cd,
        $1:zip_code,
        $1:mail_addr1,
        $1:mail_addr2,
        $1:mail_addr3,
        $1:mail_addr4,
        $1:mail_city,
        $1:mail_state,
        $1:mail_zipcode,
        $1:full_phone_number,
        $1:confidential_ind,
        $1:registr_dt,
        $1:race_code,
        $1:ethnic_code,
        $1:party_cd,
        $1:gender_code,
        $1:birth_year,
        $1:age_at_year_end,
        $1:birth_state,
        $1:drivers_lic,
        $1:precinct_abbrv,
        $1:precinct_desc,
        $1:municipality_abbrv,
        $1:municipality_desc,
        $1:ward_abbrv,
        $1:ward_desc,
        $1:cong_dist_abbrv,
        $1:super_court_abbrv,
        $1:judic_dist_abbrv,
        $1:nc_senate_abbrv,
        $1:nc_house_abbrv,
        $1:county_commiss_abbrv,
        $1:county_commiss_desc,
        $1:township_abbrv,
        $1:township_desc,
        $1:school_dist_abbrv,
        $1:school_dist_desc,
        $1:fire_dist_abbrv,
        $1:fire_dist_desc,
        $1:water_dist_abbrv,
        $1:water_dist_desc,
        $1:sewer_dist_abbrv,
        $1:sewer_dist_desc,
        $1:sanit_dist_abbrv,
        $1:sanit_dist_desc,
        $1:rescue_dist_abbrv,
        $1:rescue_dist_desc,
        $1:munic_dist_abbrv,
        $1:munic_dist_desc,
        $1:dist_1_abbrv,
        $1:dist_1_desc,
        $1:vtd_abbrv,
        $1:vtd_desc
    FROM @FINANCE_PARQUET/voters.parquet (FILE_FORMAT => "PARQUETFORMAT")
);

Notiek ielāde…
Atcelt
Saglabāt