Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
CloudPak-Outcomes
GitHub Repository: CloudPak-Outcomes/Outcomes-Projects
Path: blob/main/Netezza/Delay_ExternalTables.ipynb
1928 views
Kernel: Python 3

Netezza python driver

# Install and import Netezza python driver !pip install nzpy import nzpy import datetime

Establish a Netezza Cloud Connection

# Netezza Cloud Connection Information nz_host = "" nz_port = 5480 nz_database = "" nz_user = "" nz_password = ""
# Connect to Netezza cloud instance nzcon = nzpy.connect(user=nz_user, password=nz_password, host=nz_host, database=nz_database, port=nz_port) if bool(nzcon): print("Host : " + nz_host) print("Port :", nz_port) print("User : " + nz_user) print("Password : ********") print("Database : " + nz_database) print() print("Connection successful.") print() print("Notebook is ready.")

Define Lakehouse Historical tables for 2003-2008

# Create a singularity table for all historical flight delay data on AWS S3 ddl="""create EXTERNAL table airline_delay_cause_history on cosdb.admin.airline_cos_datasource using ( DATAOBJECT ('/airline/airline_delay_cause_history_2003_2018.parquet') FORMAT 'PARQUET')""" with nzcon.cursor() as cursor: cursor.execute("set ENABLE_EXTERNAL_DATASOURCE = 1") try: cursor.execute(ddl) print("Netezza Parquet table AIRLINE_DELAY_CAUSE_HISTORY created successfully") except Exception as e: print(str(e))

Note: External Datasource object was created by the DBA and grant use to your user ID. See below for DDL used.

set ENABLE_EXTERNAL_DATASOURCE = 1 ; create EXTERNAL DATASOURCE cosdb.admin.airline_cos_datasource on AWSS3 using ( ACCESSKEYID '*****************' SECRETACCESSKEY '********************' BUCKET 'concerto-bnr-test' REGION 'us-east-1' ) ;

Define Lakehouse Historical tables for each year (2003...2008)

# Create the external tables for the historical data stored in Parquet files with nzcon.cursor() as cursor: years = ["2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018"] cursor.execute("set ENABLE_EXTERNAL_DATASOURCE = 1") for year in years: ddl="create EXTERNAL table ext_airline_delay_cause_" + year + " on cosdb.admin.airline_cos_datasource using ( DATAOBJECT ('/airline/airline_delay_cause_" + year + ".parquet') FORMAT 'PARQUET');" try: cursor.execute(ddl) print("Parquet table EXT_AIRLINE_DELAY_CAUSE_" + year + " created successfully") except Exception as e: print(str(e))

Persist Parquet Data into Native Netezza Storage (2008)

# CTAS: Create Netezza Native table using Parquet table for 2018 ddl="""create table airline_delay_cause_2018_local as select * from ext_airline_delay_cause_2018 distribute on random organize on (year, month, carrier, airport) ; """ with nzcon.cursor() as cursor: try: cursor.execute(ddl) print("Table AIRLINE_DELAY_CAUSE_2018_LOCAL created successfully.") except Exception as e: print(str(e))