Path: blob/main/docs/source/src/python/user-guide/sql/intro.py
7890 views
# --8<-- [start:setup]1import polars as pl23# --8<-- [end:setup]45# --8<-- [start:context]6ctx = pl.SQLContext()7# --8<-- [end:context]89# --8<-- [start:register_context]10df = pl.DataFrame({"a": [1, 2, 3]})11lf = pl.LazyFrame({"b": [4, 5, 6]})1213# Register all dataframes in the global namespace: registers both "df" and "lf"14ctx = pl.SQLContext(register_globals=True)1516# Register an explicit mapping of identifier name to frame17ctx = pl.SQLContext(frames={"table_one": df, "table_two": lf})1819# Register frames using kwargs; dataframe df as "df" and lazyframe lf as "lf"20ctx = pl.SQLContext(df=df, lf=lf)21# --8<-- [end:register_context]2223# --8<-- [start:register_pandas]24import pandas as pd2526df_pandas = pd.DataFrame({"c": [7, 8, 9]})27ctx = pl.SQLContext(df_pandas=pl.from_pandas(df_pandas))28# --8<-- [end:register_pandas]2930# --8<-- [start:execute]31# For local files use scan_csv instead32pokemon = pl.read_csv(33"https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv"34)35with pl.SQLContext(register_globals=True, eager=True) as ctx:36df_small = ctx.execute("SELECT * from pokemon LIMIT 5")37print(df_small)38# --8<-- [end:execute]3940# --8<-- [start:prepare_multiple_sources]41with open("docs/assets/data/products_categories.json", "w") as temp_file:42json_data = """{"product_id": 1, "category": "Category 1"}43{"product_id": 2, "category": "Category 1"}44{"product_id": 3, "category": "Category 2"}45{"product_id": 4, "category": "Category 2"}46{"product_id": 5, "category": "Category 3"}"""4748temp_file.write(json_data)4950with open("docs/assets/data/products_masterdata.csv", "w") as temp_file:51csv_data = """product_id,product_name521,Product A532,Product B543,Product C554,Product D565,Product E"""5758temp_file.write(csv_data)5960sales_data = pd.DataFrame(61{62"product_id": [1, 2, 3, 4, 5],63"sales": [100, 200, 150, 250, 300],64}65)66# --8<-- [end:prepare_multiple_sources]6768# --8<-- [start:execute_multiple_sources]69# Input data:70# products_masterdata.csv with schema {'product_id': Int64, 'product_name': String}71# products_categories.json with schema {'product_id': Int64, 'category': String}72# sales_data is a Pandas DataFrame with schema {'product_id': Int64, 'sales': Int64}7374with pl.SQLContext(75products_masterdata=pl.scan_csv("docs/assets/data/products_masterdata.csv"),76products_categories=pl.scan_ndjson("docs/assets/data/products_categories.json"),77sales_data=pl.from_pandas(sales_data),78eager=True,79) as ctx:80query = """81SELECT82product_id,83product_name,84category,85sales86FROM87products_masterdata88LEFT JOIN products_categories USING (product_id)89LEFT JOIN sales_data USING (product_id)90"""91print(ctx.execute(query))92# --8<-- [end:execute_multiple_sources]9394# --8<-- [start:clean_multiple_sources]95import os9697os.remove("docs/assets/data/products_categories.json")98os.remove("docs/assets/data/products_masterdata.csv")99# --8<-- [end:clean_multiple_sources]100101102