Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/python/user-guide/sql/intro.py
7890 views
1
# --8<-- [start:setup]
2
import polars as pl
3
4
# --8<-- [end:setup]
5
6
# --8<-- [start:context]
7
ctx = pl.SQLContext()
8
# --8<-- [end:context]
9
10
# --8<-- [start:register_context]
11
df = pl.DataFrame({"a": [1, 2, 3]})
12
lf = pl.LazyFrame({"b": [4, 5, 6]})
13
14
# Register all dataframes in the global namespace: registers both "df" and "lf"
15
ctx = pl.SQLContext(register_globals=True)
16
17
# Register an explicit mapping of identifier name to frame
18
ctx = pl.SQLContext(frames={"table_one": df, "table_two": lf})
19
20
# Register frames using kwargs; dataframe df as "df" and lazyframe lf as "lf"
21
ctx = pl.SQLContext(df=df, lf=lf)
22
# --8<-- [end:register_context]
23
24
# --8<-- [start:register_pandas]
25
import pandas as pd
26
27
df_pandas = pd.DataFrame({"c": [7, 8, 9]})
28
ctx = pl.SQLContext(df_pandas=pl.from_pandas(df_pandas))
29
# --8<-- [end:register_pandas]
30
31
# --8<-- [start:execute]
32
# For local files use scan_csv instead
33
pokemon = pl.read_csv(
34
"https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv"
35
)
36
with pl.SQLContext(register_globals=True, eager=True) as ctx:
37
df_small = ctx.execute("SELECT * from pokemon LIMIT 5")
38
print(df_small)
39
# --8<-- [end:execute]
40
41
# --8<-- [start:prepare_multiple_sources]
42
with open("docs/assets/data/products_categories.json", "w") as temp_file:
43
json_data = """{"product_id": 1, "category": "Category 1"}
44
{"product_id": 2, "category": "Category 1"}
45
{"product_id": 3, "category": "Category 2"}
46
{"product_id": 4, "category": "Category 2"}
47
{"product_id": 5, "category": "Category 3"}"""
48
49
temp_file.write(json_data)
50
51
with open("docs/assets/data/products_masterdata.csv", "w") as temp_file:
52
csv_data = """product_id,product_name
53
1,Product A
54
2,Product B
55
3,Product C
56
4,Product D
57
5,Product E"""
58
59
temp_file.write(csv_data)
60
61
sales_data = pd.DataFrame(
62
{
63
"product_id": [1, 2, 3, 4, 5],
64
"sales": [100, 200, 150, 250, 300],
65
}
66
)
67
# --8<-- [end:prepare_multiple_sources]
68
69
# --8<-- [start:execute_multiple_sources]
70
# Input data:
71
# products_masterdata.csv with schema {'product_id': Int64, 'product_name': String}
72
# products_categories.json with schema {'product_id': Int64, 'category': String}
73
# sales_data is a Pandas DataFrame with schema {'product_id': Int64, 'sales': Int64}
74
75
with pl.SQLContext(
76
products_masterdata=pl.scan_csv("docs/assets/data/products_masterdata.csv"),
77
products_categories=pl.scan_ndjson("docs/assets/data/products_categories.json"),
78
sales_data=pl.from_pandas(sales_data),
79
eager=True,
80
) as ctx:
81
query = """
82
SELECT
83
product_id,
84
product_name,
85
category,
86
sales
87
FROM
88
products_masterdata
89
LEFT JOIN products_categories USING (product_id)
90
LEFT JOIN sales_data USING (product_id)
91
"""
92
print(ctx.execute(query))
93
# --8<-- [end:execute_multiple_sources]
94
95
# --8<-- [start:clean_multiple_sources]
96
import os
97
98
os.remove("docs/assets/data/products_categories.json")
99
os.remove("docs/assets/data/products_masterdata.csv")
100
# --8<-- [end:clean_multiple_sources]
101
102