Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/python/user-guide/lazy/multiplexing.py
7890 views
1
# --8<-- [start:setup]
2
import polars as pl
3
import numpy as np
4
import tempfile
5
import base64
6
import polars.testing
7
8
9
def show_plan(q: pl.LazyFrame, optimized: bool = True) -> None:
    """Print the query plan of ``q`` as a self-contained HTML ``<img>`` tag.

    The plan is rendered to a temporary file through ``q.show_graph`` and
    the file's bytes are embedded in the printed tag as a base64 ``data:``
    URI labelled ``image/png``.

    Args:
        q: Lazy query whose plan should be drawn.
        optimized: Forwarded unchanged to the ``optimized`` argument of
            ``show_graph``.
    """
    # Function-scope import keeps this helper self-contained.
    from pathlib import Path

    # Render into a fresh temporary directory rather than an open
    # ``NamedTemporaryFile``: a named temporary file cannot be reopened by
    # name while it is still open on Windows, which is what both the
    # ``show_graph`` write and the read-back below have to do.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = Path(tmp_dir) / "plan.png"
        q.show_graph(show=False, output_path=str(image_path), optimized=optimized)
        png = base64.b64encode(image_path.read_bytes()).decode()

    print(f'<img src="data:image/png;base64, {png}"/>')
15
16
17
# --8<-- [end:setup]
18
19
20
# --8<-- [start:dataframe]
# Seed NumPy's global RNG so the shuffle below gives the same order each run.
np.random.seed(0)
a = np.arange(10)
np.random.shuffle(a)  # shuffles `a` in place
df = pl.DataFrame({"n": a})
print(df)
# --8<-- [end:dataframe]
27
28
# --8<-- [start:eager]
# The row order coming out of a group-by is not guaranteed
df1 = df.group_by("n").len()

# Split the result into a lower and an upper half of five rows each
out = [df1.slice(offset=start, length=5) for start in (0, 5)]

# Concatenating both halves must give back df1
pl.testing.assert_frame_equal(df1, pl.concat(out))
# --8<-- [end:eager]
38
39
# The `lazy` snippet below sits inside a bare string literal, so it is never
# executed when this file runs; only its text is present in the file.
# NOTE(review): presumably it is disabled because every `collect()` call
# re-runs the group-by on its own and the group-by does not guarantee row
# order, so the assertion could fail -- confirm with the docs page.
"""
40
# --8<-- [start:lazy]
41
lf1 = df.lazy().group_by("n").len()
42
43
out = [lf1.slice(offset=i * 5, length=5).collect() for i in range(2)]
44
45
pl.testing.assert_frame_equal(lf1.collect(), pl.concat(out))
46
# --8<-- [end:lazy]
47
"""
48
49
# --8<-- [start:plan_0]
# Draw the unoptimized plan of the plain group-by query
q1 = df.lazy().group_by("n").len()
show_plan(q1, optimized=False)
# --8<-- [end:plan_0]
53
54
# --8<-- [start:plan_1]
# Same group-by query, followed by a slice of the first five rows
q1 = df.lazy().group_by("n").len()
q2 = q1.slice(offset=0, length=5)
show_plan(q2, optimized=False)
# --8<-- [end:plan_1]
59
60
# --8<-- [start:plan_2]
# Same group-by query, followed by a slice of the next five rows
q1 = df.lazy().group_by("n").len()
q2 = q1.slice(offset=5, length=5)
show_plan(q2, optimized=False)
# --8<-- [end:plan_2]
65
66
67
# --8<-- [start:collect_all]
lf1 = df.lazy().group_by("n").len()

# Build both five-row slices lazily, then collect the full query and the
# two slices with a single `collect_all` call
out = [lf1.slice(offset=start, length=5) for start in (0, 5)]
results = pl.collect_all([lf1, *out])

# The first result is the full frame; the remaining ones are its halves
pl.testing.assert_frame_equal(results[0], pl.concat(results[1:]))
# --8<-- [end:collect_all]
75
76
# --8<-- [start:explain_all]
lf1 = df.lazy().group_by("n").len()
out = [lf1.slice(offset=start, length=5) for start in (0, 5)]

# Print what `explain_all` reports for the full query plus both slices
print(pl.explain_all([lf1, *out]))
# --8<-- [end:explain_all]
82
83