Path: blob/main/docs/source/src/python/user-guide/lazy/multiplexing.py
7890 views
# --8<-- [start:setup]
import polars as pl
import numpy as np
import tempfile
import base64
import polars.testing


def show_plan(q: pl.LazyFrame, optimized: bool = True) -> None:
    """Render the query plan of ``q`` and print it as an inline HTML image.

    The plan graph is written to a temporary file via ``q.show_graph``, read
    back, base64-encoded and printed to stdout as an ``<img>`` tag.

    Args:
        q: The lazy query whose plan should be rendered.
        optimized: Forwarded to ``show_graph``; selects the optimized or the
            unoptimized plan.
    """
    # Write into a temporary directory rather than re-opening a still-open
    # NamedTemporaryFile by name: the latter is not possible on Windows.
    with tempfile.TemporaryDirectory() as tmp_dir:
        png_path = f"{tmp_dir}/plan.png"
        q.show_graph(show=False, output_path=png_path, optimized=optimized)
        with open(png_path, "rb") as f:
            png = base64.b64encode(f.read()).decode()
    print(f'<img src="data:image/png;base64, {png}"/>')


# --8<-- [end:setup]


# --8<-- [start:dataframe]
np.random.seed(0)
a = np.arange(0, 10)
np.random.shuffle(a)
df = pl.DataFrame({"n": a})
print(df)
# --8<-- [end:dataframe]

# --8<-- [start:eager]
# A group-by doesn't guarantee order
df1 = df.group_by("n").len()

# Take the lower half and the upper half in a list
out = [df1.slice(offset=i * 5, length=5) for i in range(2)]

# Assert df1 is equal to the sum of both halves
pl.testing.assert_frame_equal(df1, pl.concat(out))
# --8<-- [end:eager]

# NOTE(review): the "lazy" snippet below is wrapped in a string literal, so it
# is never executed. Presumably this is because every ``collect()`` re-runs the
# group-by, whose row order is not guaranteed, so the assertion could fail.
"""
# --8<-- [start:lazy]
lf1 = df.lazy().group_by("n").len()

out = [lf1.slice(offset=i * 5, length=5).collect() for i in range(2)]

pl.testing.assert_frame_equal(lf1.collect(), pl.concat(out))
# --8<-- [end:lazy]
"""

# --8<-- [start:plan_0]
q1 = df.lazy().group_by("n").len()
show_plan(q1, optimized=False)
# --8<-- [end:plan_0]

# --8<-- [start:plan_1]
q1 = df.lazy().group_by("n").len()
q2 = q1.slice(offset=0, length=5)
show_plan(q2, optimized=False)
# --8<-- [end:plan_1]

# --8<-- [start:plan_2]
q1 = df.lazy().group_by("n").len()
q2 = q1.slice(offset=5, length=5)
show_plan(q2, optimized=False)
# --8<-- [end:plan_2]


# --8<-- [start:collect_all]
lf1 = df.lazy().group_by("n").len()

out = [lf1.slice(offset=i * 5, length=5) for i in range(2)]
results = pl.collect_all([lf1] + out)

pl.testing.assert_frame_equal(results[0], pl.concat(results[1:]))
# --8<-- [end:collect_all]

# --8<-- [start:explain_all]
lf1 = df.lazy().group_by("n").len()
out = [lf1.slice(offset=i * 5, length=5) for i in range(2)]

print(pl.explain_all([lf1] + out))
# --8<-- [end:explain_all]