Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/python/polars-cloud/distributed.py
6940 views
# Documentation example for running a Polars query in distributed mode on
# Polars Cloud. The whole example lives inside a string literal, so importing
# or executing this module does not run the query.
# NOTE(review): presumably it is wrapped this way because it needs real cloud
# resources (S3 buckets, a compute cluster) — confirm with the docs build.
# NOTE(review): the `# --8<-- [start:...]` / `[end:...]` lines look like
# snippet section markers consumed by the docs tooling; keep them verbatim.
"""
# --8<-- [start:example]
import polars as pl
import polars_cloud as pc
from datetime import date

query = (
    pl.scan_parquet("s3://dataset/")
    .filter(pl.col("l_shipdate") <= date(1998, 9, 2))
    .group_by("l_returnflag", "l_linestatus")
    .agg(
        avg_price=pl.mean("l_extendedprice"),
        avg_disc=pl.mean("l_discount"),
        count_order=pl.len(),
    )
)

result = (
    query.remote(pc.ComputeContext(cpus=16, memory=64, cluster_size=32))
    .distributed()
    .sink_parquet("s3://output/result.parquet")
)
# --8<-- [end:example]
"""