Path: blob/main/docs/source/src/python/polars-cloud/distributed.py
8338 views
"""1# --8<-- [start:setup]2import polars as pl3import polars_cloud as pc45lineitem_sf100 = pl.scan_parquet("s3://polars-cloud-samples-us-east-2-prd/pdsh/sf100/lineitem/*.parquet",6storage_options={"request_payer": "true"})7customer_sf100 = pl.scan_parquet("s3://polars-cloud-samples-us-east-2-prd/pdsh/sf100/customer/*.parquet",8storage_options={"request_payer": "true"})9orders_sf100 = pl.scan_parquet("s3://polars-cloud-samples-us-east-2-prd/pdsh/sf100/orders/*.parquet",10storage_options={"request_payer": "true"})1112# --8<-- [end:setup]1314# --8<-- [start:query]15def pdsh_q3(customer, lineitem, orders):1617return (18customer.filter(pl.col("c_mktsegment") == "BUILDING")19.join(orders, left_on="c_custkey", right_on="o_custkey")20.join(lineitem, left_on="o_orderkey", right_on="l_orderkey")21.filter(pl.col("o_orderdate") < pl.date(1995, 3, 15))22.filter(pl.col("l_shipdate") > pl.date(1995, 3, 15))23.with_columns(24(pl.col("l_extendedprice") * (1 - pl.col("l_discount"))).alias("revenue")25)26.group_by("o_orderkey", "o_orderdate", "o_shippriority")27.agg(pl.sum("revenue"))28.select(29pl.col("o_orderkey").alias("l_orderkey"),30"revenue",31"o_orderdate",32"o_shippriority",33)34.sort(by=["revenue", "o_orderdate"], descending=[True, False])35)3637# --8<-- [end:query]3839# --8<-- [start:context-run]40ctx = pc.ComputeContext(workspace="your-workspace", cpus=4, memory=4, cluster_size=5)4142pdsh_q3(customer_sf100, lineitem_sf100, orders_sf100)43.remote(ctx)44.distributed()45.show()4647# --8<-- [end:context-run]48"""495051