Path: blob/main/docs/source/src/python/user-guide/io/cloud-storage.py
7890 views
"""1# --8<-- [start:read_parquet]2import polars as pl34source = "s3://bucket/*.parquet"56df = pl.read_parquet(source)7# --8<-- [end:read_parquet]89# --8<-- [start:scan_parquet_query]10import polars as pl1112source = "s3://bucket/*.parquet"1314df = pl.scan_parquet(source).filter(pl.col("id") < 100).select("id","value").collect()15# --8<-- [end:scan_parquet_query]161718# --8<-- [start:scan_parquet_storage_options_aws]19import polars as pl2021source = "s3://bucket/*.parquet"2223storage_options = {24"aws_access_key_id": "<secret>",25"aws_secret_access_key": "<secret>",26"aws_region": "us-east-1",27}28df = pl.scan_parquet(source, storage_options=storage_options).collect()29# --8<-- [end:scan_parquet_storage_options_aws]3031# --8<-- [start:credential_provider_class]32lf = pl.scan_parquet(33"s3://.../...",34credential_provider=pl.CredentialProviderAWS(35profile_name="...",36assume_role={37"RoleArn": f"...",38"RoleSessionName": "...",39}40),41)4243df = lf.collect()44# --8<-- [end:credential_provider_class]4546# --8<-- [start:credential_provider_class_global_default]47pl.Config.set_default_credential_provider(48pl.CredentialProviderAWS(49profile_name="...",50assume_role={51"RoleArn": f"...",52"RoleSessionName": "...",53},54)55)56# --8<-- [end:credential_provider_class_global_default]5758# --8<-- [start:credential_provider_custom_func]59def get_credentials() -> pl.CredentialProviderFunctionReturn:60expiry = None6162return {63"aws_access_key_id": "...",64"aws_secret_access_key": "...",65"aws_session_token": "...",66}, expiry676869lf = pl.scan_parquet(70"s3://.../...",71credential_provider=get_credentials,72)7374df = lf.collect()75# --8<-- [end:credential_provider_custom_func]7677# --8<-- [start:credential_provider_custom_func_azure]78def credential_provider():79credential = DefaultAzureCredential(exclude_managed_identity_credential=True)80token = credential.get_token("https://storage.azure.com/.default")8182return {"bearer_token": token.token}, token.expires_on838485pl.scan_parquet(86"abfss://...@.../...",87credential_provider=credential_provider,88)8990# Note that for the above case, this shortcut is also available:9192pl.scan_parquet(93"abfss://...@.../...",94credential_provider=pl.CredentialProviderAzure(95credential=DefaultAzureCredential(exclude_managed_identity_credential=True)96),97)9899# --8<-- [end:credential_provider_custom_func_azure]100101# --8<-- [start:scan_pyarrow_dataset]102import polars as pl103import pyarrow.dataset as ds104105dset = ds.dataset("s3://my-partitioned-folder/", format="parquet")106(107pl.scan_pyarrow_dataset(dset)108.filter(pl.col("foo") == "a")109.select(["foo", "bar"])110.collect()111)112# --8<-- [end:scan_pyarrow_dataset]113114# --8<-- [start:write_parquet]115import polars as pl116117df = pl.DataFrame(118{119"foo": ["a", "b", "c", "d", "d"],120"bar": [1, 2, 3, 4, 5],121}122)123124destination = "s3://bucket/my_file.parquet"125126df.write_parquet(destination)127128# --8<-- [end:write_parquet]129130# --8<-- [start:write_file_object]131import polars as pl132import s3fs133import gzip134135df = pl.DataFrame(136{137"foo": ["a", "b", "c", "d", "d"],138"bar": [1, 2, 3, 4, 5],139}140)141142destination = "s3://bucket/my_file.csv.gz"143144fs = s3fs.S3FileSystem()145146with fs.open(destination, "wb") as cloud_f:147with gzip.open(cloud_f, "w") as f:148df.write_csv(f)149# --8<-- [end:write_file_object]150"""151152153