# Source: docs/source/src/python/user-guide/io/cloud-storage.py
#
# Cloud-storage example snippets. Everything below sits inside a single string
# literal, so importing this module executes none of the examples (they
# reference placeholder buckets and credentials). Each example is delimited by
# `# --8<-- [start:NAME]` / `# --8<-- [end:NAME]` marker comments -- presumably
# consumed by the docs build's snippet-include step, so keep the marker names
# stable. Every snippet carries its own imports so it can be copied on its own.
"""
# --8<-- [start:read_parquet]
import polars as pl

source = "s3://bucket/*.parquet"

df = pl.read_parquet(source)
# --8<-- [end:read_parquet]

# --8<-- [start:scan_parquet_query]
import polars as pl

source = "s3://bucket/*.parquet"

df = pl.scan_parquet(source).filter(pl.col("id") < 100).select("id", "value").collect()
# --8<-- [end:scan_parquet_query]

# --8<-- [start:storage_options_retry_configuration]
import polars as pl

pl.scan_parquet(
    "s3://bucket/*.parquet",
    storage_options={
        "max_retries": 3,
        "retry_timeout_ms": 9873,
        "retry_init_backoff_ms": 9874,
        "retry_max_backoff_ms": 9875,
        "retry_base_multiplier": 3.14159,
    },
)
# --8<-- [end:storage_options_retry_configuration]

# --8<-- [start:scan_parquet_storage_options_aws]
import polars as pl

source = "s3://bucket/*.parquet"

storage_options = {
    "aws_access_key_id": "<secret>",
    "aws_secret_access_key": "<secret>",
    "aws_region": "us-east-1",
}
df = pl.scan_parquet(source, storage_options=storage_options).collect()
# --8<-- [end:scan_parquet_storage_options_aws]

# --8<-- [start:credential_provider_class]
import polars as pl

lf = pl.scan_parquet(
    "s3://.../...",
    credential_provider=pl.CredentialProviderAWS(
        profile_name="...",
        assume_role={
            "RoleArn": "...",
            "RoleSessionName": "...",
        },
    ),
)

df = lf.collect()
# --8<-- [end:credential_provider_class]

# --8<-- [start:credential_provider_class_global_default]
import polars as pl

pl.Config.set_default_credential_provider(
    pl.CredentialProviderAWS(
        profile_name="...",
        assume_role={
            "RoleArn": "...",
            "RoleSessionName": "...",
        },
    )
)
# --8<-- [end:credential_provider_class_global_default]

# --8<-- [start:credential_provider_custom_func]
import polars as pl


def get_credentials() -> pl.CredentialProviderFunctionReturn:
    expiry = None

    return {
        "aws_access_key_id": "...",
        "aws_secret_access_key": "...",
        "aws_session_token": "...",
    }, expiry


lf = pl.scan_parquet(
    "s3://.../...",
    credential_provider=get_credentials,
)

df = lf.collect()
# --8<-- [end:credential_provider_custom_func]

# --8<-- [start:credential_provider_custom_func_azure]
import polars as pl
from azure.identity import DefaultAzureCredential


def credential_provider():
    credential = DefaultAzureCredential(exclude_managed_identity_credential=True)
    token = credential.get_token("https://storage.azure.com/.default")

    return {"bearer_token": token.token}, token.expires_on


pl.scan_parquet(
    "abfss://...@.../...",
    credential_provider=credential_provider,
)

# Note that for the above case, this shortcut is also available:

pl.scan_parquet(
    "abfss://...@.../...",
    credential_provider=pl.CredentialProviderAzure(
        credential=DefaultAzureCredential(exclude_managed_identity_credential=True)
    ),
)

# --8<-- [end:credential_provider_custom_func_azure]

# --8<-- [start:scan_pyarrow_dataset]
import polars as pl
import pyarrow.dataset as ds

dset = ds.dataset("s3://my-partitioned-folder/", format="parquet")
(
    pl.scan_pyarrow_dataset(dset)
    .filter(pl.col("foo") == "a")
    .select(["foo", "bar"])
    .collect()
)
# --8<-- [end:scan_pyarrow_dataset]

# --8<-- [start:write_parquet]
import polars as pl

df = pl.DataFrame(
    {
        "foo": ["a", "b", "c", "d", "d"],
        "bar": [1, 2, 3, 4, 5],
    }
)

destination = "s3://bucket/my_file.parquet"

df.write_parquet(destination)

# --8<-- [end:write_parquet]

# --8<-- [start:write_file_object]
import polars as pl
import s3fs
import gzip

df = pl.DataFrame(
    {
        "foo": ["a", "b", "c", "d", "d"],
        "bar": [1, 2, 3, 4, 5],
    }
)

destination = "s3://bucket/my_file.csv.gz"

fs = s3fs.S3FileSystem()

with fs.open(destination, "wb") as cloud_f:
    with gzip.open(cloud_f, "w") as f:
        df.write_csv(f)
# --8<-- [end:write_file_object]
"""