# Path: blob/main/py-polars/tests/unit/io/cloud/test_cloud.py
# 8415 views
from __future__ import annotations

import contextlib
import subprocess
import sys
from functools import partial
from typing import TYPE_CHECKING

import pytest

import polars as pl
from polars.io.cloud._utils import _is_aws_cloud

if TYPE_CHECKING:
    from tests.conftest import PlMonkeyPatch


@pytest.mark.slow
@pytest.mark.parametrize("format", ["parquet", "csv", "ndjson", "ipc"])
def test_scan_nonexistent_cloud_path_17444(format: str) -> None:
    """
    Scanning a nonexistent S3 path is lazy; the error only surfaces on collect.

    Regression test for https://github.com/pola-rs/polars/issues/17444
    """
    path_str = f"s3://my-nonexistent-bucket/data.{format}"
    scan_function = getattr(pl, f"scan_{format}")
    # Prevent automatic credential provider instantiation, otherwise CI may fail with
    # * pytest.PytestUnraisableExceptionWarning:
    # * Exception ignored:
    # * ResourceWarning: unclosed socket
    scan_function = partial(scan_function, credential_provider=None)

    # Just calling the scan function should not raise any errors
    if format == "ndjson":
        # NDJSON does not have a `retries` parameter yet - so use the default
        result = scan_function(path_str)
    else:
        result = scan_function(path_str, storage_options={"max_retries": 0})
    assert isinstance(result, pl.LazyFrame)

    # Upon collection, it should fail
    with pytest.raises(IOError):
        result.collect()


def test_scan_err_rebuild_store_19933() -> None:
    """Count how often a credential provider that always raises gets invoked."""
    call_count = 0

    def f() -> None:
        # Stand-in credential provider: record the invocation, then fail.
        nonlocal call_count
        call_count += 1
        raise AssertionError

    q = pl.scan_parquet(
        "s3://.../...",
        storage_options={"aws_region": "eu-west-1"},
        credential_provider=f,  # type: ignore[arg-type]
    )

    # Whatever error the collect raises is irrelevant here; only the number of
    # provider invocations is checked below.
    with contextlib.suppress(Exception):
        q.collect()

    # Note: We get called 2 times per attempt
    if call_count != 4:
        raise AssertionError(call_count)


def test_is_aws_cloud() -> None:
    """Check `_is_aws_cloud` against one accepted and several rejected HTTPS URLs."""
    assert _is_aws_cloud(
        scheme="https",
        first_scan_path="https://bucket.s3.eu-west-1.amazonaws.com/key",
    )

    # Slash in front of amazonaws.com
    assert not _is_aws_cloud(
        scheme="https",
        first_scan_path="https://bucket/.s3.eu-west-1.amazonaws.com/key",
    )

    assert not _is_aws_cloud(
        scheme="https",
        first_scan_path="https://bucket?.s3.eu-west-1.amazonaws.com/key",
    )

    # Legacy global endpoint
    assert not _is_aws_cloud(
        scheme="https", first_scan_path="https://bucket.s3.amazonaws.com/key"
    )

    # Has query parameters (e.g. presigned URL).
    assert not _is_aws_cloud(
        scheme="https",
        first_scan_path="https://bucket.s3.eu-west-1.amazonaws.com/key?",
    )


def test_storage_options_retry_config(
    plmonkeypatch: PlMonkeyPatch,
    capfd: pytest.CaptureFixture[str],
) -> None:
    """
    Check that the retry configuration shows up in the verbose output.

    The configuration is supplied in two ways: through `POLARS_CLOUD_*`
    environment variables set inside a child interpreter, and through
    `storage_options` keys in the current process.
    """
    plmonkeypatch.setenv("POLARS_VERBOSE", "1")

    # The child script sets the environment variables itself before building the
    # query; its stderr is merged into the captured stdout.
    capture = subprocess.check_output(
        [
            sys.executable,
            "-c",
            """\
import contextlib
import os

import polars as pl

os.environ["POLARS_VERBOSE"] = "1"
os.environ["POLARS_CLOUD_MAX_RETRIES"] = "1"
os.environ["POLARS_CLOUD_RETRY_TIMEOUT_MS"] = "1"
os.environ["POLARS_CLOUD_RETRY_INIT_BACKOFF_MS"] = "2"
os.environ["POLARS_CLOUD_RETRY_MAX_BACKOFF_MS"] = "10373"
os.environ["POLARS_CLOUD_RETRY_BASE_MULTIPLIER"] = "6.28"

q = pl.scan_parquet(
    "s3://.../...",
    storage_options={"aws_endpoint_url": "https://localhost:333"},
    credential_provider=None,
)

with contextlib.suppress(OSError):
    q.collect()

""",
        ],
        stderr=subprocess.STDOUT,
    ).decode()

    assert (
        """\
init_backoff: 2ms, \
max_backoff: 10.373s, \
base: 6.28 }, \
max_retries: 1, \
retry_timeout: 1ms"""
        in capture
    )

    q = pl.scan_parquet(
        "s3://.../...",
        storage_options={
            "file_cache_ttl": 7,
            "max_retries": 0,
            "retry_timeout_ms": 23,
            "retry_init_backoff_ms": 24,
            "retry_max_backoff_ms": 9875,
            "retry_base_multiplier": 3.14159,
            "aws_endpoint_url": "https://localhost:333",
        },
        credential_provider=None,
    )

    # Discard anything captured so far so only the collect below is inspected.
    capfd.readouterr()

    with pytest.raises(OSError):
        q.collect()

    capture = capfd.readouterr().err

    assert "file_cache_ttl: 7" in capture

    assert (
        """\
init_backoff: 24ms, \
max_backoff: 9.875s, \
base: 3.14159 }, \
max_retries: 0, \
retry_timeout: 23ms"""
        in capture
    )