Path: blob/main/py-polars/tests/unit/io/test_parquet_field_overwrites.py
6939 views
import io

import pyarrow.parquet as pq
import pytest

import polars as pl
from polars.io.parquet import ParquetFieldOverwrites


def test_required_flat() -> None:
    """Check `required` on a flat integer column.

    `required=False` must yield a nullable Parquet field, `required=True` a
    non-nullable one, and `required=True` on data containing a null must be
    rejected with an `InvalidOperationError`.
    """
    f = io.BytesIO()
    pl.Series("a", [1, 2, 3]).to_frame().lazy().sink_parquet(
        f,
        field_overwrites=ParquetFieldOverwrites(name="a", required=False),
    )

    f.seek(0)
    assert pq.read_schema(f).field(0).nullable

    # Rewrite the same buffer from the start, this time marking the column
    # as required.
    f.seek(0)
    pl.Series("a", [1, 2, 3]).to_frame().lazy().sink_parquet(
        f,
        field_overwrites=ParquetFieldOverwrites(name="a", required=True),
    )

    # `truncate()` cuts the buffer at the current stream position.
    # NOTE(review): presumably the sink leaves the position at the end of the
    # second file, so this drops leftover bytes of the first write - confirm.
    f.truncate()
    f.seek(0)
    assert not pq.read_schema(f).field(0).nullable

    # A null in a column declared as required must fail the write.
    f = io.BytesIO()
    with pytest.raises(pl.exceptions.InvalidOperationError, match="missing value"):
        pl.Series("a", [1, 2, 3, None]).to_frame().lazy().sink_parquet(
            f,
            field_overwrites=ParquetFieldOverwrites(name="a", required=True),
        )


@pytest.mark.parametrize("dtype", [pl.List(pl.Int64()), pl.Array(pl.Int64(), 1)])
def test_required_list(dtype: pl.DataType) -> None:
    """Check `required` on list-like columns and on their element field.

    Runs once for `pl.List(pl.Int64())` and once for `pl.Array(pl.Int64(), 1)`.
    """
    # Only the outer field is required: a null *element* is still accepted and
    # the value field stays nullable.
    f = io.BytesIO()
    pl.Series("a", [[1], [2], [3], [None]], dtype).to_frame().lazy().sink_parquet(
        f,
        field_overwrites=ParquetFieldOverwrites(name="a", required=True),
    )
    f.seek(0)
    schema = pq.read_schema(f)
    assert not schema.field(0).nullable
    assert schema.field(0).type.value_field.nullable

    # A null *row* violates the requirement on the outer field.
    with pytest.raises(pl.exceptions.InvalidOperationError, match="missing value"):
        pl.Series("a", [[1], [2], [3], None], dtype).to_frame().lazy().sink_parquet(
            io.BytesIO(),
            field_overwrites=ParquetFieldOverwrites(name="a", required=True),
        )

    # With the child also required, a null element is rejected as well.
    with pytest.raises(pl.exceptions.InvalidOperationError, match="missing value"):
        pl.Series("a", [[1], [2], [3], [None]], dtype).to_frame().lazy().sink_parquet(
            io.BytesIO(),
            field_overwrites=ParquetFieldOverwrites(
                name="a",
                required=True,
                children=ParquetFieldOverwrites(required=True),
            ),
        )

    # Fully populated data: both the outer field and the value field end up
    # non-nullable.
    f = io.BytesIO()
    pl.Series("a", [[1], [2], [3], [4]], dtype).to_frame().lazy().sink_parquet(
        f,
        field_overwrites=ParquetFieldOverwrites(
            name="a",
            required=True,
            children=ParquetFieldOverwrites(required=True),
        ),
    )
    f.seek(0)
    schema = pq.read_schema(f)
    assert not schema.field(0).nullable
    assert not schema.field(0).type.value_field.nullable


def test_required_struct() -> None:
    """Check `required` on a struct column and on its member field `x`."""
    # Only the struct itself is required; its member stays nullable.
    f = io.BytesIO()
    pl.Series(
        "a", [{"x": 1}, {"x": 2}, {"x": 3}, {"x": 4}]
    ).to_frame().lazy().sink_parquet(
        f,
        field_overwrites=ParquetFieldOverwrites(
            name="a",
            required=True,
        ),
    )
    f.seek(0)
    schema = pq.read_schema(f)
    assert not schema.field(0).nullable
    assert schema.field(0).type.fields[0].nullable

    # A null member value is accepted while only the struct is required.
    f = io.BytesIO()
    pl.Series(
        "a", [{"x": 1}, {"x": None}, {"x": 2}, {"x": 3}]
    ).to_frame().lazy().sink_parquet(
        f,
        field_overwrites=ParquetFieldOverwrites(
            name="a",
            required=True,
        ),
    )

    f.seek(0)
    schema = pq.read_schema(f)
    assert not schema.field(0).nullable
    assert schema.field(0).type.fields[0].nullable

    # The same data is rejected once member `x` is required too.
    with pytest.raises(pl.exceptions.InvalidOperationError, match="missing value"):
        pl.Series(
            "a", [{"x": 1}, {"x": None}, {"x": 2}, {"x": 3}]
        ).to_frame().lazy().sink_parquet(
            io.BytesIO(),
            field_overwrites=ParquetFieldOverwrites(
                name="a",
                required=True,
                children={"x": ParquetFieldOverwrites(required=True)},
            ),
        )

    # Fully populated data: struct and member are both non-nullable.
    f = io.BytesIO()
    pl.Series(
        "a", [{"x": 1}, {"x": 2}, {"x": 2}, {"x": 3}]
    ).to_frame().lazy().sink_parquet(
        f,
        field_overwrites=ParquetFieldOverwrites(
            name="a",
            required=True,
            children={"x": ParquetFieldOverwrites(required=True)},
        ),
    )
    f.seek(0)
    schema = pq.read_schema(f)
    assert not schema.field(0).nullable
    assert not schema.field(0).type.fields[0].nullable