# Path: blob/main/py-polars/tests/unit/operations/test_drop.py
# 6939 views
"""Unit tests for the drop / drop_nulls / drop_nans operations in polars."""

from typing import Any

import pytest

import polars as pl
import polars.selectors as cs
from polars.testing import assert_frame_equal


def test_drop() -> None:
    """Check `drop` by resulting shape and `drop_in_place` by returned name."""
    df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"], "c": [1, 2, 3]})
    df = df.drop("a")
    assert df.shape == (3, 2)

    df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"], "c": [1, 2, 3]})
    s = df.drop_in_place("a")
    assert s.name == "a"


def test_drop_explode_6641() -> None:
    """Check a lazy explode -> struct -> drop -> concat_list pipeline.

    The struct column "test" is built from the two exploded columns, which are
    then dropped; the struct must still carry their values afterwards.
    (Presumably a regression test for issue 6641, going by the test name.)
    """
    df = pl.DataFrame(
        {
            "chromosome": ["chr1"] * 2,
            "identifier": [["chr1:10426:10429:ACC>A"], ["chr1:10426:10429:ACC>*"]],
            "alternate": [["A"], ["T"]],
            "quality": pl.Series([None, None], dtype=pl.Float32()),
        }
    ).lazy()

    assert (
        df.explode(["identifier", "alternate"])
        .with_columns(pl.struct(["identifier", "alternate"]).alias("test"))
        .drop(["identifier", "alternate"])
        .select(pl.concat_list([pl.col("test"), pl.col("test")]))
        .collect()
    ).to_dict(as_series=False) == {
        "test": [
            [
                {"identifier": "chr1:10426:10429:ACC>A", "alternate": "A"},
                {"identifier": "chr1:10426:10429:ACC>A", "alternate": "A"},
            ],
            [
                {"identifier": "chr1:10426:10429:ACC>*", "alternate": "T"},
                {"identifier": "chr1:10426:10429:ACC>*", "alternate": "T"},
            ],
        ]
    }


@pytest.mark.parametrize(
    "subset",
    [
        "foo",
        ["foo"],
        {"foo"},
    ],
)
def test_drop_nulls(subset: Any) -> None:
    """Check `drop_nulls` with no subset and with a subset given as str/list/set."""
    df = pl.DataFrame(
        {
            "foo": [1, 2, 3],
            "bar": [6, None, 8],
            "ham": ["a", "b", "c"],
        }
    )
    result = df.drop_nulls()
    expected = pl.DataFrame(
        {
            "foo": [1, 3],
            "bar": [6, 8],
            "ham": ["a", "c"],
        }
    )
    assert_frame_equal(result, expected)

    # below we only drop entries if they are null in the column 'foo'
    result = df.drop_nulls(subset)
    assert_frame_equal(result, df)


def test_drop_nulls_lazy() -> None:
    """Check `LazyFrame.drop_nulls` without a subset and with a selector subset."""
    lf = pl.LazyFrame({"foo": [1, 2, 3], "bar": [6, None, 8], "ham": ["a", "b", "c"]})
    expected = pl.LazyFrame({"foo": [1, 3], "bar": [6, 8], "ham": ["a", "c"]})

    result = lf.drop_nulls()
    assert_frame_equal(result, expected)

    # the selector matches "bar" and "ham"; "bar" holds the only null
    result = lf.drop_nulls(cs.contains("a"))
    assert_frame_equal(result, expected)


def test_drop_nulls_misc() -> None:
    """Check the expression-level `drop_nulls` on a single column."""
    df = pl.DataFrame({"nrs": [None, 1, 2, 3, None, 4, 5, None]})
    assert df.select(pl.col("nrs").drop_nulls()).to_dict(as_series=False) == {
        "nrs": [1, 2, 3, 4, 5]
    }


def test_drop_nulls_empty_subset() -> None:
    """Check that an empty subset (list or tuple) leaves the frame unchanged."""
    df = pl.DataFrame({"a": [1, None]})
    assert_frame_equal(df.drop_nulls([]), df)
    assert_frame_equal(df.drop_nulls(()), df)


def test_drop_columns() -> None:
    """Check `drop` with a list, a selector, a name, varargs and a set."""
    out = pl.LazyFrame({"a": [1], "b": [2], "c": [3]}).drop(["a", "b"])
    assert out.collect_schema().names() == ["c"]

    out = pl.LazyFrame({"a": [1], "b": [2], "c": [3]}).drop(~cs.starts_with("c"))
    assert out.collect_schema().names() == ["c"]

    out = pl.LazyFrame({"a": [1], "b": [2], "c": [3]}).drop("a")
    assert out.collect_schema().names() == ["b", "c"]

    out2 = pl.DataFrame({"a": [1], "b": [2], "c": [3]}).drop("a", "b")
    assert out2.collect_schema().names() == ["c"]

    out2 = pl.DataFrame({"a": [1], "b": [2], "c": [3]}).drop({"a", "b", "c"})
    assert out2.collect_schema().names() == []


@pytest.mark.parametrize("lazy", [True, False])
def test_drop_nans(lazy: bool) -> None:
    """Check `drop_nans` on eager and lazy frames, with and without a subset.

    The expected frames keep the all-null last row: only NaN rows are removed.
    """
    DataFrame = pl.LazyFrame if lazy else pl.DataFrame
    df = DataFrame(
        {
            "a": [1.0, float("nan"), 3.0, 4.0, None],
            "b": [10000, 20000, 30000, 40000, None],
            "c": [-90.5, 25.0, 0.0, float("nan"), None],
        }
    )
    expected = DataFrame(
        {
            "a": [1.0, 3.0, None],
            "b": [10000, 30000, None],
            "c": [-90.5, 0.0, None],
        }
    )
    assert_frame_equal(expected, df.drop_nans())

    # restricting to "c" keeps the row whose NaN is in "a"
    expected = DataFrame(
        {
            "a": [1.0, float("nan"), 3.0, None],
            "b": [10000, 20000, 30000, None],
            "c": [-90.5, 25.0, 0.0, None],
        }
    )
    assert_frame_equal(expected, df.drop_nans(subset=["c"]))
    assert_frame_equal(expected, df.drop_nans(subset=cs.ends_with("c")))

    expected = DataFrame(
        {
            "a": [1.0, 3.0, None],
            "b": [10000, 30000, None],
            "c": [-90.5, 0.0, None],
        }
    )
    assert_frame_equal(expected, df.drop_nans(subset=["a", "c"]))
    assert_frame_equal(expected, df.drop_nans(subset=cs.float()))


def test_drop_nan_ignore_null_3525() -> None:
    """Check that the expression-level `drop_nans` removes NaN but keeps null."""
    df = pl.DataFrame({"a": [1.0, float("nan"), 2.0, None, 3.0, 4.0]})
    assert df.select(pl.col("a").drop_nans()).to_series().to_list() == [
        1.0,
        2.0,
        None,
        3.0,
        4.0,
    ]


def test_drop_nans_empty_subset() -> None:
    """Check that an empty subset (list or tuple) leaves the frame unchanged."""
    df = pl.DataFrame({"a": [1.0, float("NaN")]})
    assert_frame_equal(df.drop_nans([]), df)
    assert_frame_equal(df.drop_nans(()), df)


def test_drop_without_parameters() -> None:
    """Check that `drop` with no columns returns an equal frame (eager and lazy)."""
    df = pl.DataFrame({"a": [1, 2]})
    assert_frame_equal(df.drop(), df)
    assert_frame_equal(df.lazy().drop(*[]), df.lazy())


def test_drop_strict() -> None:
    """Check that dropping a missing column raises unless `strict=False`."""
    df = pl.DataFrame({"a": [1, 2]})

    df.drop("a")

    with pytest.raises(pl.exceptions.ColumnNotFoundError, match="b"):
        df.drop("b")

    df.drop("a", strict=False)
    df.drop("b", strict=False)


def test_drop_regex_14069() -> None:
    """Check `drop` with a regex selector that matches columns "a" and "a2"."""
    df = pl.DataFrame({"a": 1, "a2": 2, "b": 3})
    assert df.drop(cs.matches("^a.*$")).columns == ["b"]


def test_drop_invalid_selector_19023() -> None:
    """Check that `drop` rejects the result of adding a list to a selector."""
    df = pl.DataFrame(
        data={"x": [1, 2], "x_b": [3, 4], "y_b": [10, 20], "z": ["a", "b"]}
    )
    with pytest.raises(pl.exceptions.InvalidOperationError, match="is not a selector"):
        df.drop(pl.selectors.ends_with("_b") + [])  # type: ignore[arg-type]