# Path: blob/main/py-polars/tests/unit/interop/numpy/test_from_numpy_df.py
# 6939 views
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np
import pytest
from numpy.testing import assert_array_equal

import polars as pl
from polars.testing import assert_frame_equal

if TYPE_CHECKING:
    import numpy.typing as npt

    from polars._typing import PolarsDataType, PolarsTemporalType


def test_from_numpy() -> None:
    """Build frames from 2D and 1D arrays; reject 0-dim and >2-dim arrays."""
    # 2x3 array read column-wise: each inner array becomes one column.
    data = np.array([[1, 2, 3], [4, 5, 6]])
    df = pl.from_numpy(
        data,
        schema=["a", "b"],
        orient="col",
        schema_overrides={"a": pl.UInt32, "b": pl.UInt32},
    )
    assert df.shape == (3, 2)
    assert df.rows() == [(1, 4), (2, 5), (3, 6)]
    assert df.schema == {"a": pl.UInt32, "b": pl.UInt32}

    # 1D object array of strings, no schema given.
    data2 = np.array(["foo", "bar"], dtype=object)
    df2 = pl.from_numpy(data2)
    assert df2.shape == (2, 1)
    assert df2.rows() == [("foo",), ("bar",)]
    assert df2.schema == {"column_0": pl.String}

    with pytest.raises(
        ValueError,
        match="cannot create DataFrame from array with more than two dimensions",
    ):
        _ = pl.from_numpy(np.array([[[1]]]))

    with pytest.raises(
        ValueError, match="cannot create DataFrame from zero-dimensional array"
    ):
        _ = pl.from_numpy(np.array(1))


def test_from_numpy_array_value() -> None:
    """A nested Python list cell yields a ``List(Int64)`` column."""
    df = pl.DataFrame({"A": [[2, 3]]})
    assert df.rows() == [([2, 3],)]
    assert df.schema == {"A": pl.List(pl.Int64)}


def test_construct_from_ndarray_value() -> None:
    """``pl.DataFrame`` keeps an ndarray cell of an object array as ``Object``."""
    array_cell = np.array([2, 3])
    df = pl.DataFrame(np.array([[array_cell, 4]], dtype=object))
    assert df.dtypes == [pl.Object, pl.Object]

    # Round-trip back to numpy and check both cells survive unchanged.
    to_numpy = df.to_numpy()
    assert to_numpy.shape == (1, 2)
    assert_array_equal(to_numpy[0][0], array_cell)
    assert to_numpy[0][1] == 4


def test_from_numpy_nparray_value() -> None:
    """``pl.from_numpy`` keeps an ndarray cell of an object array as ``Object``."""
    array_cell = np.array([2, 3])
    df = pl.from_numpy(np.array([[array_cell, 4]], dtype=object))
    assert df.dtypes == [pl.Object, pl.Object]

    # Round-trip back to numpy and check both cells survive unchanged.
    to_numpy = df.to_numpy()
    assert to_numpy.shape == (1, 2)
    assert_array_equal(to_numpy[0][0], array_cell)
    assert to_numpy[0][1] == 4


def test_from_numpy_structured() -> None:
    """Initialise frames from a structured array and from its recarray view."""
    test_data = [
        ("Google Pixel 7", 521.90, True),
        ("Apple iPhone 14 Pro", 999.00, True),
        ("Samsung Galaxy S23 Ultra", 1199.99, False),
        ("OnePlus 11", 699.00, True),
    ]
    # create a numpy structured array...
    arr_structured = np.array(
        test_data,
        dtype=np.dtype(
            [
                ("product", "U32"),
                ("price_usd", "float64"),
                ("in_stock", "bool"),
            ]
        ),
    )
    # ...and also establish as a record array view
    arr_records = arr_structured.view(np.recarray)

    # confirm that we can cleanly initialise a DataFrame from both,
    # respecting the native dtypes and any schema overrides, etc.
    for arr in (arr_structured, arr_records):
        df = pl.DataFrame(data=arr).sort(by="price_usd", descending=True)

        assert df.schema == {
            "product": pl.String,
            "price_usd": pl.Float64,
            "in_stock": pl.Boolean,
        }
        assert df.rows() == sorted(test_data, key=lambda row: -row[1])

        # Same data with renamed columns and a Float32 override, expressed
        # once inline in `schema` and once through `schema_overrides`.
        # (Distinct loop name so the sorted `df` above is not shadowed.)
        for renamed in (
            pl.DataFrame(
                data=arr, schema=["phone", ("price_usd", pl.Float32), "available"]
            ),
            pl.DataFrame(
                data=arr,
                schema=["phone", "price_usd", "available"],
                schema_overrides={"price_usd": pl.Float32},
            ),
        ):
            assert renamed.schema == {
                "phone": pl.String,
                "price_usd": pl.Float32,
                "available": pl.Boolean,
            }


def test_from_numpy2() -> None:
    """A ``datetime64[s]`` array is rejected when building a Series."""
    # note: numpy timeunit support is limited to those supported by polars.
    # as a result, datetime64[s] raises
    x = np.asarray(range(100_000, 200_000, 10_000), dtype="datetime64[s]")
    with pytest.raises(ValueError, match="Please cast to the closest supported unit"):
        pl.Series(x)


@pytest.mark.parametrize(
    ("numpy_time_unit", "expected_values", "expected_dtype"),
    [
        ("ns", ["1970-01-02T01:12:34.123456789"], pl.Datetime("ns")),
        ("us", ["1970-01-02T01:12:34.123456"], pl.Datetime("us")),
        ("ms", ["1970-01-02T01:12:34.123"], pl.Datetime("ms")),
        ("D", ["1970-01-02"], pl.Date),
    ],
)
def test_from_numpy_supported_units(
    numpy_time_unit: str,
    expected_values: list[str],
    expected_dtype: PolarsTemporalType,
) -> None:
    """Each parametrised ``datetime64`` unit maps to its expected polars dtype."""
    # The literal has more fractional digits than any unit under test; the
    # expected strings in the parametrisation are the per-unit truncations.
    values = np.array(
        ["1970-01-02T01:12:34.123456789123456789"],
        dtype=f"datetime64[{numpy_time_unit}]",
    )
    result = pl.from_numpy(values)
    expected = (
        pl.Series("column_0", expected_values).str.strptime(expected_dtype).to_frame()
    )
    assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    ("np_dtype", "dtype"),
    [
        (np.float64, pl.Float64),
        (np.int32, pl.Int32),
    ],
)
def test_from_numpy_empty(np_dtype: npt.DTypeLike, dtype: PolarsDataType) -> None:
    """An empty typed array produces an empty single-column frame of that dtype."""
    data = np.array([], dtype=np_dtype)
    result = pl.from_numpy(data, schema=["a"])
    expected = pl.Series("a", [], dtype=dtype).to_frame()
    assert_frame_equal(result, expected)