Path: blob/main/crates/polars-python/src/conversion/mod.rs
7889 views
pub(crate) mod any_value;1mod categorical;2pub(crate) mod chunked_array;3mod datetime;45use std::convert::Infallible;6use std::fmt::{Display, Formatter};7use std::fs::File;8use std::hash::{Hash, Hasher};910pub use categorical::PyCategories;11#[cfg(feature = "object")]12use polars::chunked_array::object::PolarsObjectSafe;13use polars::frame::row::Row;14#[cfg(feature = "avro")]15use polars::io::avro::AvroCompression;16#[cfg(feature = "cloud")]17use polars::io::cloud::CloudOptions;18use polars::prelude::ColumnMapping;19use polars::prelude::default_values::{20DefaultFieldValues, IcebergIdentityTransformedPartitionFields,21};22use polars::prelude::deletion::DeletionFilesList;23use polars::series::ops::NullBehavior;24use polars_compute::decimal::dec128_verify_prec_scale;25use polars_core::datatypes::extension::get_extension_type_or_generic;26use polars_core::schema::iceberg::IcebergSchema;27use polars_core::utils::arrow::array::Array;28use polars_core::utils::arrow::types::NativeType;29use polars_core::utils::materialize_dyn_int;30use polars_lazy::prelude::*;31#[cfg(feature = "parquet")]32use polars_parquet::write::StatisticsOptions;33use polars_plan::dsl::ScanSources;34use polars_utils::compression::{BrotliLevel, GzipLevel, ZstdLevel};35use polars_utils::mmap::MemSlice;36use polars_utils::pl_str::PlSmallStr;37use polars_utils::total_ord::{TotalEq, TotalHash};38use pyo3::basic::CompareOp;39use pyo3::exceptions::{PyTypeError, PyValueError};40use pyo3::intern;41use pyo3::prelude::*;42use pyo3::pybacked::PyBackedStr;43use pyo3::sync::PyOnceLock;44use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};4546use crate::error::PyPolarsErr;47use crate::expr::PyExpr;48use crate::file::{PythonScanSourceInput, get_python_scan_source_input};49use crate::interop::arrow::to_rust::field_to_rust_arrow;50#[cfg(feature = "object")]51use crate::object::OBJECT_NAME;52use crate::prelude::*;53use crate::py_modules::{pl_series, polars};54use crate::series::PySeries;55use 
crate::utils::to_py_err;56use crate::{PyDataFrame, PyLazyFrame};5758/// # Safety59/// Should only be implemented for transparent types60pub(crate) unsafe trait Transparent {61type Target;62}6364unsafe impl Transparent for PySeries {65type Target = Series;66}6768unsafe impl<T> Transparent for Wrap<T> {69type Target = T;70}7172unsafe impl<T: Transparent> Transparent for Option<T> {73type Target = Option<T::Target>;74}7576pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {77assert_eq!(size_of::<T>(), size_of::<T::Target>());78assert_eq!(align_of::<T>(), align_of::<T::Target>());79let len = input.len();80let cap = input.capacity();81let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);82let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();83let ptr: *mut T::Target = vec_ptr as *mut T::Target;84unsafe { Vec::from_raw_parts(ptr, len, cap) }85}8687pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {88reinterpret_vec(buf)89}9091#[derive(PartialEq, Eq, Hash)]92#[repr(transparent)]93pub struct Wrap<T>(pub T);9495impl<T> Clone for Wrap<T>96where97T: Clone,98{99fn clone(&self) -> Self {100Wrap(self.0.clone())101}102}103impl<T> From<T> for Wrap<T> {104fn from(t: T) -> Self {105Wrap(t)106}107}108109// extract a Rust DataFrame from a python DataFrame, that is DataFrame<PyDataFrame<RustDataFrame>>110pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {111let pydf = obj.getattr(intern!(obj.py(), "_df"))?;112Ok(pydf.extract::<PyDataFrame>()?.df.into_inner())113}114115pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {116let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;117Ok(pydf.extract::<PyLazyFrame>()?.ldf.into_inner())118}119120pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {121let s = obj.getattr(intern!(obj.py(), "_s"))?;122Ok(s.extract::<PySeries>()?.series.into_inner())123}124125pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {126let 
series = pl_series(py).bind(py);127let constructor = series.getattr(intern!(py, "_from_pyseries"))?;128constructor.call1((s,))129}130131impl<'py> FromPyObject<'py> for Wrap<PlSmallStr> {132fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {133Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))134}135}136137#[cfg(feature = "csv")]138impl<'py> FromPyObject<'py> for Wrap<NullValues> {139fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {140if let Ok(s) = ob.extract::<PyBackedStr>() {141Ok(Wrap(NullValues::AllColumnsSingle((&*s).into())))142} else if let Ok(s) = ob.extract::<Vec<PyBackedStr>>() {143Ok(Wrap(NullValues::AllColumns(144s.into_iter().map(|x| (&*x).into()).collect(),145)))146} else if let Ok(s) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {147Ok(Wrap(NullValues::Named(148s.into_iter()149.map(|(a, b)| ((&*a).into(), (&*b).into()))150.collect(),151)))152} else {153Err(154PyPolarsErr::Other("could not extract value from null_values argument".into())155.into(),156)157}158}159}160161fn struct_dict<'a, 'py>(162py: Python<'py>,163vals: impl Iterator<Item = AnyValue<'a>>,164flds: &[Field],165) -> PyResult<Bound<'py, PyDict>> {166let dict = PyDict::new(py);167flds.iter().zip(vals).try_for_each(|(fld, val)| {168dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)169})?;170Ok(dict)171}172173impl<'py> IntoPyObject<'py> for Wrap<Series> {174type Target = PyAny;175type Output = Bound<'py, Self::Target>;176type Error = PyErr;177178fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {179to_series(py, PySeries::new(self.0))180}181}182183impl<'py> IntoPyObject<'py> for &Wrap<DataType> {184type Target = PyAny;185type Output = Bound<'py, Self::Target>;186type Error = PyErr;187188fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {189let pl = polars(py).bind(py);190191match &self.0 {192DataType::Int8 => {193let class = pl.getattr(intern!(py, "Int8"))?;194class.call0()195},196DataType::Int16 => 
{197let class = pl.getattr(intern!(py, "Int16"))?;198class.call0()199},200DataType::Int32 => {201let class = pl.getattr(intern!(py, "Int32"))?;202class.call0()203},204DataType::Int64 => {205let class = pl.getattr(intern!(py, "Int64"))?;206class.call0()207},208DataType::UInt8 => {209let class = pl.getattr(intern!(py, "UInt8"))?;210class.call0()211},212DataType::UInt16 => {213let class = pl.getattr(intern!(py, "UInt16"))?;214class.call0()215},216DataType::UInt32 => {217let class = pl.getattr(intern!(py, "UInt32"))?;218class.call0()219},220DataType::UInt64 => {221let class = pl.getattr(intern!(py, "UInt64"))?;222class.call0()223},224DataType::UInt128 => {225let class = pl.getattr(intern!(py, "UInt128"))?;226class.call0()227},228DataType::Int128 => {229let class = pl.getattr(intern!(py, "Int128"))?;230class.call0()231},232DataType::Float16 => {233let class = pl.getattr(intern!(py, "Float16"))?;234class.call0()235},236DataType::Float32 => {237let class = pl.getattr(intern!(py, "Float32"))?;238class.call0()239},240DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {241let class = pl.getattr(intern!(py, "Float64"))?;242class.call0()243},244DataType::Decimal(precision, scale) => {245let class = pl.getattr(intern!(py, "Decimal"))?;246let args = (*precision, *scale);247class.call1(args)248},249DataType::Boolean => {250let class = pl.getattr(intern!(py, "Boolean"))?;251class.call0()252},253DataType::String | DataType::Unknown(UnknownKind::Str) => {254let class = pl.getattr(intern!(py, "String"))?;255class.call0()256},257DataType::Binary => {258let class = pl.getattr(intern!(py, "Binary"))?;259class.call0()260},261DataType::Array(inner, size) => {262let class = pl.getattr(intern!(py, "Array"))?;263let inner = Wrap(*inner.clone());264let args = (&inner, *size);265class.call1(args)266},267DataType::List(inner) => {268let class = pl.getattr(intern!(py, "List"))?;269let inner = Wrap(*inner.clone());270class.call1((&inner,))271},272DataType::Date => {273let class = 
pl.getattr(intern!(py, "Date"))?;274class.call0()275},276DataType::Datetime(tu, tz) => {277let datetime_class = pl.getattr(intern!(py, "Datetime"))?;278datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))279},280DataType::Duration(tu) => {281let duration_class = pl.getattr(intern!(py, "Duration"))?;282duration_class.call1((tu.to_ascii(),))283},284#[cfg(feature = "object")]285DataType::Object(_) => {286let class = pl.getattr(intern!(py, "Object"))?;287class.call0()288},289DataType::Categorical(cats, _) => {290let categories_class = pl.getattr(intern!(py, "Categories"))?;291let categorical_class = pl.getattr(intern!(py, "Categorical"))?;292let categories = categories_class293.call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;294let kwargs = [("categories", categories)];295categorical_class.call((), Some(&kwargs.into_py_dict(py)?))296},297DataType::Enum(_, mapping) => {298let categories = unsafe {299StringChunked::from_chunks(300PlSmallStr::from_static("category"),301vec![mapping.to_arrow(true)],302)303};304let class = pl.getattr(intern!(py, "Enum"))?;305let series = to_series(py, categories.into_series().into())?;306class.call1((series,))307},308DataType::Time => pl.getattr(intern!(py, "Time")).and_then(|x| x.call0()),309DataType::Struct(fields) => {310let field_class = pl.getattr(intern!(py, "Field"))?;311let iter = fields.iter().map(|fld| {312let name = fld.name().as_str();313let dtype = Wrap(fld.dtype().clone());314field_class.call1((name, &dtype)).unwrap()315});316let fields = PyList::new(py, iter)?;317let struct_class = pl.getattr(intern!(py, "Struct"))?;318struct_class.call1((fields,))319},320DataType::Null => {321let class = pl.getattr(intern!(py, "Null"))?;322class.call0()323},324DataType::Extension(typ, storage) => {325let py_storage = Wrap((**storage).clone()).into_pyobject(py)?;326let py_typ = pl327.getattr(intern!(py, "get_extension_type"))?328.call1((typ.name(),))?;329let class = if py_typ.is_none()330|| 
py_typ.str().map(|s| s == "storage").ok() == Some(true)331{332pl.getattr(intern!(py, "Extension"))?333} else {334py_typ335};336let from_params = class.getattr(intern!(py, "ext_from_params"))?;337from_params.call1((typ.name(), py_storage, typ.serialize_metadata()))338},339DataType::Unknown(UnknownKind::Int(v)) => {340Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)341},342DataType::Unknown(_) => {343let class = pl.getattr(intern!(py, "Unknown"))?;344class.call0()345},346DataType::BinaryOffset => {347unimplemented!()348},349}350}351}352353impl<'py> FromPyObject<'py> for Wrap<Field> {354fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {355let py = ob.py();356let name = ob357.getattr(intern!(py, "name"))?358.str()?359.extract::<PyBackedStr>()?;360let dtype = ob361.getattr(intern!(py, "dtype"))?362.extract::<Wrap<DataType>>()?;363Ok(Wrap(Field::new((&*name).into(), dtype.0)))364}365}366367impl<'py> FromPyObject<'py> for Wrap<DataType> {368fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {369let py = ob.py();370let type_name = ob.get_type().qualname()?.to_string();371372let dtype = match &*type_name {373"DataTypeClass" => {374// just the class, not an object375let name = ob376.getattr(intern!(py, "__name__"))?377.str()?378.extract::<PyBackedStr>()?;379match &*name {380"Int8" => DataType::Int8,381"Int16" => DataType::Int16,382"Int32" => DataType::Int32,383"Int64" => DataType::Int64,384"Int128" => DataType::Int128,385"UInt8" => DataType::UInt8,386"UInt16" => DataType::UInt16,387"UInt32" => DataType::UInt32,388"UInt64" => DataType::UInt64,389"UInt128" => DataType::UInt128,390"Float16" => DataType::Float16,391"Float32" => DataType::Float32,392"Float64" => DataType::Float64,393"Boolean" => DataType::Boolean,394"String" => DataType::String,395"Binary" => DataType::Binary,396"Categorical" => DataType::from_categories(Categories::global()),397"Enum" => DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),398"Date" => 
DataType::Date,399"Time" => DataType::Time,400"Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),401"Duration" => DataType::Duration(TimeUnit::Microseconds),402"List" => DataType::List(Box::new(DataType::Null)),403"Array" => DataType::Array(Box::new(DataType::Null), 0),404"Struct" => DataType::Struct(vec![]),405"Null" => DataType::Null,406#[cfg(feature = "object")]407"Object" => DataType::Object(OBJECT_NAME),408"Unknown" => DataType::Unknown(Default::default()),409"Decimal" => {410return Err(PyTypeError::new_err(411"Decimal without precision/scale set is not a valid Polars datatype",412));413},414dt => {415return Err(PyTypeError::new_err(format!(416"'{dt}' is not a Polars data type",417)));418},419}420},421"Int8" => DataType::Int8,422"Int16" => DataType::Int16,423"Int32" => DataType::Int32,424"Int64" => DataType::Int64,425"Int128" => DataType::Int128,426"UInt8" => DataType::UInt8,427"UInt16" => DataType::UInt16,428"UInt32" => DataType::UInt32,429"UInt64" => DataType::UInt64,430"UInt128" => DataType::UInt128,431"Float16" => DataType::Float16,432"Float32" => DataType::Float32,433"Float64" => DataType::Float64,434"Boolean" => DataType::Boolean,435"String" => DataType::String,436"Binary" => DataType::Binary,437"Categorical" => {438let categories = ob.getattr(intern!(py, "categories")).unwrap();439let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();440let py_categories = py_categories.extract::<PyCategories>()?;441DataType::from_categories(py_categories.categories().clone())442},443"Enum" => {444let categories = ob.getattr(intern!(py, "categories")).unwrap();445let s = get_series(&categories.as_borrowed())?;446let ca = s.str().map_err(PyPolarsErr::from)?;447let categories = ca.downcast_iter().next().unwrap().clone();448assert!(!categories.has_nulls());449DataType::from_frozen_categories(450FrozenCategories::new(categories.values_iter()).unwrap(),451)452},453"Date" => DataType::Date,454"Time" => DataType::Time,455"Datetime" => {456let 
time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();457let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;458let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();459let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;460DataType::Datetime(461time_unit,462TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,463)464},465"Duration" => {466let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();467let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;468DataType::Duration(time_unit)469},470"Decimal" => {471let precision = ob.getattr(intern!(py, "precision"))?.extract()?;472let scale = ob.getattr(intern!(py, "scale"))?.extract()?;473dec128_verify_prec_scale(precision, scale).map_err(to_py_err)?;474DataType::Decimal(precision, scale)475},476"List" => {477let inner = ob.getattr(intern!(py, "inner")).unwrap();478let inner = inner.extract::<Wrap<DataType>>()?;479DataType::List(Box::new(inner.0))480},481"Array" => {482let inner = ob.getattr(intern!(py, "inner")).unwrap();483let size = ob.getattr(intern!(py, "size")).unwrap();484let inner = inner.extract::<Wrap<DataType>>()?;485let size = size.extract::<usize>()?;486DataType::Array(Box::new(inner.0), size)487},488"Struct" => {489let fields = ob.getattr(intern!(py, "fields"))?;490let fields = fields491.extract::<Vec<Wrap<Field>>>()?492.into_iter()493.map(|f| f.0)494.collect::<Vec<Field>>();495DataType::Struct(fields)496},497"Null" => DataType::Null,498#[cfg(feature = "object")]499"Object" => DataType::Object(OBJECT_NAME),500"Unknown" => DataType::Unknown(Default::default()),501dt => {502let base_ext = polars(py)503.getattr(py, intern!(py, "BaseExtension"))504.unwrap();505if ob.is_instance(base_ext.bind(py))? 
{506let ext_name_f = ob.getattr(intern!(py, "ext_name"))?;507let ext_metadata_f = ob.getattr(intern!(py, "ext_metadata"))?;508let ext_storage_f = ob.getattr(intern!(py, "ext_storage"))?;509let name: String = ext_name_f.call0()?.extract()?;510let metadata: Option<String> = ext_metadata_f.call0()?.extract()?;511let storage: Wrap<DataType> = ext_storage_f.call0()?.extract()?;512let ext_typ =513get_extension_type_or_generic(&name, &storage.0, metadata.as_deref());514return Ok(Wrap(DataType::Extension(ext_typ, Box::new(storage.0))));515}516517return Err(PyTypeError::new_err(format!(518"'{dt}' is not a Polars data type",519)));520},521};522Ok(Wrap(dtype))523}524}525526impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {527type Target = PyString;528type Output = Bound<'py, Self::Target>;529type Error = Infallible;530531fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {532self.0.to_ascii().into_pyobject(py)533}534}535536#[cfg(feature = "parquet")]537impl<'py> FromPyObject<'py> for Wrap<StatisticsOptions> {538fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {539let mut statistics = StatisticsOptions::empty();540541let dict = ob.downcast::<PyDict>()?;542for (key, val) in dict {543let key = key.extract::<PyBackedStr>()?;544let val = val.extract::<bool>()?;545546match key.as_ref() {547"min" => statistics.min_value = val,548"max" => statistics.max_value = val,549"distinct_count" => statistics.distinct_count = val,550"null_count" => statistics.null_count = val,551_ => {552return Err(PyTypeError::new_err(format!(553"'{key}' is not a valid statistic option",554)));555},556}557}558559Ok(Wrap(statistics))560}561}562563impl<'py> FromPyObject<'py> for Wrap<Row<'static>> {564fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {565let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;566let vals = reinterpret_vec(vals);567Ok(Wrap(Row(vals)))568}569}570571impl<'py> FromPyObject<'py> for Wrap<Schema> {572fn extract_bound(ob: &Bound<'py, PyAny>) 
-> PyResult<Self> {573let dict = ob.downcast::<PyDict>()?;574575Ok(Wrap(576dict.iter()577.map(|(key, val)| {578let key = key.extract::<PyBackedStr>()?;579let val = val.extract::<Wrap<DataType>>()?;580581Ok(Field::new((&*key).into(), val.0))582})583.collect::<PyResult<Schema>>()?,584))585}586}587588impl<'py> FromPyObject<'py> for Wrap<ArrowSchema> {589fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {590let py = ob.py();591592let pyarrow_schema_cls = py593.import(intern!(py, "pyarrow"))?594.getattr(intern!(py, "Schema"))?;595596if ob.is_none() {597return Err(PyValueError::new_err("arrow_schema() returned None").into());598}599600let schema_cls = ob.getattr(intern!(py, "__class__"))?;601602if !schema_cls.is(&pyarrow_schema_cls) {603return Err(PyTypeError::new_err(format!(604"expected pyarrow.Schema, got: {schema_cls}"605)));606}607608let mut iter = ob.try_iter()?.map(|x| x.and_then(field_to_rust_arrow));609610let mut last_err = None;611612let schema =613ArrowSchema::from_iter_check_duplicates(std::iter::from_fn(|| match iter.next() {614Some(Ok(v)) => Some(v),615Some(Err(e)) => {616last_err = Some(e);617None618},619None => None,620}))621.map_err(to_py_err)?;622623if let Some(last_err) = last_err {624return Err(last_err.into());625}626627Ok(Wrap(schema))628}629}630631impl<'py> FromPyObject<'py> for Wrap<ScanSources> {632fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {633let list = ob.downcast::<PyList>()?.to_owned();634635if list.is_empty() {636return Ok(Wrap(ScanSources::default()));637}638639enum MutableSources {640Paths(Vec<PlPath>),641Files(Vec<File>),642Buffers(Vec<MemSlice>),643}644645let num_items = list.len();646let mut iter = list647.into_iter()648.map(|val| get_python_scan_source_input(val.unbind(), false));649650let Some(first) = iter.next() else {651return Ok(Wrap(ScanSources::default()));652};653654let mut sources = match first? 
{655PythonScanSourceInput::Path(path) => {656let mut sources = Vec::with_capacity(num_items);657sources.push(path);658MutableSources::Paths(sources)659},660PythonScanSourceInput::File(file) => {661let mut sources = Vec::with_capacity(num_items);662sources.push(file.into());663MutableSources::Files(sources)664},665PythonScanSourceInput::Buffer(buffer) => {666let mut sources = Vec::with_capacity(num_items);667sources.push(buffer);668MutableSources::Buffers(sources)669},670};671672for source in iter {673match (&mut sources, source?) {674(MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),675(MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),676(MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),677_ => {678return Err(PyTypeError::new_err(679"Cannot combine in-memory bytes, paths and files for scan sources",680));681},682}683}684685Ok(Wrap(match sources {686MutableSources::Paths(i) => ScanSources::Paths(i.into()),687MutableSources::Files(i) => ScanSources::Files(i.into()),688MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),689}))690}691}692693impl<'py> IntoPyObject<'py> for Wrap<Schema> {694type Target = PyDict;695type Output = Bound<'py, Self::Target>;696type Error = PyErr;697698fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {699let dict = PyDict::new(py);700self.0701.iter()702.try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;703Ok(dict)704}705}706707#[derive(Debug)]708#[repr(transparent)]709pub struct ObjectValue {710pub inner: Py<PyAny>,711}712713impl Clone for ObjectValue {714fn clone(&self) -> Self {715Python::attach(|py| Self {716inner: self.inner.clone_ref(py),717})718}719}720721impl Hash for ObjectValue {722fn hash<H: Hasher>(&self, state: &mut H) {723let h = Python::attach(|py| self.inner.bind(py).hash().expect("should be hashable"));724state.write_isize(h)725}726}727728impl Eq for ObjectValue {}729730impl PartialEq for 
ObjectValue {731fn eq(&self, other: &Self) -> bool {732Python::attach(|py| {733match self734.inner735.bind(py)736.rich_compare(other.inner.bind(py), CompareOp::Eq)737{738Ok(result) => result.is_truthy().unwrap(),739Err(_) => false,740}741})742}743}744745impl TotalEq for ObjectValue {746fn tot_eq(&self, other: &Self) -> bool {747self == other748}749}750751impl TotalHash for ObjectValue {752fn tot_hash<H>(&self, state: &mut H)753where754H: Hasher,755{756self.hash(state);757}758}759760impl Display for ObjectValue {761fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {762write!(f, "{}", self.inner)763}764}765766#[cfg(feature = "object")]767impl PolarsObject for ObjectValue {768fn type_name() -> &'static str {769"object"770}771}772773impl From<Py<PyAny>> for ObjectValue {774fn from(p: Py<PyAny>) -> Self {775Self { inner: p }776}777}778779impl<'py> FromPyObject<'py> for ObjectValue {780fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {781Ok(ObjectValue {782inner: ob.to_owned().unbind(),783})784}785}786787/// # Safety788///789/// The caller is responsible for checking that val is Object otherwise UB790#[cfg(feature = "object")]791impl From<&dyn PolarsObjectSafe> for &ObjectValue {792fn from(val: &dyn PolarsObjectSafe) -> Self {793unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }794}795}796797impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {798type Target = PyAny;799type Output = Borrowed<'a, 'py, Self::Target>;800type Error = std::convert::Infallible;801802fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {803Ok(self.inner.bind_borrowed(py))804}805}806807impl Default for ObjectValue {808fn default() -> Self {809Python::attach(|py| ObjectValue { inner: py.None() })810}811}812813impl<'py, T: NativeType + FromPyObject<'py>> FromPyObject<'py> for Wrap<Vec<T>> {814fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {815let seq = obj.downcast::<PySequence>()?;816let mut v = 
Vec::with_capacity(seq.len().unwrap_or(0));817for item in seq.try_iter()? {818v.push(item?.extract::<T>()?);819}820Ok(Wrap(v))821}822}823824#[cfg(feature = "asof_join")]825impl<'py> FromPyObject<'py> for Wrap<AsofStrategy> {826fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {827let parsed = match &*(ob.extract::<PyBackedStr>()?) {828"backward" => AsofStrategy::Backward,829"forward" => AsofStrategy::Forward,830"nearest" => AsofStrategy::Nearest,831v => {832return Err(PyValueError::new_err(format!(833"asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",834)));835},836};837Ok(Wrap(parsed))838}839}840841impl<'py> FromPyObject<'py> for Wrap<InterpolationMethod> {842fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {843let parsed = match &*(ob.extract::<PyBackedStr>()?) {844"linear" => InterpolationMethod::Linear,845"nearest" => InterpolationMethod::Nearest,846v => {847return Err(PyValueError::new_err(format!(848"interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",849)));850},851};852Ok(Wrap(parsed))853}854}855856#[cfg(feature = "avro")]857impl<'py> FromPyObject<'py> for Wrap<Option<AvroCompression>> {858fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {859let parsed = match &*ob.extract::<PyBackedStr>()? {860"uncompressed" => None,861"snappy" => Some(AvroCompression::Snappy),862"deflate" => Some(AvroCompression::Deflate),863v => {864return Err(PyValueError::new_err(format!(865"avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",866)));867},868};869Ok(Wrap(parsed))870}871}872873impl<'py> FromPyObject<'py> for Wrap<StartBy> {874fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {875let parsed = match &*ob.extract::<PyBackedStr>()? 
{876"window" => StartBy::WindowBound,877"datapoint" => StartBy::DataPoint,878"monday" => StartBy::Monday,879"tuesday" => StartBy::Tuesday,880"wednesday" => StartBy::Wednesday,881"thursday" => StartBy::Thursday,882"friday" => StartBy::Friday,883"saturday" => StartBy::Saturday,884"sunday" => StartBy::Sunday,885v => {886return Err(PyValueError::new_err(format!(887"`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",888)));889},890};891Ok(Wrap(parsed))892}893}894895impl<'py> FromPyObject<'py> for Wrap<ClosedWindow> {896fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {897let parsed = match &*ob.extract::<PyBackedStr>()? {898"left" => ClosedWindow::Left,899"right" => ClosedWindow::Right,900"both" => ClosedWindow::Both,901"none" => ClosedWindow::None,902v => {903return Err(PyValueError::new_err(format!(904"`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",905)));906},907};908Ok(Wrap(parsed))909}910}911912impl<'py> FromPyObject<'py> for Wrap<RoundMode> {913fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {914let parsed = match &*ob.extract::<PyBackedStr>()? {915"half_to_even" => RoundMode::HalfToEven,916"half_away_from_zero" => RoundMode::HalfAwayFromZero,917v => {918return Err(PyValueError::new_err(format!(919"`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",920)));921},922};923Ok(Wrap(parsed))924}925}926927#[cfg(feature = "csv")]928impl<'py> FromPyObject<'py> for Wrap<CsvEncoding> {929fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {930let parsed = match &*ob.extract::<PyBackedStr>()? 
{931"utf8" => CsvEncoding::Utf8,932"utf8-lossy" => CsvEncoding::LossyUtf8,933v => {934return Err(PyValueError::new_err(format!(935"csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",936)));937},938};939Ok(Wrap(parsed))940}941}942943#[cfg(feature = "ipc")]944impl<'py> FromPyObject<'py> for Wrap<Option<IpcCompression>> {945fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {946let parsed = match &*ob.extract::<PyBackedStr>()? {947"uncompressed" => None,948"lz4" => Some(IpcCompression::LZ4),949"zstd" => Some(IpcCompression::ZSTD(Default::default())),950v => {951return Err(PyValueError::new_err(format!(952"ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",953)));954},955};956Ok(Wrap(parsed))957}958}959960impl<'py> FromPyObject<'py> for Wrap<JoinType> {961fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {962let parsed = match &*ob.extract::<PyBackedStr>()? {963"inner" => JoinType::Inner,964"left" => JoinType::Left,965"right" => JoinType::Right,966"full" => JoinType::Full,967"semi" => JoinType::Semi,968"anti" => JoinType::Anti,969#[cfg(feature = "cross_join")]970"cross" => JoinType::Cross,971v => {972return Err(PyValueError::new_err(format!(973"`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",974)));975},976};977Ok(Wrap(parsed))978}979}980981impl<'py> FromPyObject<'py> for Wrap<Label> {982fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {983let parsed = match &*ob.extract::<PyBackedStr>()? {984"left" => Label::Left,985"right" => Label::Right,986"datapoint" => Label::DataPoint,987v => {988return Err(PyValueError::new_err(format!(989"`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",990)));991},992};993Ok(Wrap(parsed))994}995}996997impl<'py> FromPyObject<'py> for Wrap<ListToStructWidthStrategy> {998fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {999let parsed = match &*ob.extract::<PyBackedStr>()? 
{
            "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
            "max_width" => ListToStructWidthStrategy::MaxWidth,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses a Python `str` into a [`NonExistent`] variant.
///
/// Raises `ValueError` for any string other than `'null'` or `'raise'`; the
/// extraction error is propagated when `ob` is not a Python `str`.
impl<'py> FromPyObject<'py> for Wrap<NonExistent> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "null" => NonExistent::Null,
            "raise" => NonExistent::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses a Python `str` (`'drop'` or `'ignore'`) into a [`NullBehavior`].
///
/// Raises `ValueError` for any other string.
impl<'py> FromPyObject<'py> for Wrap<NullBehavior> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "drop" => NullBehavior::Drop,
            "ignore" => NullBehavior::Ignore,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses a Python `str` (`'ignore'` or `'propagate'`) into a [`NullStrategy`].
///
/// Raises `ValueError` for any other string.
impl<'py> FromPyObject<'py> for Wrap<NullStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "ignore" => NullStrategy::Ignore,
            "propagate" => NullStrategy::Propagate,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `parallel` string into a [`ParallelStrategy`].
///
/// Raises `ValueError` for any string outside the listed options.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<ParallelStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "auto" => ParallelStrategy::Auto,
            "columns" => ParallelStrategy::Columns,
            "row_groups" => ParallelStrategy::RowGroups,
            "prefiltered" => ParallelStrategy::Prefiltered,
            "none" => ParallelStrategy::None,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses a Python `str` (`'fortran'` or `'c'`) into an [`IndexOrder`].
///
/// Raises `ValueError` for any other string.
impl<'py> FromPyObject<'py> for Wrap<IndexOrder> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "fortran" => IndexOrder::Fortran,
            "c" => IndexOrder::C,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`order` must be one of {{'fortran', 'c'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `interpolation` string into a [`QuantileMethod`].
///
/// Raises `ValueError` for any string outside the listed options.
impl<'py> FromPyObject<'py> for Wrap<QuantileMethod> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "lower" => QuantileMethod::Lower,
            "higher" => QuantileMethod::Higher,
            "nearest" => QuantileMethod::Nearest,
            "linear" => QuantileMethod::Linear,
            "midpoint" => QuantileMethod::Midpoint,
            "equiprobable" => QuantileMethod::Equiprobable,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python rank `method` string into a [`RankMethod`].
///
/// Raises `ValueError` for any string outside the listed options.
impl<'py> FromPyObject<'py> for Wrap<RankMethod> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "min" => RankMethod::Min,
            "max" => RankMethod::Max,
            "average" => RankMethod::Average,
            "dense" => RankMethod::Dense,
            "ordinal" => RankMethod::Ordinal,
            "random" => RankMethod::Random,
            v => {
                return Err(PyValueError::new_err(format!(
                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python rank `method` string into a [`RollingRankMethod`].
///
/// Accepts the same names as [`RankMethod`] except `'ordinal'`, which has no
/// matching arm here. Raises `ValueError` for any other string.
impl<'py> FromPyObject<'py> for Wrap<RollingRankMethod> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "min" => RollingRankMethod::Min,
            "max" => RollingRankMethod::Max,
            "average" => RollingRankMethod::Average,
            "dense" => RollingRankMethod::Dense,
            "random" => RollingRankMethod::Random,
            v => {
                return Err(PyValueError::new_err(format!(
                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'random'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `roll` string into a [`Roll`] variant.
///
/// Raises `ValueError` for any string outside the listed options.
impl<'py> FromPyObject<'py> for Wrap<Roll> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "raise" => Roll::Raise,
            "forward" => Roll::Forward,
            "backward" => Roll::Backward,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `time_unit` string (`'ns'`, `'us'`, `'ms'`) into a [`TimeUnit`].
///
/// Raises `ValueError` for any other string.
impl<'py> FromPyObject<'py> for Wrap<TimeUnit> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "ns" => TimeUnit::Nanoseconds,
            "us" => TimeUnit::Microseconds,
            "ms" => TimeUnit::Milliseconds,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `keep` string into a [`UniqueKeepStrategy`].
///
/// Raises `ValueError` for any string outside the listed options.
impl<'py> FromPyObject<'py> for Wrap<UniqueKeepStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "first" => UniqueKeepStrategy::First,
            "last" => UniqueKeepStrategy::Last,
            "none" => UniqueKeepStrategy::None,
            "any" => UniqueKeepStrategy::Any,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `side` string into a [`SearchSortedSide`].
///
/// Raises `ValueError` for any string outside the listed options.
#[cfg(feature = "search_sorted")]
impl<'py> FromPyObject<'py> for Wrap<SearchSortedSide> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "any" => SearchSortedSide::Any,
            "left" => SearchSortedSide::Left,
            "right" => SearchSortedSide::Right,
            v => {
                return Err(PyValueError::new_err(format!(
                    "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `closed` string into a [`ClosedInterval`].
///
/// Raises `ValueError` for any string outside the listed options.
impl<'py> FromPyObject<'py> for Wrap<ClosedInterval> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "both" => ClosedInterval::Both,
            "left" => ClosedInterval::Left,
            "right" => ClosedInterval::Right,
            "none" => ClosedInterval::None,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `mapping_strategy` string into a [`WindowMapping`].
///
/// Raises `ValueError` for any string outside the listed options.
impl<'py> FromPyObject<'py> for Wrap<WindowMapping> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "group_to_rows" => WindowMapping::GroupsToRows,
            "join" => WindowMapping::Join,
            "explode" => WindowMapping::Explode,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `validate` string (`'1:1'`, `'1:m'`, `'m:m'`, `'m:1'`)
/// into a [`JoinValidation`].
///
/// Raises `ValueError` for any other string.
impl<'py> FromPyObject<'py> for Wrap<JoinValidation> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "1:1" => JoinValidation::OneToOne,
            "1:m" => JoinValidation::OneToMany,
            "m:m" => JoinValidation::ManyToMany,
            "m:1" => JoinValidation::ManyToOne,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `maintain_order` string into a [`MaintainOrderJoin`].
///
/// Raises `ValueError` for any string outside the listed options.
impl<'py> FromPyObject<'py> for Wrap<MaintainOrderJoin> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "none" => MaintainOrderJoin::None,
            "left" => MaintainOrderJoin::Left,
            "right" => MaintainOrderJoin::Right,
            "left_right" => MaintainOrderJoin::LeftRight,
            "right_left" => MaintainOrderJoin::RightLeft,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `quote_style` string into a [`QuoteStyle`].
///
/// Raises `ValueError` for any string outside the listed options.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<QuoteStyle> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "always" => QuoteStyle::Always,
            "necessary" => QuoteStyle::Necessary,
            "non_numeric" => QuoteStyle::NonNumeric,
            "never" => QuoteStyle::Never,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Builds [`CloudOptions`] from untyped `(key, value)` string pairs.
///
/// The pairs are forwarded to `CloudOptions::from_untyped_config` together with
/// `cloud_scheme`; any error it reports is converted through [`PyPolarsErr`].
#[cfg(feature = "cloud")]
pub(crate) fn parse_cloud_options(
    cloud_scheme: Option<CloudScheme>,
    keys_and_values: impl IntoIterator<Item = (String, String)>,
) -> PyResult<CloudOptions> {
    let iter: &mut dyn Iterator<Item = _> = &mut keys_and_values.into_iter();
    let out = CloudOptions::from_untyped_config(cloud_scheme, iter).map_err(PyPolarsErr::from)?;
    Ok(out)
}

/// Parses a Python set-operation name into a [`SetOperation`].
///
/// Raises `ValueError` for any string outside the listed options.
#[cfg(feature = "list_sets")]
impl<'py> FromPyObject<'py> for Wrap<SetOperation> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "union" => SetOperation::Union,
            "difference" => SetOperation::Difference,
            "intersection" => SetOperation::Intersection,
            "symmetric_difference" => SetOperation::SymmetricDifference,
            v => {
                return Err(PyValueError::new_err(format!(
                    "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Conversion from the `ScanCastOptions` class on the Python side.
///
/// * `None` resolves to the policy obtained by calling
///   `polars.io.scan_options.cast_options.ScanCastOptions._default()`; the
///   converted value is cached in a `PyOnceLock` and cloned on later calls.
/// * Any other object is read attribute by attribute (`integer_cast`,
///   `float_cast`, `datetime_cast`, `missing_struct_fields`,
///   `extra_struct_fields`, `categorical_to_string`). An unrecognised option
///   string raises `ValueError`.
impl<'py> FromPyObject<'py> for Wrap<CastColumnsPolicy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        if ob.is_none() {
            // Initialize the default ScanCastOptions from Python.
            static DEFAULT: PyOnceLock<Wrap<CastColumnsPolicy>> = PyOnceLock::new();

            let out = DEFAULT.get_or_try_init(ob.py(), || {
                // Failures are propagated as Python exceptions: the closure
                // already returns `PyResult`, so there is no reason to panic.
                let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")?
                    .getattr("ScanCastOptions")?
                    .call_method0("_default")?;

                let out = Self::extract_bound(&ob)?;

                // The default policy should match ERROR_ON_MISMATCH (but this can change).
                debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);

                PyResult::Ok(out)
            })?;

            return Ok(out.clone());
        }

        let py = ob.py();

        let integer_upcast = match &*ob
            .getattr(intern!(py, "integer_cast"))?
            .extract::<PyBackedStr>()?
        {
            "upcast" => true,
            "forbid" => false,
            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown option for integer_cast: {v}"
                )));
            },
        };

        let mut float_upcast = false;
        let mut float_downcast = false;

        let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;

        parse_multiple_options("float_cast", float_cast_object, |v| {
            match v {
                "forbid" => {},
                "upcast" => float_upcast = true,
                "downcast" => float_downcast = true,
                v => {
                    return Err(PyValueError::new_err(format!(
                        "unknown option for float_cast: {v}"
                    )));
                },
            }

            Ok(())
        })?;

        let mut datetime_nanoseconds_downcast = false;
        let mut datetime_convert_timezone = false;

        let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;

        parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
            match v {
                "forbid" => {},
                "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
                "convert-timezone" => datetime_convert_timezone = true,
                v => {
                    return Err(PyValueError::new_err(format!(
                        "unknown option for datetime_cast: {v}"
                    )));
                },
            };

            Ok(())
        })?;

        let missing_struct_fields = match &*ob
            .getattr(intern!(py, "missing_struct_fields"))?
            .extract::<PyBackedStr>()?
        {
            "insert" => MissingColumnsPolicy::Insert,
            "raise" => MissingColumnsPolicy::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown option for missing_struct_fields: {v}"
                )));
            },
        };

        let extra_struct_fields = match &*ob
            .getattr(intern!(py, "extra_struct_fields"))?
            .extract::<PyBackedStr>()?
        {
            "ignore" => ExtraColumnsPolicy::Ignore,
            "raise" => ExtraColumnsPolicy::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown option for extra_struct_fields: {v}"
                )));
            },
        };

        let categorical_to_string = match &*ob
            .getattr(intern!(py, "categorical_to_string"))?
            .extract::<PyBackedStr>()?
        {
            "allow" => true,
            "forbid" => false,
            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown option for categorical_to_string: {v}"
                )));
            },
        };

        // `datetime_microseconds_downcast` and `null_upcast` are not read from
        // the Python object; they are fixed to `false` and `true` here.
        return Ok(Wrap(CastColumnsPolicy {
            integer_upcast,
            float_upcast,
            float_downcast,
            datetime_nanoseconds_downcast,
            datetime_microseconds_downcast: false,
            datetime_convert_timezone,
            null_upcast: true,
            categorical_to_string,
            missing_struct_fields,
            extra_struct_fields,
        }));

        /// Feeds `parser_func` with either the single string held by
        /// `py_object`, or every string yielded when iterating it. Raises
        /// `ValueError` when the object is neither a `str` nor iterable.
        fn parse_multiple_options(
            parameter_name: &'static str,
            py_object: Bound<'_, PyAny>,
            mut parser_func: impl FnMut(&str) -> PyResult<()>,
        ) -> PyResult<()> {
            if let Ok(v) = py_object.extract::<PyBackedStr>() {
                parser_func(&v)?;
            } else if let Ok(v) = py_object.try_iter() {
                for v in v {
                    parser_func(&v?.extract::<PyBackedStr>()?)?;
                }
            } else {
                return Err(PyValueError::new_err(format!(
                    "unknown type for {parameter_name}: {py_object}"
                )));
            }

            Ok(())
        }
    }
}

/// Maps a fill-null `strategy` name onto a [`FillNullStrategy`].
///
/// `limit` is attached to the `'forward'` and `'backward'` strategies only.
/// Raises `ValueError` for any string outside the listed options.
pub(crate) fn parse_fill_null_strategy(
    strategy: &str,
    limit: FillNullLimit,
) -> PyResult<FillNullStrategy> {
    let parsed = match strategy {
        "forward" => FillNullStrategy::Forward(limit),
        "backward" => FillNullStrategy::Backward(limit),
        "min" => FillNullStrategy::Min,
        "max" => FillNullStrategy::Max,
        "mean" => FillNullStrategy::Mean,
        "zero" => FillNullStrategy::Zero,
        "one" => FillNullStrategy::One,
        e => {
            return Err(PyValueError::new_err(format!(
                "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}

/// Maps a parquet `compression` name and optional level onto a
/// [`ParquetCompression`].
///
/// `compression_level` is only consulted for `'gzip'`, `'brotli'` and `'zstd'`.
/// Raises `ValueError` for an unknown compression name, for a level that does
/// not fit the codec's integer type, or for a level the codec's `try_new`
/// rejects.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    // Checked conversion: a plain `as u8` cast would wrap
                    // (e.g. 265 -> 9) and let an invalid level pass validation.
                    u8::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "gzip compression level out of range: {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            GzipLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    // Checked conversion: a negative level must not wrap into
                    // a large `u32`.
                    u32::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "brotli compression level out of range: {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            BrotliLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}

/// Converts every string-like item of `container` into a [`PlSmallStr`],
/// preserving iteration order.
pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    container
        .into_iter()
        .map(|s| PlSmallStr::from_str(s.as_ref()))
        .collect()
}

/// Newtype around [`CompatLevel`] that can be extracted from a Python
/// `bool` or `int`.
#[derive(Debug, Copy, Clone)]
pub struct PyCompatLevel(pub CompatLevel);

/// Extraction rules:
///
/// * `True` -> `CompatLevel::newest()`, `False` -> `CompatLevel::oldest()`;
/// * an integer that fits `u16` -> `CompatLevel::with_level`, raising
///   `ValueError("invalid compat level")` when that call fails;
/// * anything else raises `TypeError`.
impl<'py> FromPyObject<'py> for PyCompatLevel {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // `bool` must be tried before `u16`: a Python `bool` is also an `int`,
        // so trying the integer first would shadow this branch and read `True`
        // as level 1 instead of the newest level.
        let compat_level = if let Ok(future) = ob.extract::<bool>() {
            if future {
                CompatLevel::newest()
            } else {
                CompatLevel::oldest()
            }
        } else if let Ok(level) = ob.extract::<u16>() {
            CompatLevel::with_level(level)
                .map_err(|_| PyValueError::new_err("invalid compat level"))?
        } else {
            return Err(PyTypeError::new_err(
                "'compat_level' argument accepts int or bool",
            ));
        };

        Ok(PyCompatLevel(compat_level))
    }
}

/// Parses the Python `form` string into a [`UnicodeForm`].
///
/// Raises `ValueError` for any string outside the listed options.
#[cfg(feature = "string_normalize")]
impl<'py> FromPyObject<'py> for Wrap<UnicodeForm> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "NFC" => UnicodeForm::NFC,
            "NFKC" => UnicodeForm::NFKC,
            "NFD" => UnicodeForm::NFD,
            "NFKD" => UnicodeForm::NFKD,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Extracts optional parquet key/value metadata.
///
/// `None` maps to `None`. Otherwise the object is first tried as a list of
/// `(str, str)` pairs (static metadata); if that fails it is kept as an
/// arbitrary Python object and handed to `KeyValueMetadata::from_py_function`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<Option<KeyValueMetadata>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // Variants are tried in declaration order by the derived extractor.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(Py<PyAny>),
        }

        let metadata = Option::<Metadata>::extract_bound(ob)?;
        let key_value_metadata = metadata.map(|x| match x {
            Metadata::Static(kv) => KeyValueMetadata::from_static(kv),
            Metadata::Dynamic(func) => KeyValueMetadata::from_py_function(func),
        });
        Ok(Wrap(key_value_metadata))
    }
}

/// Extracts an optional time zone from an optional Python string.
///
/// The string is passed to `TimeZone::opt_try_new`; an error from that call is
/// converted with `to_py_err`.
impl<'py> FromPyObject<'py> for Wrap<Option<TimeZone>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let tz = Option::<Wrap<PlSmallStr>>::extract_bound(ob)?;

        let tz = tz.map(|x| x.0);

        Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
    }
}

/// Parses a Python `str` (`'upcast'` or `'forbid'`) into an [`UpcastOrForbid`].
///
/// Raises `ValueError` for any other string.
impl<'py> FromPyObject<'py> for Wrap<UpcastOrForbid> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "upcast" => UpcastOrForbid::Upcast,
            "forbid" => UpcastOrForbid::Forbid,
            v => {
                return Err(PyValueError::new_err(format!(
                    "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses a Python `str` (`'ignore'` or `'raise'`) into an [`ExtraColumnsPolicy`].
///
/// Raises `ValueError` for any other string.
impl<'py> FromPyObject<'py> for Wrap<ExtraColumnsPolicy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "ignore" => ExtraColumnsPolicy::Ignore,
            "raise" => ExtraColumnsPolicy::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses a Python `str` (`'insert'` or `'raise'`) into a [`MissingColumnsPolicy`].
///
/// Raises `ValueError` for any other string.
impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "insert" => MissingColumnsPolicy::Insert,
            "raise" => MissingColumnsPolicy::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Extracts a [`MissingColumnsPolicyOrExpr`] from either an expression or a
/// policy string.
///
/// A `PyExpr` becomes `InsertWith(expr)`; otherwise the object must be the
/// string `'insert'` or `'raise'`. Raises `ValueError` for any other string.
impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicyOrExpr> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        if let Ok(pyexpr) = ob.extract::<PyExpr>() {
            return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
        }

        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "insert" => MissingColumnsPolicyOrExpr::Insert,
            "raise" => MissingColumnsPolicyOrExpr::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Extracts a [`ColumnMapping`] from a Python `(type_name, payload)` tuple.
///
/// Only `'iceberg-column-mapping'` is recognised; its payload is extracted as
/// an arrow schema and converted with `IcebergSchema::from_arrow_schema`.
/// Raises `ValueError` for any other type name.
impl<'py> FromPyObject<'py> for Wrap<ColumnMapping> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;

        Ok(Wrap(match &*column_mapping_type {
            "iceberg-column-mapping" => {
                let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
                ColumnMapping::Iceberg(Arc::new(
                    IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
                ))
            },

            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown column mapping type: {v}"
                )));
            },
        }))
    }
}

/// Extracts a [`DeletionFilesList`] from a Python `(type_name, payload)` tuple.
///
/// Only `'iceberg-position-delete'` is recognised; its payload is a dict whose
/// keys extract as `usize` and whose values are iterables of strings. Entries
/// with an empty iterable are skipped. Raises `ValueError` for any other type
/// name.
impl<'py> FromPyObject<'py> for Wrap<DeletionFilesList> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;

        Ok(Wrap(match &*deletion_file_type {
            "iceberg-position-delete" => {
                let dict: Bound<'_, PyDict> = ob.extract()?;

                let mut out = PlIndexMap::new();

                // Keys and values are walked in lockstep.
                for (k, v) in dict
                    .try_iter()?
                    .zip(dict.call_method0("values")?.try_iter()?)
                {
                    let k: usize = k?.extract()?;
                    let v = v?;

                    let files = v
                        .try_iter()?
                        .map(|x| x.and_then(|x| x.extract::<String>()))
                        .collect::<PyResult<Arc<[String]>>>()?;

                    if !files.is_empty() {
                        out.insert(k, files);
                    }
                }

                DeletionFilesList::IcebergPositionDelete(Arc::new(out))
            },

            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown deletion file type: {v}"
                )));
            },
        }))
    }
}

/// Extracts [`DefaultFieldValues`] from a Python `(type_name, payload)` tuple.
///
/// Only `'iceberg'` is recognised; its payload is a dict whose keys extract as
/// `u32`. Each value is stored as `Ok(column)` when `get_series` succeeds on
/// it, otherwise it is extracted as a string and stored as `Err(message)`.
/// Raises `ValueError` for any other type name.
impl<'py> FromPyObject<'py> for Wrap<DefaultFieldValues> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let (default_values_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;

        Ok(Wrap(match &*default_values_type {
            "iceberg" => {
                let dict: Bound<'_, PyDict> = ob.extract()?;

                let mut out = PlIndexMap::new();

                // Keys and values are walked in lockstep.
                for (k, v) in dict
                    .try_iter()?
                    .zip(dict.call_method0("values")?.try_iter()?)
                {
                    let k: u32 = k?.extract()?;
                    let v = v?;

                    let v: Result<Column, String> = if let Ok(s) = get_series(&v) {
                        Ok(s.into_column())
                    } else {
                        let err_msg: String = v.extract()?;
                        Err(err_msg)
                    };

                    out.insert(k, v);
                }

                DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
                    out,
                )))
            },

            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown default field values type: {v}"
                )));
            },
        }))
    }
}

/// Extracts a [`PlPath`] from a Python `str` (via `PlPath::new`) or, failing
/// that, from anything that extracts as a `PathBuf` (stored as
/// `PlPath::Local`). Raises `TypeError` for every other object.
impl<'py> FromPyObject<'py> for Wrap<PlPath> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        if let Ok(path) = ob.extract::<PyBackedStr>() {
            Ok(Wrap(PlPath::new(&path)))
        } else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
            Ok(Wrap(PlPath::Local(path.into())))
        } else {
            Err(PyTypeError::new_err(format!(
                "PlPath cannot be formed from '{}'",
                ob.get_type()
            )))
        }
    }
}

/// Converts a [`PlPath`] into a Python `str` using its `to_str()` form.
impl<'py> IntoPyObject<'py> for Wrap<PlPath> {
    type Target = PyString;
    type Output = Bound<'py, Self::Target>;
    type Error = Infallible;

    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        self.0.to_str().into_pyobject(py)
    }
}