Path: blob/main/crates/polars-python/src/conversion/mod.rs
8353 views
pub(crate) mod any_value;1mod categorical;2pub(crate) mod chunked_array;3mod datetime;45use std::convert::Infallible;6use std::fmt::{Display, Formatter};7use std::fs::File;8use std::hash::{Hash, Hasher};910pub use categorical::PyCategories;11#[cfg(feature = "object")]12use polars::chunked_array::object::PolarsObjectSafe;13use polars::frame::row::Row;14#[cfg(feature = "avro")]15use polars::io::avro::AvroCompression;16use polars::prelude::ColumnMapping;17use polars::prelude::default_values::{18DefaultFieldValues, IcebergIdentityTransformedPartitionFields,19};20use polars::prelude::deletion::DeletionFilesList;21use polars::series::ops::NullBehavior;22use polars_buffer::Buffer;23use polars_compute::decimal::dec128_verify_prec_scale;24use polars_core::datatypes::extension::get_extension_type_or_generic;25use polars_core::schema::iceberg::IcebergSchema;26use polars_core::utils::arrow::array::Array;27use polars_core::utils::materialize_dyn_int;28use polars_lazy::prelude::*;29#[cfg(feature = "parquet")]30use polars_parquet::write::StatisticsOptions;31use polars_plan::dsl::ScanSources;32use polars_utils::compression::{BrotliLevel, GzipLevel, ZstdLevel};33use polars_utils::pl_str::PlSmallStr;34use polars_utils::total_ord::{TotalEq, TotalHash};35use pyo3::basic::CompareOp;36use pyo3::exceptions::{PyTypeError, PyValueError};37use pyo3::intern;38use pyo3::prelude::*;39use pyo3::pybacked::PyBackedStr;40use pyo3::sync::PyOnceLock;41use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};4243use crate::error::PyPolarsErr;44use crate::expr::PyExpr;45use crate::file::{PythonScanSourceInput, get_python_scan_source_input};46#[cfg(feature = "object")]47use crate::object::OBJECT_NAME;48use crate::prelude::*;49use crate::py_modules::{pl_series, polars};50use crate::series::{PySeries, import_schema_pycapsule};51use crate::utils::to_py_err;52use crate::{PyDataFrame, PyLazyFrame};5354/// # Safety55/// Should only be implemented for transparent types56pub(crate) unsafe trait 
Transparent {57type Target;58}5960unsafe impl Transparent for PySeries {61type Target = Series;62}6364unsafe impl<T> Transparent for Wrap<T> {65type Target = T;66}6768unsafe impl<T: Transparent> Transparent for Option<T> {69type Target = Option<T::Target>;70}7172pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {73assert_eq!(size_of::<T>(), size_of::<T::Target>());74assert_eq!(align_of::<T>(), align_of::<T::Target>());75let len = input.len();76let cap = input.capacity();77let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);78let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();79let ptr: *mut T::Target = vec_ptr as *mut T::Target;80unsafe { Vec::from_raw_parts(ptr, len, cap) }81}8283pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {84reinterpret_vec(buf)85}8687#[derive(PartialEq, Eq, Hash)]88#[repr(transparent)]89pub struct Wrap<T>(pub T);9091impl<T> Clone for Wrap<T>92where93T: Clone,94{95fn clone(&self) -> Self {96Wrap(self.0.clone())97}98}99impl<T> From<T> for Wrap<T> {100fn from(t: T) -> Self {101Wrap(t)102}103}104105// extract a Rust DataFrame from a python DataFrame, that is DataFrame<PyDataFrame<RustDataFrame>>106pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {107let pydf = obj.getattr(intern!(obj.py(), "_df"))?;108Ok(pydf.extract::<PyDataFrame>()?.df.into_inner())109}110111pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {112let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;113Ok(pydf.extract::<PyLazyFrame>()?.ldf.into_inner())114}115116pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {117let s = obj.getattr(intern!(obj.py(), "_s"))?;118Ok(s.extract::<PySeries>()?.series.into_inner())119}120121pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {122let series = pl_series(py).bind(py);123let constructor = series.getattr(intern!(py, "_from_pyseries"))?;124constructor.call1((s,))125}126127impl<'a, 'py> 
FromPyObject<'a, 'py> for Wrap<PlSmallStr> {128type Error = PyErr;129130fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {131Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))132}133}134135#[cfg(feature = "csv")]136impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NullValues> {137type Error = PyErr;138139fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {140if let Ok(s) = ob.extract::<PyBackedStr>() {141Ok(Wrap(NullValues::AllColumnsSingle((&*s).into())))142} else if let Ok(s) = ob.extract::<Vec<PyBackedStr>>() {143Ok(Wrap(NullValues::AllColumns(144s.into_iter().map(|x| (&*x).into()).collect(),145)))146} else if let Ok(s) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {147Ok(Wrap(NullValues::Named(148s.into_iter()149.map(|(a, b)| ((&*a).into(), (&*b).into()))150.collect(),151)))152} else {153Err(154PyPolarsErr::Other("could not extract value from null_values argument".into())155.into(),156)157}158}159}160161fn struct_dict<'a, 'py>(162py: Python<'py>,163vals: impl Iterator<Item = AnyValue<'a>>,164flds: &[Field],165) -> PyResult<Bound<'py, PyDict>> {166let dict = PyDict::new(py);167flds.iter().zip(vals).try_for_each(|(fld, val)| {168dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)169})?;170Ok(dict)171}172173impl<'py> IntoPyObject<'py> for Wrap<Series> {174type Target = PyAny;175type Output = Bound<'py, Self::Target>;176type Error = PyErr;177178fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {179to_series(py, PySeries::new(self.0))180}181}182183impl<'py> IntoPyObject<'py> for &Wrap<DataType> {184type Target = PyAny;185type Output = Bound<'py, Self::Target>;186type Error = PyErr;187188fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {189let pl = polars(py).bind(py);190191match &self.0 {192DataType::Int8 => {193let class = pl.getattr(intern!(py, "Int8"))?;194class.call0()195},196DataType::Int16 => {197let class = pl.getattr(intern!(py, 
"Int16"))?;198class.call0()199},200DataType::Int32 => {201let class = pl.getattr(intern!(py, "Int32"))?;202class.call0()203},204DataType::Int64 => {205let class = pl.getattr(intern!(py, "Int64"))?;206class.call0()207},208DataType::UInt8 => {209let class = pl.getattr(intern!(py, "UInt8"))?;210class.call0()211},212DataType::UInt16 => {213let class = pl.getattr(intern!(py, "UInt16"))?;214class.call0()215},216DataType::UInt32 => {217let class = pl.getattr(intern!(py, "UInt32"))?;218class.call0()219},220DataType::UInt64 => {221let class = pl.getattr(intern!(py, "UInt64"))?;222class.call0()223},224DataType::UInt128 => {225let class = pl.getattr(intern!(py, "UInt128"))?;226class.call0()227},228DataType::Int128 => {229let class = pl.getattr(intern!(py, "Int128"))?;230class.call0()231},232DataType::Float16 => {233let class = pl.getattr(intern!(py, "Float16"))?;234class.call0()235},236DataType::Float32 => {237let class = pl.getattr(intern!(py, "Float32"))?;238class.call0()239},240DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {241let class = pl.getattr(intern!(py, "Float64"))?;242class.call0()243},244DataType::Decimal(precision, scale) => {245let class = pl.getattr(intern!(py, "Decimal"))?;246let args = (*precision, *scale);247class.call1(args)248},249DataType::Boolean => {250let class = pl.getattr(intern!(py, "Boolean"))?;251class.call0()252},253DataType::String | DataType::Unknown(UnknownKind::Str) => {254let class = pl.getattr(intern!(py, "String"))?;255class.call0()256},257DataType::Binary => {258let class = pl.getattr(intern!(py, "Binary"))?;259class.call0()260},261DataType::Array(inner, size) => {262let class = pl.getattr(intern!(py, "Array"))?;263let inner = Wrap(*inner.clone());264let args = (&inner, *size);265class.call1(args)266},267DataType::List(inner) => {268let class = pl.getattr(intern!(py, "List"))?;269let inner = Wrap(*inner.clone());270class.call1((&inner,))271},272DataType::Date => {273let class = pl.getattr(intern!(py, 
"Date"))?;274class.call0()275},276DataType::Datetime(tu, tz) => {277let datetime_class = pl.getattr(intern!(py, "Datetime"))?;278datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))279},280DataType::Duration(tu) => {281let duration_class = pl.getattr(intern!(py, "Duration"))?;282duration_class.call1((tu.to_ascii(),))283},284#[cfg(feature = "object")]285DataType::Object(_) => {286let class = pl.getattr(intern!(py, "Object"))?;287class.call0()288},289DataType::Categorical(cats, _) => {290let categories_class = pl.getattr(intern!(py, "Categories"))?;291let categorical_class = pl.getattr(intern!(py, "Categorical"))?;292let categories = categories_class293.call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;294let kwargs = [("categories", categories)];295categorical_class.call((), Some(&kwargs.into_py_dict(py)?))296},297DataType::Enum(_, mapping) => {298let categories = unsafe {299StringChunked::from_chunks(300PlSmallStr::from_static("category"),301vec![mapping.to_arrow(true)],302)303};304let class = pl.getattr(intern!(py, "Enum"))?;305let series = to_series(py, categories.into_series().into())?;306class.call1((series,))307},308DataType::Time => pl.getattr(intern!(py, "Time")).and_then(|x| x.call0()),309DataType::Struct(fields) => {310let field_class = pl.getattr(intern!(py, "Field"))?;311let iter = fields.iter().map(|fld| {312let name = fld.name().as_str();313let dtype = Wrap(fld.dtype().clone());314field_class.call1((name, &dtype)).unwrap()315});316let fields = PyList::new(py, iter)?;317let struct_class = pl.getattr(intern!(py, "Struct"))?;318struct_class.call1((fields,))319},320DataType::Null => {321let class = pl.getattr(intern!(py, "Null"))?;322class.call0()323},324DataType::Extension(typ, storage) => {325let py_storage = Wrap((**storage).clone()).into_pyobject(py)?;326let py_typ = pl327.getattr(intern!(py, "get_extension_type"))?328.call1((typ.name(),))?;329let class = if py_typ.is_none()330|| py_typ.str().map(|s| s == 
"storage").ok() == Some(true)331{332pl.getattr(intern!(py, "Extension"))?333} else {334py_typ335};336let from_params = class.getattr(intern!(py, "ext_from_params"))?;337from_params.call1((typ.name(), py_storage, typ.serialize_metadata()))338},339DataType::Unknown(UnknownKind::Int(v)) => {340Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)341},342DataType::Unknown(_) => {343let class = pl.getattr(intern!(py, "Unknown"))?;344class.call0()345},346DataType::BinaryOffset => {347unimplemented!()348},349}350}351}352353impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Field> {354type Error = PyErr;355356fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {357let py = ob.py();358let name = ob359.getattr(intern!(py, "name"))?360.str()?361.extract::<PyBackedStr>()?;362let dtype = ob363.getattr(intern!(py, "dtype"))?364.extract::<Wrap<DataType>>()?;365Ok(Wrap(Field::new((&*name).into(), dtype.0)))366}367}368369impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<DataType> {370type Error = PyErr;371372fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {373let py = ob.py();374let type_name = ob.get_type().qualname()?.to_string();375376let dtype = match &*type_name {377"DataTypeClass" => {378// just the class, not an object379let name = ob380.getattr(intern!(py, "__name__"))?381.str()?382.extract::<PyBackedStr>()?;383match &*name {384"Int8" => DataType::Int8,385"Int16" => DataType::Int16,386"Int32" => DataType::Int32,387"Int64" => DataType::Int64,388"Int128" => DataType::Int128,389"UInt8" => DataType::UInt8,390"UInt16" => DataType::UInt16,391"UInt32" => DataType::UInt32,392"UInt64" => DataType::UInt64,393"UInt128" => DataType::UInt128,394"Float16" => DataType::Float16,395"Float32" => DataType::Float32,396"Float64" => DataType::Float64,397"Boolean" => DataType::Boolean,398"String" => DataType::String,399"Binary" => DataType::Binary,400"Categorical" => DataType::from_categories(Categories::global()),401"Enum" => 
DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),402"Date" => DataType::Date,403"Time" => DataType::Time,404"Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),405"Duration" => DataType::Duration(TimeUnit::Microseconds),406"List" => DataType::List(Box::new(DataType::Null)),407"Array" => DataType::Array(Box::new(DataType::Null), 0),408"Struct" => DataType::Struct(vec![]),409"Null" => DataType::Null,410#[cfg(feature = "object")]411"Object" => DataType::Object(OBJECT_NAME),412"Unknown" => DataType::Unknown(Default::default()),413"Decimal" => {414return Err(PyTypeError::new_err(415"Decimal without precision/scale set is not a valid Polars datatype",416));417},418dt => {419return Err(PyTypeError::new_err(format!(420"'{dt}' is not a Polars data type",421)));422},423}424},425"Int8" => DataType::Int8,426"Int16" => DataType::Int16,427"Int32" => DataType::Int32,428"Int64" => DataType::Int64,429"Int128" => DataType::Int128,430"UInt8" => DataType::UInt8,431"UInt16" => DataType::UInt16,432"UInt32" => DataType::UInt32,433"UInt64" => DataType::UInt64,434"UInt128" => DataType::UInt128,435"Float16" => DataType::Float16,436"Float32" => DataType::Float32,437"Float64" => DataType::Float64,438"Boolean" => DataType::Boolean,439"String" => DataType::String,440"Binary" => DataType::Binary,441"Categorical" => {442let categories = ob.getattr(intern!(py, "categories")).unwrap();443let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();444let py_categories = py_categories.extract::<PyCategories>()?;445DataType::from_categories(py_categories.categories().clone())446},447"Enum" => {448let categories = ob.getattr(intern!(py, "categories")).unwrap();449let s = get_series(&categories.as_borrowed())?;450let ca = s.str().map_err(PyPolarsErr::from)?;451let categories = 
ca.downcast_iter().next().unwrap().clone();452assert!(!categories.has_nulls());453DataType::from_frozen_categories(454FrozenCategories::new(categories.values_iter()).unwrap(),455)456},457"Date" => DataType::Date,458"Time" => DataType::Time,459"Datetime" => {460let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();461let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;462let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();463let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;464DataType::Datetime(465time_unit,466TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,467)468},469"Duration" => {470let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();471let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;472DataType::Duration(time_unit)473},474"Decimal" => {475let precision = ob.getattr(intern!(py, "precision"))?.extract()?;476let scale = ob.getattr(intern!(py, "scale"))?.extract()?;477dec128_verify_prec_scale(precision, scale).map_err(to_py_err)?;478DataType::Decimal(precision, scale)479},480"List" => {481let inner = ob.getattr(intern!(py, "inner")).unwrap();482let inner = inner.extract::<Wrap<DataType>>()?;483DataType::List(Box::new(inner.0))484},485"Array" => {486let inner = ob.getattr(intern!(py, "inner")).unwrap();487let size = ob.getattr(intern!(py, "size")).unwrap();488let inner = inner.extract::<Wrap<DataType>>()?;489let size = size.extract::<usize>()?;490DataType::Array(Box::new(inner.0), size)491},492"Struct" => {493let fields = ob.getattr(intern!(py, "fields"))?;494let fields = fields495.extract::<Vec<Wrap<Field>>>()?496.into_iter()497.map(|f| f.0)498.collect::<Vec<Field>>();499DataType::Struct(fields)500},501"Null" => DataType::Null,502#[cfg(feature = "object")]503"Object" => DataType::Object(OBJECT_NAME),504"Unknown" => DataType::Unknown(Default::default()),505dt => {506let base_ext = polars(py)507.getattr(py, intern!(py, "BaseExtension"))508.unwrap();509if ob.is_instance(base_ext.bind(py))? 
{510let ext_name_f = ob.getattr(intern!(py, "ext_name"))?;511let ext_metadata_f = ob.getattr(intern!(py, "ext_metadata"))?;512let ext_storage_f = ob.getattr(intern!(py, "ext_storage"))?;513let name: String = ext_name_f.call0()?.extract()?;514let metadata: Option<String> = ext_metadata_f.call0()?.extract()?;515let storage: Wrap<DataType> = ext_storage_f.call0()?.extract()?;516let ext_typ =517get_extension_type_or_generic(&name, &storage.0, metadata.as_deref());518return Ok(Wrap(DataType::Extension(ext_typ, Box::new(storage.0))));519}520521return Err(PyTypeError::new_err(format!(522"'{dt}' is not a Polars data type",523)));524},525};526Ok(Wrap(dtype))527}528}529530impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {531type Target = PyString;532type Output = Bound<'py, Self::Target>;533type Error = Infallible;534535fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {536self.0.to_ascii().into_pyobject(py)537}538}539540#[cfg(feature = "parquet")]541impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<StatisticsOptions> {542type Error = PyErr;543544fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {545let mut statistics = StatisticsOptions::empty();546547let dict = ob.cast::<PyDict>()?;548for (key, val) in dict.iter() {549let key = key.extract::<PyBackedStr>()?;550let val = val.extract::<bool>()?;551552match key.as_ref() {553"min" => statistics.min_value = val,554"max" => statistics.max_value = val,555"distinct_count" => statistics.distinct_count = val,556"null_count" => statistics.null_count = val,557_ => {558return Err(PyTypeError::new_err(format!(559"'{key}' is not a valid statistic option",560)));561},562}563}564565Ok(Wrap(statistics))566}567}568569impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Row<'static>> {570type Error = PyErr;571572fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {573let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;574let vals = reinterpret_vec(vals);575Ok(Wrap(Row(vals)))576}577}578579impl<'a, 'py> 
FromPyObject<'a, 'py> for Wrap<Schema> {580type Error = PyErr;581582fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {583let dict = ob.cast::<PyDict>()?;584585Ok(Wrap(586dict.iter()587.map(|(key, val)| {588let key = key.extract::<PyBackedStr>()?;589let val = val.extract::<Wrap<DataType>>()?;590591Ok(Field::new((&*key).into(), val.0))592})593.collect::<PyResult<Schema>>()?,594))595}596}597598impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ArrowSchema> {599type Error = PyErr;600601fn extract(schema_object: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {602let py = schema_object.py();603604let schema_capsule = schema_object605.getattr(intern!(py, "__arrow_c_schema__"))?606.call0()?;607608let field = import_schema_pycapsule(&schema_capsule.extract()?)?;609610let ArrowDataType::Struct(fields) = field.dtype else {611return Err(PyValueError::new_err(format!(612"__arrow_c_schema__ of object did not return struct dtype: \613object: {:?}, dtype: {:?}",614schema_object, &field.dtype615)));616};617618let mut schema = ArrowSchema::from_iter_check_duplicates(fields).map_err(to_py_err)?;619620if let Some(md) = field.metadata {621*schema.metadata_mut() = Arc::unwrap_or_clone(md);622}623624Ok(Wrap(schema))625}626}627628impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ScanSources> {629type Error = PyErr;630631fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {632let list = ob.cast::<PyList>()?.to_owned();633634if list.is_empty() {635return Ok(Wrap(ScanSources::default()));636}637638enum MutableSources {639Paths(Vec<PlRefPath>),640Files(Vec<File>),641Buffers(Vec<Buffer<u8>>),642}643644let num_items = list.len();645let mut iter = list646.into_iter()647.map(|val| get_python_scan_source_input(val.unbind(), false));648649let Some(first) = iter.next() else {650return Ok(Wrap(ScanSources::default()));651};652653let mut sources = match first? 
{654PythonScanSourceInput::Path(path) => {655let mut sources = Vec::with_capacity(num_items);656sources.push(path);657MutableSources::Paths(sources)658},659PythonScanSourceInput::File(file) => {660let mut sources = Vec::with_capacity(num_items);661sources.push(file.into());662MutableSources::Files(sources)663},664PythonScanSourceInput::Buffer(buffer) => {665let mut sources = Vec::with_capacity(num_items);666sources.push(buffer);667MutableSources::Buffers(sources)668},669};670671for source in iter {672match (&mut sources, source?) {673(MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),674(MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),675(MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),676_ => {677return Err(PyTypeError::new_err(678"Cannot combine in-memory bytes, paths and files for scan sources",679));680},681}682}683684Ok(Wrap(match sources {685MutableSources::Paths(i) => ScanSources::Paths(i.into()),686MutableSources::Files(i) => ScanSources::Files(i.into()),687MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),688}))689}690}691692impl<'py> IntoPyObject<'py> for Wrap<Schema> {693type Target = PyDict;694type Output = Bound<'py, Self::Target>;695type Error = PyErr;696697fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {698let dict = PyDict::new(py);699self.0700.iter()701.try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;702Ok(dict)703}704}705706#[derive(Debug)]707#[repr(transparent)]708pub struct ObjectValue {709pub inner: Py<PyAny>,710}711712impl Clone for ObjectValue {713fn clone(&self) -> Self {714Python::attach(|py| Self {715inner: self.inner.clone_ref(py),716})717}718}719720impl Hash for ObjectValue {721fn hash<H: Hasher>(&self, state: &mut H) {722let h = Python::attach(|py| self.inner.bind(py).hash().expect("should be hashable"));723state.write_isize(h)724}725}726727impl Eq for ObjectValue {}728729impl PartialEq for 
ObjectValue {730fn eq(&self, other: &Self) -> bool {731Python::attach(|py| {732match self733.inner734.bind(py)735.rich_compare(other.inner.bind(py), CompareOp::Eq)736{737Ok(result) => result.is_truthy().unwrap(),738Err(_) => false,739}740})741}742}743744impl TotalEq for ObjectValue {745fn tot_eq(&self, other: &Self) -> bool {746self == other747}748}749750impl TotalHash for ObjectValue {751fn tot_hash<H>(&self, state: &mut H)752where753H: Hasher,754{755self.hash(state);756}757}758759impl Display for ObjectValue {760fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {761write!(f, "{}", self.inner)762}763}764765#[cfg(feature = "object")]766impl PolarsObject for ObjectValue {767fn type_name() -> &'static str {768"object"769}770}771772impl From<Py<PyAny>> for ObjectValue {773fn from(p: Py<PyAny>) -> Self {774Self { inner: p }775}776}777778impl<'a, 'py> FromPyObject<'a, 'py> for ObjectValue {779type Error = PyErr;780781fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {782Ok(ObjectValue {783inner: ob.to_owned().unbind(),784})785}786}787788/// # Safety789///790/// The caller is responsible for checking that val is Object otherwise UB791#[cfg(feature = "object")]792impl From<&dyn PolarsObjectSafe> for &ObjectValue {793fn from(val: &dyn PolarsObjectSafe) -> Self {794unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }795}796}797798impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {799type Target = PyAny;800type Output = Borrowed<'a, 'py, Self::Target>;801type Error = std::convert::Infallible;802803fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {804Ok(self.inner.bind_borrowed(py))805}806}807808impl Default for ObjectValue {809fn default() -> Self {810Python::attach(|py| ObjectValue { inner: py.None() })811}812}813814impl<'a, 'py, T> FromPyObject<'a, 'py> for Wrap<Vec<T>>815where816T: FromPyObjectOwned<'py>,817{818type Error = PyErr;819820fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {821let seq = 
ob822.cast::<PySequence>()823.map_err(<PyErr as From<pyo3::CastError>>::from)?;824let mut v = Vec::with_capacity(seq.len().unwrap_or(0));825for item in seq.try_iter()? {826v.push(item?.extract::<T>().map_err(Into::into)?);827}828Ok(Wrap(v))829}830}831832#[cfg(feature = "asof_join")]833impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<AsofStrategy> {834type Error = PyErr;835836fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {837let parsed = match &*(ob.extract::<PyBackedStr>()?) {838"backward" => AsofStrategy::Backward,839"forward" => AsofStrategy::Forward,840"nearest" => AsofStrategy::Nearest,841v => {842return Err(PyValueError::new_err(format!(843"asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",844)));845},846};847Ok(Wrap(parsed))848}849}850851impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<InterpolationMethod> {852type Error = PyErr;853854fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {855let parsed = match &*(ob.extract::<PyBackedStr>()?) {856"linear" => InterpolationMethod::Linear,857"nearest" => InterpolationMethod::Nearest,858v => {859return Err(PyValueError::new_err(format!(860"interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",861)));862},863};864Ok(Wrap(parsed))865}866}867868#[cfg(feature = "avro")]869impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<AvroCompression>> {870type Error = PyErr;871872fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {873let parsed = match &*ob.extract::<PyBackedStr>()? 
{874"uncompressed" => None,875"snappy" => Some(AvroCompression::Snappy),876"deflate" => Some(AvroCompression::Deflate),877v => {878return Err(PyValueError::new_err(format!(879"avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",880)));881},882};883Ok(Wrap(parsed))884}885}886887impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<StartBy> {888type Error = PyErr;889890fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {891let parsed = match &*ob.extract::<PyBackedStr>()? {892"window" => StartBy::WindowBound,893"datapoint" => StartBy::DataPoint,894"monday" => StartBy::Monday,895"tuesday" => StartBy::Tuesday,896"wednesday" => StartBy::Wednesday,897"thursday" => StartBy::Thursday,898"friday" => StartBy::Friday,899"saturday" => StartBy::Saturday,900"sunday" => StartBy::Sunday,901v => {902return Err(PyValueError::new_err(format!(903"`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",904)));905},906};907Ok(Wrap(parsed))908}909}910911impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ClosedWindow> {912type Error = PyErr;913914fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {915let parsed = match &*ob.extract::<PyBackedStr>()? {916"left" => ClosedWindow::Left,917"right" => ClosedWindow::Right,918"both" => ClosedWindow::Both,919"none" => ClosedWindow::None,920v => {921return Err(PyValueError::new_err(format!(922"`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",923)));924},925};926Ok(Wrap(parsed))927}928}929930impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<RoundMode> {931type Error = PyErr;932933fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {934let parsed = match &*ob.extract::<PyBackedStr>()? 
{935"half_to_even" => RoundMode::HalfToEven,936"half_away_from_zero" => RoundMode::HalfAwayFromZero,937v => {938return Err(PyValueError::new_err(format!(939"`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",940)));941},942};943Ok(Wrap(parsed))944}945}946947#[cfg(feature = "csv")]948impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<CsvEncoding> {949type Error = PyErr;950951fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {952let parsed = match &*ob.extract::<PyBackedStr>()? {953"utf8" => CsvEncoding::Utf8,954"utf8-lossy" => CsvEncoding::LossyUtf8,955v => {956return Err(PyValueError::new_err(format!(957"csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",958)));959},960};961Ok(Wrap(parsed))962}963}964965#[cfg(feature = "ipc")]966impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<IpcCompression>> {967type Error = PyErr;968969fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {970let parsed = match &*ob.extract::<PyBackedStr>()? {971"uncompressed" => None,972"lz4" => Some(IpcCompression::LZ4),973"zstd" => Some(IpcCompression::ZSTD(Default::default())),974v => {975return Err(PyValueError::new_err(format!(976"ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",977)));978},979};980Ok(Wrap(parsed))981}982}983984impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<JoinType> {985type Error = PyErr;986987fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {988let parsed = match &*ob.extract::<PyBackedStr>()? 
{989"inner" => JoinType::Inner,990"left" => JoinType::Left,991"right" => JoinType::Right,992"full" => JoinType::Full,993"semi" => JoinType::Semi,994"anti" => JoinType::Anti,995#[cfg(feature = "cross_join")]996"cross" => JoinType::Cross,997v => {998return Err(PyValueError::new_err(format!(999"`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",1000)));1001},1002};1003Ok(Wrap(parsed))1004}1005}10061007impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Label> {1008type Error = PyErr;10091010fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {1011let parsed = match &*ob.extract::<PyBackedStr>()? {1012"left" => Label::Left,1013"right" => Label::Right,1014"datapoint" => Label::DataPoint,1015v => {1016return Err(PyValueError::new_err(format!(1017"`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",1018)));1019},1020};1021Ok(Wrap(parsed))1022}1023}10241025impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ListToStructWidthStrategy> {1026type Error = PyErr;10271028fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {1029let parsed = match &*ob.extract::<PyBackedStr>()? {1030"first_non_null" => ListToStructWidthStrategy::FirstNonNull,1031"max_width" => ListToStructWidthStrategy::MaxWidth,1032v => {1033return Err(PyValueError::new_err(format!(1034"`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",1035)));1036},1037};1038Ok(Wrap(parsed))1039}1040}10411042impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NonExistent> {1043type Error = PyErr;10441045fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {1046let parsed = match &*ob.extract::<PyBackedStr>()? 
{
            "null" => NonExistent::Null,
            "raise" => NonExistent::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `null_behavior` string (`"drop"` or `"ignore"`) into a [`NullBehavior`].
///
/// Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NullBehavior> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "drop" => NullBehavior::Drop,
            "ignore" => NullBehavior::Ignore,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `null_strategy` string (`"ignore"` or `"propagate"`) into a
/// [`NullStrategy`].
///
/// Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NullStrategy> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "ignore" => NullStrategy::Ignore,
            "propagate" => NullStrategy::Propagate,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `parallel` string into a [`ParallelStrategy`].
///
/// Accepted values: `"auto"`, `"columns"`, `"row_groups"`, `"prefiltered"`, `"none"`.
/// Any other string raises a Python `ValueError`.
#[cfg(feature = "parquet")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ParallelStrategy> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "auto" => ParallelStrategy::Auto,
            "columns" => ParallelStrategy::Columns,
            "row_groups" => ParallelStrategy::RowGroups,
            "prefiltered" => ParallelStrategy::Prefiltered,
            "none" => ParallelStrategy::None,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `order` string (`"fortran"` or `"c"`) into an [`IndexOrder`].
///
/// Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<IndexOrder> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "fortran" => IndexOrder::Fortran,
            "c" => IndexOrder::C,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`order` must be one of {{'fortran', 'c'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `interpolation` string into a [`QuantileMethod`].
///
/// Accepted values: `"lower"`, `"higher"`, `"nearest"`, `"linear"`, `"midpoint"`,
/// `"equiprobable"`. Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<QuantileMethod> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "lower" => QuantileMethod::Lower,
            "higher" => QuantileMethod::Higher,
            "nearest" => QuantileMethod::Nearest,
            "linear" => QuantileMethod::Linear,
            "midpoint" => QuantileMethod::Midpoint,
            "equiprobable" => QuantileMethod::Equiprobable,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python rank `method` string into a [`RankMethod`].
///
/// Accepted values: `"min"`, `"max"`, `"average"`, `"dense"`, `"ordinal"`, `"random"`.
/// Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<RankMethod> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "min" => RankMethod::Min,
            "max" => RankMethod::Max,
            "average" => RankMethod::Average,
            "dense" => RankMethod::Dense,
            "ordinal" => RankMethod::Ordinal,
            "random" => RankMethod::Random,
            v => {
                return Err(PyValueError::new_err(format!(
                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python rank `method` string into a [`RollingRankMethod`].
///
/// Accepted values: `"min"`, `"max"`, `"average"`, `"dense"`, `"random"` (note: unlike
/// [`RankMethod`], `"ordinal"` is not accepted here). Any other string raises a Python
/// `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<RollingRankMethod> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "min" => RollingRankMethod::Min,
            "max" => RollingRankMethod::Max,
            "average" => RollingRankMethod::Average,
            "dense" => RollingRankMethod::Dense,
            "random" => RollingRankMethod::Random,
            v => {
                return Err(PyValueError::new_err(format!(
                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'random'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `roll` string (`"raise"`, `"forward"` or `"backward"`) into a [`Roll`].
///
/// Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Roll> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "raise" => Roll::Raise,
            "forward" => Roll::Forward,
            "backward" => Roll::Backward,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `time_unit` string (`"ns"`, `"us"` or `"ms"`) into a [`TimeUnit`].
///
/// Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<TimeUnit> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "ns" => TimeUnit::Nanoseconds,
            "us" => TimeUnit::Microseconds,
            "ms" => TimeUnit::Milliseconds,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `keep` string into a [`UniqueKeepStrategy`].
///
/// Accepted values: `"first"`, `"last"`, `"none"`, `"any"`. Any other string raises a
/// Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<UniqueKeepStrategy> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "first" => UniqueKeepStrategy::First,
            "last" => UniqueKeepStrategy::Last,
            "none" => UniqueKeepStrategy::None,
            "any" => UniqueKeepStrategy::Any,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `side` string (`"any"`, `"left"` or `"right"`) into a
/// [`SearchSortedSide`].
///
/// Any other string raises a Python `ValueError`.
#[cfg(feature = "search_sorted")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<SearchSortedSide> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "any" => SearchSortedSide::Any,
            "left" => SearchSortedSide::Left,
            "right" => SearchSortedSide::Right,
            v => {
                return Err(PyValueError::new_err(format!(
                    "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `closed` string into a [`ClosedInterval`].
///
/// Accepted values: `"both"`, `"left"`, `"right"`, `"none"`. Any other string raises a
/// Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ClosedInterval> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "both" => ClosedInterval::Both,
            "left" => ClosedInterval::Left,
            "right" => ClosedInterval::Right,
            "none" => ClosedInterval::None,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `mapping_strategy` string into a [`WindowMapping`].
///
/// Accepted values: `"group_to_rows"`, `"join"`, `"explode"`. Any other string raises a
/// Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<WindowMapping> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "group_to_rows" => WindowMapping::GroupsToRows,
            "join" => WindowMapping::Join,
            "explode" => WindowMapping::Explode,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `validate` string into a [`JoinValidation`].
///
/// Accepted values: `"1:1"`, `"1:m"`, `"m:m"`, `"m:1"`. Any other string raises a Python
/// `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<JoinValidation> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "1:1" => JoinValidation::OneToOne,
            "1:m" => JoinValidation::OneToMany,
            "m:m" => JoinValidation::ManyToMany,
            "m:1" => JoinValidation::ManyToOne,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `maintain_order` string into a [`MaintainOrderJoin`].
///
/// Accepted values: `"none"`, `"left"`, `"right"`, `"left_right"`, `"right_left"`.
/// Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<MaintainOrderJoin> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "none" => MaintainOrderJoin::None,
            "left" => MaintainOrderJoin::Left,
            "right" => MaintainOrderJoin::Right,
            "left_right" => MaintainOrderJoin::LeftRight,
            "right_left" => MaintainOrderJoin::RightLeft,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python `quote_style` string into a [`QuoteStyle`].
///
/// Accepted values: `"always"`, `"necessary"`, `"non_numeric"`, `"never"`. Any other
/// string raises a Python `ValueError`.
#[cfg(feature = "csv")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<QuoteStyle> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "always" => QuoteStyle::Always,
            "necessary" => QuoteStyle::Necessary,
            "non_numeric" => QuoteStyle::NonNumeric,
            "never" => QuoteStyle::Never,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses the Python set-operation string into a [`SetOperation`].
///
/// Accepted values: `"union"`, `"difference"`, `"intersection"`,
/// `"symmetric_difference"`. Any other string raises a Python `ValueError`.
#[cfg(feature = "list_sets")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<SetOperation> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "union" => SetOperation::Union,
            "difference" => SetOperation::Difference,
            "intersection" => SetOperation::Intersection,
            "symmetric_difference" => SetOperation::SymmetricDifference,
            v => {
                return Err(PyValueError::new_err(format!(
                    "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

// Conversion from ScanCastOptions class from the Python side.
//
// `None` yields the (cached) policy obtained from Python's `ScanCastOptions._default()`.
// Any other object is read attribute by attribute: `integer_cast`, `float_cast`,
// `datetime_cast`, `missing_struct_fields`, `extra_struct_fields` and
// `categorical_to_string`. Unknown option strings raise a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<CastColumnsPolicy> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        if ob.is_none() {
            // Initialize the default ScanCastOptions from Python.
            static DEFAULT: PyOnceLock<Wrap<CastColumnsPolicy>> = PyOnceLock::new();

            let out = DEFAULT.get_or_try_init(ob.py(), || {
                // Propagate import/attribute/call failures as Python exceptions instead of
                // panicking; the closure already returns a `PyResult`.
                let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")?
                    .getattr("ScanCastOptions")?
                    .call_method0("_default")?;

                let out = Self::extract(ob.as_borrowed())?;

                // The default policy should match ERROR_ON_MISMATCH (but this can change).
                debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);

                PyResult::Ok(out)
            })?;

            return Ok(out.clone());
        }

        let py = ob.py();

        let integer_upcast = match &*ob
            .getattr(intern!(py, "integer_cast"))?
            .extract::<PyBackedStr>()?
        {
            "upcast" => true,
            "forbid" => false,
            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown option for integer_cast: {v}"
                )));
            },
        };

        let mut float_upcast = false;
        let mut float_downcast = false;

        let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;

        // `float_cast` may be a single string or an iterable of strings; each option
        // switches on the matching flag.
        parse_multiple_options("float_cast", float_cast_object, |v| {
            match v {
                "forbid" => {},
                "upcast" => float_upcast = true,
                "downcast" => float_downcast = true,
                v => {
                    return Err(PyValueError::new_err(format!(
                        "unknown option for float_cast: {v}"
                    )));
                },
            }

            Ok(())
        })?;

        let mut datetime_nanoseconds_downcast = false;
        let mut datetime_convert_timezone = false;

        let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;

        // Same single-string-or-iterable handling as `float_cast`.
        parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
            match v {
                "forbid" => {},
                "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
                "convert-timezone" => datetime_convert_timezone = true,
                v => {
                    return Err(PyValueError::new_err(format!(
                        "unknown option for datetime_cast: {v}"
                    )));
                },
            };

            Ok(())
        })?;

        let missing_struct_fields = match &*ob
            .getattr(intern!(py, "missing_struct_fields"))?
            .extract::<PyBackedStr>()?
        {
            "insert" => MissingColumnsPolicy::Insert,
            "raise" => MissingColumnsPolicy::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown option for missing_struct_fields: {v}"
                )));
            },
        };

        let extra_struct_fields = match &*ob
            .getattr(intern!(py, "extra_struct_fields"))?
            .extract::<PyBackedStr>()?
        {
            "ignore" => ExtraColumnsPolicy::Ignore,
            "raise" => ExtraColumnsPolicy::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown option for extra_struct_fields: {v}"
                )));
            },
        };

        let categorical_to_string = match &*ob
            .getattr(intern!(py, "categorical_to_string"))?
            .extract::<PyBackedStr>()?
        {
            "allow" => true,
            "forbid" => false,
            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown option for categorical_to_string: {v}"
                )));
            },
        };

        // `datetime_microseconds_downcast` and `null_upcast` are not read from the Python
        // object; they are fixed to `false` and `true` respectively.
        return Ok(Wrap(CastColumnsPolicy {
            integer_upcast,
            float_upcast,
            float_downcast,
            datetime_nanoseconds_downcast,
            datetime_microseconds_downcast: false,
            datetime_convert_timezone,
            null_upcast: true,
            categorical_to_string,
            missing_struct_fields,
            extra_struct_fields,
        }));

        // Feeds `parser_func` either the single string `py_object`, or every string
        // yielded by iterating it. Objects that are neither a string nor iterable raise
        // a `ValueError` naming `parameter_name`.
        fn parse_multiple_options(
            parameter_name: &'static str,
            py_object: Bound<'_, PyAny>,
            mut parser_func: impl FnMut(&str) -> PyResult<()>,
        ) -> PyResult<()> {
            if let Ok(v) = py_object.extract::<PyBackedStr>() {
                parser_func(&v)?;
            } else if let Ok(v) = py_object.try_iter() {
                for v in v {
                    parser_func(&v?.extract::<PyBackedStr>()?)?;
                }
            } else {
                return Err(PyValueError::new_err(format!(
                    "unknown type for {parameter_name}: {py_object}"
                )));
            }

            Ok(())
        }
    }
}

/// Maps a fill-null `strategy` name to a [`FillNullStrategy`].
///
/// `limit` is only attached to the `"forward"` and `"backward"` strategies; the other
/// strategies (`"min"`, `"max"`, `"mean"`, `"zero"`, `"one"`) ignore it.
///
/// # Errors
/// Returns a Python `ValueError` when `strategy` is not one of the names above.
pub(crate) fn parse_fill_null_strategy(
    strategy: &str,
    limit: FillNullLimit,
) -> PyResult<FillNullStrategy> {
    let parsed = match strategy {
        "forward" => FillNullStrategy::Forward(limit),
        "backward" => FillNullStrategy::Backward(limit),
        "min" => FillNullStrategy::Min,
        "max" => FillNullStrategy::Max,
        "mean" => FillNullStrategy::Mean,
        "zero" => FillNullStrategy::Zero,
        "one" => FillNullStrategy::One,
        e => {
            return Err(PyValueError::new_err(format!(
                "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}

/// Maps a parquet `compression` name plus optional `compression_level` to a
/// [`ParquetCompression`].
///
/// `compression_level` is only consulted for `"gzip"`, `"brotli"` and `"zstd"`; when it is
/// `None` the codec is constructed without an explicit level.
///
/// # Errors
/// Returns a Python `ValueError` when `compression` is not a known codec name, when the
/// level does not fit the codec's integer type, or when the codec's level constructor
/// rejects it.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    // Checked conversion: an `as u8` cast would silently wrap
                    // (e.g. 265 -> 9) and accept an invalid level.
                    u8::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "gzip `compression_level` is out of range, got {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            GzipLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    // Checked conversion: an `as u32` cast would turn negative levels
                    // into huge positive ones.
                    u32::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "brotli `compression_level` is out of range, got {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            BrotliLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}

/// Copies every string-like item of `container` into a [`PlSmallStr`], preserving order.
pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    container
        .into_iter()
        .map(|s| PlSmallStr::from_str(s.as_ref()))
        .collect()
}

/// Python-facing wrapper around [`CompatLevel`].
#[derive(Debug, Copy, Clone)]
pub struct PyCompatLevel(pub CompatLevel);

/// Accepts either a `bool` (`True` -> newest level, `False` -> oldest level) or an
/// integer level that is validated by [`CompatLevel::with_level`].
///
/// Raises `ValueError` for an integer that is not a valid level and `TypeError` for any
/// other Python type.
impl<'a, 'py> FromPyObject<'a, 'py> for PyCompatLevel {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        // Python's `bool` is a subclass of `int`, so an integer extraction would also
        // succeed for `True`/`False` and shadow this branch. Try `bool` first so that
        // booleans always mean newest/oldest regardless of the numeric level values.
        let compat_level = if let Ok(future) = ob.extract::<bool>() {
            if future {
                CompatLevel::newest()
            } else {
                CompatLevel::oldest()
            }
        } else if let Ok(level) = ob.extract::<u16>() {
            if let Ok(compat_level) = CompatLevel::with_level(level) {
                compat_level
            } else {
                return Err(PyValueError::new_err("invalid compat level"));
            }
        } else {
            return Err(PyTypeError::new_err(
                "'compat_level' argument accepts int or bool",
            ));
        };

        Ok(PyCompatLevel(compat_level))
    }
}

/// Parses the Python `form` string into a [`UnicodeForm`].
///
/// Accepted values: `"NFC"`, `"NFKC"`, `"NFD"`, `"NFKD"`. Any other string raises a
/// Python `ValueError`.
#[cfg(feature = "string_normalize")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<UnicodeForm> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "NFC" => UnicodeForm::NFC,
            "NFKC" => UnicodeForm::NFKC,
            "NFD" => UnicodeForm::NFD,
            "NFKD" => UnicodeForm::NFKD,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Extracts optional parquet key-value metadata.
///
/// `None` maps to `None`. A sequence of `(str, str)` pairs becomes static metadata via
/// [`KeyValueMetadata::from_static`]; any other object is handed to
/// [`KeyValueMetadata::from_py_function`] (presumably a callable; not checked here).
#[cfg(feature = "parquet")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<KeyValueMetadata>> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        // Variants are tried in declaration order by the derived `FromPyObject`.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(Py<PyAny>),
        }

        let metadata = Option::<Metadata>::extract(ob)?;
        let key_value_metadata = metadata.map(|x| match x {
            Metadata::Static(kv) => KeyValueMetadata::from_static(kv),
            Metadata::Dynamic(func) => KeyValueMetadata::from_py_function(func),
        });
        Ok(Wrap(key_value_metadata))
    }
}

/// Extracts an optional time zone from an optional Python string.
///
/// The string (or `None`) is passed to [`TimeZone::opt_try_new`]; its error, if any, is
/// converted to a Python exception with `to_py_err`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<TimeZone>> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let tz = Option::<Wrap<PlSmallStr>>::extract(ob)?;

        let tz = tz.map(|x| x.0);

        Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
    }
}

/// Parses a cast parameter string (`"upcast"` or `"forbid"`) into an [`UpcastOrForbid`].
///
/// Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<UpcastOrForbid> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "upcast" => UpcastOrForbid::Upcast,
            "forbid" => UpcastOrForbid::Forbid,
            v => {
                return Err(PyValueError::new_err(format!(
                    "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses an extra column/field parameter string (`"ignore"` or `"raise"`) into an
/// [`ExtraColumnsPolicy`].
///
/// Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ExtraColumnsPolicy> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "ignore" => ExtraColumnsPolicy::Ignore,
            "raise" => ExtraColumnsPolicy::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Parses a missing column/field parameter string (`"insert"` or `"raise"`) into a
/// [`MissingColumnsPolicy`].
///
/// Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<MissingColumnsPolicy> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "insert" => MissingColumnsPolicy::Insert,
            "raise" => MissingColumnsPolicy::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Extracts a [`MissingColumnsPolicyOrExpr`] from either a `PyExpr` or a policy string.
///
/// A `PyExpr` becomes `InsertWith(expr)`; otherwise the value must be the string
/// `"insert"` or `"raise"`. Any other string raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<MissingColumnsPolicyOrExpr> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        // Expressions take precedence over the string form.
        if let Ok(pyexpr) = ob.extract::<PyExpr>() {
            return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
        }

        let parsed = match &*ob.extract::<PyBackedStr>()? {
            "insert" => MissingColumnsPolicyOrExpr::Insert,
            "raise" => MissingColumnsPolicyOrExpr::Raise,
            v => {
                return Err(PyValueError::new_err(format!(
                    "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
                )));
            },
        };
        Ok(Wrap(parsed))
    }
}

/// Extracts a [`ColumnMapping`] from a `(type_tag, payload)` Python tuple.
///
/// The only supported tag is `"iceberg-column-mapping"`, whose payload is extracted as an
/// arrow schema and converted with [`IcebergSchema::from_arrow_schema`]. Any other tag
/// raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ColumnMapping> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;

        Ok(Wrap(match &*column_mapping_type {
            "iceberg-column-mapping" => {
                let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
                ColumnMapping::Iceberg(Arc::new(
                    IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
                ))
            },

            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown column mapping type: {v}"
                )));
            },
        }))
    }
}

/// Extracts a [`DeletionFilesList`] from a `(type_tag, payload)` Python tuple.
///
/// The only supported tag is `"iceberg-position-delete"`, whose payload is a dict mapping
/// an integer key to an iterable of file path strings. Keys whose iterable is empty are
/// dropped. Any other tag raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<DeletionFilesList> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;

        Ok(Wrap(match &*deletion_file_type {
            "iceberg-position-delete" => {
                let dict: Bound<'_, PyDict> = ob.extract()?;

                let mut out = PlIndexMap::new();

                // Walk keys and values in lockstep.
                for (k, v) in dict
                    .try_iter()?
                    .zip(dict.call_method0("values")?.try_iter()?)
                {
                    let k: usize = k?.extract()?;
                    let v: Bound<'_, PyAny> = v?.extract()?;

                    let files = v
                        .try_iter()?
                        .map(|x| {
                            x.and_then(|x| {
                                let x: String = x.extract()?;
                                Ok(x)
                            })
                        })
                        .collect::<PyResult<Arc<[String]>>>()?;

                    // Entries without any deletion file carry no information.
                    if !files.is_empty() {
                        out.insert(k, files);
                    }
                }

                DeletionFilesList::IcebergPositionDelete(Arc::new(out))
            },

            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown deletion file type: {v}"
                )));
            },
        }))
    }
}

/// Extracts [`DefaultFieldValues`] from a `(type_tag, payload)` Python tuple.
///
/// The only supported tag is `"iceberg"`, whose payload is a dict mapping a `u32` key to
/// either a Series (stored as `Ok(column)`) or an error message string (stored as
/// `Err(message)`). Any other tag raises a Python `ValueError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<DefaultFieldValues> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let (default_values_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;

        Ok(Wrap(match &*default_values_type {
            "iceberg" => {
                let dict: Bound<'_, PyDict> = ob.extract()?;

                let mut out = PlIndexMap::new();

                // Walk keys and values in lockstep.
                for (k, v) in dict
                    .try_iter()?
                    .zip(dict.call_method0("values")?.try_iter()?)
                {
                    let k: u32 = k?.extract()?;
                    let v = v?;

                    // A value that is not a Series must be a string carrying an error
                    // message for that field.
                    let v: Result<Column, String> = if let Ok(s) = get_series(&v) {
                        Ok(s.into_column())
                    } else {
                        let err_msg: String = v.extract()?;
                        Err(err_msg)
                    };

                    out.insert(k, v);
                }

                DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
                    out,
                )))
            },

            v => {
                return Err(PyValueError::new_err(format!(
                    "unknown default field values type: {v}"
                )));
            },
        }))
    }
}

/// Extracts a [`PlRefPath`] from a Python string or path-like object.
///
/// Strings are used as-is via [`PlRefPath::new`]; otherwise the object is extracted as a
/// `PathBuf` and converted with [`PlRefPath::try_from_path`]. Anything else raises a
/// Python `TypeError`.
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<PlRefPath> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        if let Ok(path) = ob.extract::<PyBackedStr>() {
            Ok(Wrap(PlRefPath::new(&*path)))
        } else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
            Ok(Wrap(PlRefPath::try_from_path(&path).map_err(to_py_err)?))
        } else {
            Err(PyTypeError::new_err(format!(
                "PlRefPath cannot be formed from '{}'",
                ob.get_type()
            )))
        }
    }
}

/// Converts a [`PlRefPath`] into a Python `str` holding its string form.
impl<'py> IntoPyObject<'py> for Wrap<PlRefPath> {
    type Target = PyString;
    type Output = Bound<'py, Self::Target>;
    type Error = Infallible;

    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        self.0.as_str().into_pyobject(py)
    }
}