Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-python/src/conversion/mod.rs
7889 views
1
pub(crate) mod any_value;
2
mod categorical;
3
pub(crate) mod chunked_array;
4
mod datetime;
5
6
use std::convert::Infallible;
7
use std::fmt::{Display, Formatter};
8
use std::fs::File;
9
use std::hash::{Hash, Hasher};
10
11
pub use categorical::PyCategories;
12
#[cfg(feature = "object")]
13
use polars::chunked_array::object::PolarsObjectSafe;
14
use polars::frame::row::Row;
15
#[cfg(feature = "avro")]
16
use polars::io::avro::AvroCompression;
17
#[cfg(feature = "cloud")]
18
use polars::io::cloud::CloudOptions;
19
use polars::prelude::ColumnMapping;
20
use polars::prelude::default_values::{
21
DefaultFieldValues, IcebergIdentityTransformedPartitionFields,
22
};
23
use polars::prelude::deletion::DeletionFilesList;
24
use polars::series::ops::NullBehavior;
25
use polars_compute::decimal::dec128_verify_prec_scale;
26
use polars_core::datatypes::extension::get_extension_type_or_generic;
27
use polars_core::schema::iceberg::IcebergSchema;
28
use polars_core::utils::arrow::array::Array;
29
use polars_core::utils::arrow::types::NativeType;
30
use polars_core::utils::materialize_dyn_int;
31
use polars_lazy::prelude::*;
32
#[cfg(feature = "parquet")]
33
use polars_parquet::write::StatisticsOptions;
34
use polars_plan::dsl::ScanSources;
35
use polars_utils::compression::{BrotliLevel, GzipLevel, ZstdLevel};
36
use polars_utils::mmap::MemSlice;
37
use polars_utils::pl_str::PlSmallStr;
38
use polars_utils::total_ord::{TotalEq, TotalHash};
39
use pyo3::basic::CompareOp;
40
use pyo3::exceptions::{PyTypeError, PyValueError};
41
use pyo3::intern;
42
use pyo3::prelude::*;
43
use pyo3::pybacked::PyBackedStr;
44
use pyo3::sync::PyOnceLock;
45
use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};
46
47
use crate::error::PyPolarsErr;
48
use crate::expr::PyExpr;
49
use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
50
use crate::interop::arrow::to_rust::field_to_rust_arrow;
51
#[cfg(feature = "object")]
52
use crate::object::OBJECT_NAME;
53
use crate::prelude::*;
54
use crate::py_modules::{pl_series, polars};
55
use crate::series::PySeries;
56
use crate::utils::to_py_err;
57
use crate::{PyDataFrame, PyLazyFrame};
58
59
/// Marker trait linking a wrapper type to the type it can be reinterpreted as.
///
/// # Safety
/// Should only be implemented for transparent types
pub(crate) unsafe trait Transparent {
    /// The type that `Self` is reinterpreted as.
    type Target;
}
64
65
unsafe impl Transparent for PySeries {
66
type Target = Series;
67
}
68
69
// SAFETY: `Wrap<T>` is declared `#[repr(transparent)]` over its single field of type `T`.
unsafe impl<T> Transparent for Wrap<T> {
    type Target = T;
}
72
73
// NOTE(review): relies on `Option<T>` and `Option<T::Target>` sharing a layout whenever `T` and
// `T::Target` do — confirm for every `T` this is used with.
unsafe impl<T: Transparent> Transparent for Option<T> {
    type Target = Option<T::Target>;
}
76
77
pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
78
assert_eq!(size_of::<T>(), size_of::<T::Target>());
79
assert_eq!(align_of::<T>(), align_of::<T::Target>());
80
let len = input.len();
81
let cap = input.capacity();
82
let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
83
let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
84
let ptr: *mut T::Target = vec_ptr as *mut T::Target;
85
unsafe { Vec::from_raw_parts(ptr, len, cap) }
86
}
87
88
pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
89
reinterpret_vec(buf)
90
}
91
92
/// Transparent newtype around `T`, used in this file as the target of conversion trait impls.
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);

/// Cloning a `Wrap` clones the wrapped value.
impl<T: Clone> Clone for Wrap<T> {
    fn clone(&self) -> Self {
        let Wrap(inner) = self;
        Self(inner.clone())
    }
}

/// Any value can be moved into a `Wrap`.
impl<T> From<T> for Wrap<T> {
    fn from(t: T) -> Self {
        Self(t)
    }
}
109
110
// extract a Rust DataFrame from a python DataFrame, that is DataFrame<PyDataFrame<RustDataFrame>>
111
pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
112
let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
113
Ok(pydf.extract::<PyDataFrame>()?.df.into_inner())
114
}
115
116
pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
117
let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
118
Ok(pydf.extract::<PyLazyFrame>()?.ldf.into_inner())
119
}
120
121
pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
122
let s = obj.getattr(intern!(obj.py(), "_s"))?;
123
Ok(s.extract::<PySeries>()?.series.into_inner())
124
}
125
126
/// Builds a Python-level series object by calling `_from_pyseries` on the object returned by
/// `pl_series` with `s` as the only argument.
pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {
    let series_cls = pl_series(py).bind(py);
    series_cls
        .getattr(intern!(py, "_from_pyseries"))?
        .call1((s,))
}
131
132
impl<'py> FromPyObject<'py> for Wrap<PlSmallStr> {
133
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
134
Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
135
}
136
}
137
138
/// Extracts the `null_values` argument.
///
/// Accepted shapes, tried in this order: a single string, a sequence of strings, or a sequence
/// of `(str, str)` pairs. Anything else yields an error.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<NullValues> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        if let Ok(all) = ob.extract::<Vec<PyBackedStr>>() {
            return Ok(Wrap(NullValues::AllColumns(
                all.into_iter().map(|x| (&*x).into()).collect(),
            )));
        }

        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            return Ok(Wrap(NullValues::Named(
                pairs
                    .into_iter()
                    .map(|(a, b)| ((&*a).into(), (&*b).into()))
                    .collect(),
            )));
        }

        Err(PyPolarsErr::Other("could not extract value from null_values argument".into()).into())
    }
}
161
162
fn struct_dict<'a, 'py>(
163
py: Python<'py>,
164
vals: impl Iterator<Item = AnyValue<'a>>,
165
flds: &[Field],
166
) -> PyResult<Bound<'py, PyDict>> {
167
let dict = PyDict::new(py);
168
flds.iter().zip(vals).try_for_each(|(fld, val)| {
169
dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
170
})?;
171
Ok(dict)
172
}
173
174
impl<'py> IntoPyObject<'py> for Wrap<Series> {
175
type Target = PyAny;
176
type Output = Bound<'py, Self::Target>;
177
type Error = PyErr;
178
179
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
180
to_series(py, PySeries::new(self.0))
181
}
182
}
183
184
impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
185
type Target = PyAny;
186
type Output = Bound<'py, Self::Target>;
187
type Error = PyErr;
188
189
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
190
let pl = polars(py).bind(py);
191
192
match &self.0 {
193
DataType::Int8 => {
194
let class = pl.getattr(intern!(py, "Int8"))?;
195
class.call0()
196
},
197
DataType::Int16 => {
198
let class = pl.getattr(intern!(py, "Int16"))?;
199
class.call0()
200
},
201
DataType::Int32 => {
202
let class = pl.getattr(intern!(py, "Int32"))?;
203
class.call0()
204
},
205
DataType::Int64 => {
206
let class = pl.getattr(intern!(py, "Int64"))?;
207
class.call0()
208
},
209
DataType::UInt8 => {
210
let class = pl.getattr(intern!(py, "UInt8"))?;
211
class.call0()
212
},
213
DataType::UInt16 => {
214
let class = pl.getattr(intern!(py, "UInt16"))?;
215
class.call0()
216
},
217
DataType::UInt32 => {
218
let class = pl.getattr(intern!(py, "UInt32"))?;
219
class.call0()
220
},
221
DataType::UInt64 => {
222
let class = pl.getattr(intern!(py, "UInt64"))?;
223
class.call0()
224
},
225
DataType::UInt128 => {
226
let class = pl.getattr(intern!(py, "UInt128"))?;
227
class.call0()
228
},
229
DataType::Int128 => {
230
let class = pl.getattr(intern!(py, "Int128"))?;
231
class.call0()
232
},
233
DataType::Float16 => {
234
let class = pl.getattr(intern!(py, "Float16"))?;
235
class.call0()
236
},
237
DataType::Float32 => {
238
let class = pl.getattr(intern!(py, "Float32"))?;
239
class.call0()
240
},
241
DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
242
let class = pl.getattr(intern!(py, "Float64"))?;
243
class.call0()
244
},
245
DataType::Decimal(precision, scale) => {
246
let class = pl.getattr(intern!(py, "Decimal"))?;
247
let args = (*precision, *scale);
248
class.call1(args)
249
},
250
DataType::Boolean => {
251
let class = pl.getattr(intern!(py, "Boolean"))?;
252
class.call0()
253
},
254
DataType::String | DataType::Unknown(UnknownKind::Str) => {
255
let class = pl.getattr(intern!(py, "String"))?;
256
class.call0()
257
},
258
DataType::Binary => {
259
let class = pl.getattr(intern!(py, "Binary"))?;
260
class.call0()
261
},
262
DataType::Array(inner, size) => {
263
let class = pl.getattr(intern!(py, "Array"))?;
264
let inner = Wrap(*inner.clone());
265
let args = (&inner, *size);
266
class.call1(args)
267
},
268
DataType::List(inner) => {
269
let class = pl.getattr(intern!(py, "List"))?;
270
let inner = Wrap(*inner.clone());
271
class.call1((&inner,))
272
},
273
DataType::Date => {
274
let class = pl.getattr(intern!(py, "Date"))?;
275
class.call0()
276
},
277
DataType::Datetime(tu, tz) => {
278
let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
279
datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
280
},
281
DataType::Duration(tu) => {
282
let duration_class = pl.getattr(intern!(py, "Duration"))?;
283
duration_class.call1((tu.to_ascii(),))
284
},
285
#[cfg(feature = "object")]
286
DataType::Object(_) => {
287
let class = pl.getattr(intern!(py, "Object"))?;
288
class.call0()
289
},
290
DataType::Categorical(cats, _) => {
291
let categories_class = pl.getattr(intern!(py, "Categories"))?;
292
let categorical_class = pl.getattr(intern!(py, "Categorical"))?;
293
let categories = categories_class
294
.call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;
295
let kwargs = [("categories", categories)];
296
categorical_class.call((), Some(&kwargs.into_py_dict(py)?))
297
},
298
DataType::Enum(_, mapping) => {
299
let categories = unsafe {
300
StringChunked::from_chunks(
301
PlSmallStr::from_static("category"),
302
vec![mapping.to_arrow(true)],
303
)
304
};
305
let class = pl.getattr(intern!(py, "Enum"))?;
306
let series = to_series(py, categories.into_series().into())?;
307
class.call1((series,))
308
},
309
DataType::Time => pl.getattr(intern!(py, "Time")).and_then(|x| x.call0()),
310
DataType::Struct(fields) => {
311
let field_class = pl.getattr(intern!(py, "Field"))?;
312
let iter = fields.iter().map(|fld| {
313
let name = fld.name().as_str();
314
let dtype = Wrap(fld.dtype().clone());
315
field_class.call1((name, &dtype)).unwrap()
316
});
317
let fields = PyList::new(py, iter)?;
318
let struct_class = pl.getattr(intern!(py, "Struct"))?;
319
struct_class.call1((fields,))
320
},
321
DataType::Null => {
322
let class = pl.getattr(intern!(py, "Null"))?;
323
class.call0()
324
},
325
DataType::Extension(typ, storage) => {
326
let py_storage = Wrap((**storage).clone()).into_pyobject(py)?;
327
let py_typ = pl
328
.getattr(intern!(py, "get_extension_type"))?
329
.call1((typ.name(),))?;
330
let class = if py_typ.is_none()
331
|| py_typ.str().map(|s| s == "storage").ok() == Some(true)
332
{
333
pl.getattr(intern!(py, "Extension"))?
334
} else {
335
py_typ
336
};
337
let from_params = class.getattr(intern!(py, "ext_from_params"))?;
338
from_params.call1((typ.name(), py_storage, typ.serialize_metadata()))
339
},
340
DataType::Unknown(UnknownKind::Int(v)) => {
341
Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
342
},
343
DataType::Unknown(_) => {
344
let class = pl.getattr(intern!(py, "Unknown"))?;
345
class.call0()
346
},
347
DataType::BinaryOffset => {
348
unimplemented!()
349
},
350
}
351
}
352
}
353
354
impl<'py> FromPyObject<'py> for Wrap<Field> {
355
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
356
let py = ob.py();
357
let name = ob
358
.getattr(intern!(py, "name"))?
359
.str()?
360
.extract::<PyBackedStr>()?;
361
let dtype = ob
362
.getattr(intern!(py, "dtype"))?
363
.extract::<Wrap<DataType>>()?;
364
Ok(Wrap(Field::new((&*name).into(), dtype.0)))
365
}
366
}
367
368
impl<'py> FromPyObject<'py> for Wrap<DataType> {
369
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
370
let py = ob.py();
371
let type_name = ob.get_type().qualname()?.to_string();
372
373
let dtype = match &*type_name {
374
"DataTypeClass" => {
375
// just the class, not an object
376
let name = ob
377
.getattr(intern!(py, "__name__"))?
378
.str()?
379
.extract::<PyBackedStr>()?;
380
match &*name {
381
"Int8" => DataType::Int8,
382
"Int16" => DataType::Int16,
383
"Int32" => DataType::Int32,
384
"Int64" => DataType::Int64,
385
"Int128" => DataType::Int128,
386
"UInt8" => DataType::UInt8,
387
"UInt16" => DataType::UInt16,
388
"UInt32" => DataType::UInt32,
389
"UInt64" => DataType::UInt64,
390
"UInt128" => DataType::UInt128,
391
"Float16" => DataType::Float16,
392
"Float32" => DataType::Float32,
393
"Float64" => DataType::Float64,
394
"Boolean" => DataType::Boolean,
395
"String" => DataType::String,
396
"Binary" => DataType::Binary,
397
"Categorical" => DataType::from_categories(Categories::global()),
398
"Enum" => DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),
399
"Date" => DataType::Date,
400
"Time" => DataType::Time,
401
"Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
402
"Duration" => DataType::Duration(TimeUnit::Microseconds),
403
"List" => DataType::List(Box::new(DataType::Null)),
404
"Array" => DataType::Array(Box::new(DataType::Null), 0),
405
"Struct" => DataType::Struct(vec![]),
406
"Null" => DataType::Null,
407
#[cfg(feature = "object")]
408
"Object" => DataType::Object(OBJECT_NAME),
409
"Unknown" => DataType::Unknown(Default::default()),
410
"Decimal" => {
411
return Err(PyTypeError::new_err(
412
"Decimal without precision/scale set is not a valid Polars datatype",
413
));
414
},
415
dt => {
416
return Err(PyTypeError::new_err(format!(
417
"'{dt}' is not a Polars data type",
418
)));
419
},
420
}
421
},
422
"Int8" => DataType::Int8,
423
"Int16" => DataType::Int16,
424
"Int32" => DataType::Int32,
425
"Int64" => DataType::Int64,
426
"Int128" => DataType::Int128,
427
"UInt8" => DataType::UInt8,
428
"UInt16" => DataType::UInt16,
429
"UInt32" => DataType::UInt32,
430
"UInt64" => DataType::UInt64,
431
"UInt128" => DataType::UInt128,
432
"Float16" => DataType::Float16,
433
"Float32" => DataType::Float32,
434
"Float64" => DataType::Float64,
435
"Boolean" => DataType::Boolean,
436
"String" => DataType::String,
437
"Binary" => DataType::Binary,
438
"Categorical" => {
439
let categories = ob.getattr(intern!(py, "categories")).unwrap();
440
let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();
441
let py_categories = py_categories.extract::<PyCategories>()?;
442
DataType::from_categories(py_categories.categories().clone())
443
},
444
"Enum" => {
445
let categories = ob.getattr(intern!(py, "categories")).unwrap();
446
let s = get_series(&categories.as_borrowed())?;
447
let ca = s.str().map_err(PyPolarsErr::from)?;
448
let categories = ca.downcast_iter().next().unwrap().clone();
449
assert!(!categories.has_nulls());
450
DataType::from_frozen_categories(
451
FrozenCategories::new(categories.values_iter()).unwrap(),
452
)
453
},
454
"Date" => DataType::Date,
455
"Time" => DataType::Time,
456
"Datetime" => {
457
let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
458
let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
459
let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
460
let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
461
DataType::Datetime(
462
time_unit,
463
TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
464
)
465
},
466
"Duration" => {
467
let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
468
let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
469
DataType::Duration(time_unit)
470
},
471
"Decimal" => {
472
let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
473
let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
474
dec128_verify_prec_scale(precision, scale).map_err(to_py_err)?;
475
DataType::Decimal(precision, scale)
476
},
477
"List" => {
478
let inner = ob.getattr(intern!(py, "inner")).unwrap();
479
let inner = inner.extract::<Wrap<DataType>>()?;
480
DataType::List(Box::new(inner.0))
481
},
482
"Array" => {
483
let inner = ob.getattr(intern!(py, "inner")).unwrap();
484
let size = ob.getattr(intern!(py, "size")).unwrap();
485
let inner = inner.extract::<Wrap<DataType>>()?;
486
let size = size.extract::<usize>()?;
487
DataType::Array(Box::new(inner.0), size)
488
},
489
"Struct" => {
490
let fields = ob.getattr(intern!(py, "fields"))?;
491
let fields = fields
492
.extract::<Vec<Wrap<Field>>>()?
493
.into_iter()
494
.map(|f| f.0)
495
.collect::<Vec<Field>>();
496
DataType::Struct(fields)
497
},
498
"Null" => DataType::Null,
499
#[cfg(feature = "object")]
500
"Object" => DataType::Object(OBJECT_NAME),
501
"Unknown" => DataType::Unknown(Default::default()),
502
dt => {
503
let base_ext = polars(py)
504
.getattr(py, intern!(py, "BaseExtension"))
505
.unwrap();
506
if ob.is_instance(base_ext.bind(py))? {
507
let ext_name_f = ob.getattr(intern!(py, "ext_name"))?;
508
let ext_metadata_f = ob.getattr(intern!(py, "ext_metadata"))?;
509
let ext_storage_f = ob.getattr(intern!(py, "ext_storage"))?;
510
let name: String = ext_name_f.call0()?.extract()?;
511
let metadata: Option<String> = ext_metadata_f.call0()?.extract()?;
512
let storage: Wrap<DataType> = ext_storage_f.call0()?.extract()?;
513
let ext_typ =
514
get_extension_type_or_generic(&name, &storage.0, metadata.as_deref());
515
return Ok(Wrap(DataType::Extension(ext_typ, Box::new(storage.0))));
516
}
517
518
return Err(PyTypeError::new_err(format!(
519
"'{dt}' is not a Polars data type",
520
)));
521
},
522
};
523
Ok(Wrap(dtype))
524
}
525
}
526
527
impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
528
type Target = PyString;
529
type Output = Bound<'py, Self::Target>;
530
type Error = Infallible;
531
532
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
533
self.0.to_ascii().into_pyobject(py)
534
}
535
}
536
537
/// Builds `StatisticsOptions` from a Python dict of `str -> bool`.
///
/// Starts from `StatisticsOptions::empty()` and sets the flag named by each key (`"min"`,
/// `"max"`, `"distinct_count"`, `"null_count"`). Unknown keys raise `TypeError`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<StatisticsOptions> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let mut statistics = StatisticsOptions::empty();

        for (key, val) in ob.downcast::<PyDict>()?.iter() {
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            // Pick the flag to update, then write to it once.
            let flag = match &*key {
                "min" => &mut statistics.min_value,
                "max" => &mut statistics.max_value,
                "distinct_count" => &mut statistics.distinct_count,
                "null_count" => &mut statistics.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *flag = enabled;
        }

        Ok(Wrap(statistics))
    }
}
563
564
impl<'py> FromPyObject<'py> for Wrap<Row<'static>> {
565
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
566
let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
567
let vals = reinterpret_vec(vals);
568
Ok(Wrap(Row(vals)))
569
}
570
}
571
572
impl<'py> FromPyObject<'py> for Wrap<Schema> {
573
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
574
let dict = ob.downcast::<PyDict>()?;
575
576
Ok(Wrap(
577
dict.iter()
578
.map(|(key, val)| {
579
let key = key.extract::<PyBackedStr>()?;
580
let val = val.extract::<Wrap<DataType>>()?;
581
582
Ok(Field::new((&*key).into(), val.0))
583
})
584
.collect::<PyResult<Schema>>()?,
585
))
586
}
587
}
588
589
impl<'py> FromPyObject<'py> for Wrap<ArrowSchema> {
590
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
591
let py = ob.py();
592
593
let pyarrow_schema_cls = py
594
.import(intern!(py, "pyarrow"))?
595
.getattr(intern!(py, "Schema"))?;
596
597
if ob.is_none() {
598
return Err(PyValueError::new_err("arrow_schema() returned None").into());
599
}
600
601
let schema_cls = ob.getattr(intern!(py, "__class__"))?;
602
603
if !schema_cls.is(&pyarrow_schema_cls) {
604
return Err(PyTypeError::new_err(format!(
605
"expected pyarrow.Schema, got: {schema_cls}"
606
)));
607
}
608
609
let mut iter = ob.try_iter()?.map(|x| x.and_then(field_to_rust_arrow));
610
611
let mut last_err = None;
612
613
let schema =
614
ArrowSchema::from_iter_check_duplicates(std::iter::from_fn(|| match iter.next() {
615
Some(Ok(v)) => Some(v),
616
Some(Err(e)) => {
617
last_err = Some(e);
618
None
619
},
620
None => None,
621
}))
622
.map_err(to_py_err)?;
623
624
if let Some(last_err) = last_err {
625
return Err(last_err.into());
626
}
627
628
Ok(Wrap(schema))
629
}
630
}
631
632
impl<'py> FromPyObject<'py> for Wrap<ScanSources> {
633
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
634
let list = ob.downcast::<PyList>()?.to_owned();
635
636
if list.is_empty() {
637
return Ok(Wrap(ScanSources::default()));
638
}
639
640
enum MutableSources {
641
Paths(Vec<PlPath>),
642
Files(Vec<File>),
643
Buffers(Vec<MemSlice>),
644
}
645
646
let num_items = list.len();
647
let mut iter = list
648
.into_iter()
649
.map(|val| get_python_scan_source_input(val.unbind(), false));
650
651
let Some(first) = iter.next() else {
652
return Ok(Wrap(ScanSources::default()));
653
};
654
655
let mut sources = match first? {
656
PythonScanSourceInput::Path(path) => {
657
let mut sources = Vec::with_capacity(num_items);
658
sources.push(path);
659
MutableSources::Paths(sources)
660
},
661
PythonScanSourceInput::File(file) => {
662
let mut sources = Vec::with_capacity(num_items);
663
sources.push(file.into());
664
MutableSources::Files(sources)
665
},
666
PythonScanSourceInput::Buffer(buffer) => {
667
let mut sources = Vec::with_capacity(num_items);
668
sources.push(buffer);
669
MutableSources::Buffers(sources)
670
},
671
};
672
673
for source in iter {
674
match (&mut sources, source?) {
675
(MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
676
(MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
677
(MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
678
_ => {
679
return Err(PyTypeError::new_err(
680
"Cannot combine in-memory bytes, paths and files for scan sources",
681
));
682
},
683
}
684
}
685
686
Ok(Wrap(match sources {
687
MutableSources::Paths(i) => ScanSources::Paths(i.into()),
688
MutableSources::Files(i) => ScanSources::Files(i.into()),
689
MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
690
}))
691
}
692
}
693
694
impl<'py> IntoPyObject<'py> for Wrap<Schema> {
695
type Target = PyDict;
696
type Output = Bound<'py, Self::Target>;
697
type Error = PyErr;
698
699
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
700
let dict = PyDict::new(py);
701
self.0
702
.iter()
703
.try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
704
Ok(dict)
705
}
706
}
707
708
#[derive(Debug)]
709
#[repr(transparent)]
710
pub struct ObjectValue {
711
pub inner: Py<PyAny>,
712
}
713
714
impl Clone for ObjectValue {
715
fn clone(&self) -> Self {
716
Python::attach(|py| Self {
717
inner: self.inner.clone_ref(py),
718
})
719
}
720
}
721
722
impl Hash for ObjectValue {
723
fn hash<H: Hasher>(&self, state: &mut H) {
724
let h = Python::attach(|py| self.inner.bind(py).hash().expect("should be hashable"));
725
state.write_isize(h)
726
}
727
}
728
729
impl Eq for ObjectValue {}
730
731
impl PartialEq for ObjectValue {
732
fn eq(&self, other: &Self) -> bool {
733
Python::attach(|py| {
734
match self
735
.inner
736
.bind(py)
737
.rich_compare(other.inner.bind(py), CompareOp::Eq)
738
{
739
Ok(result) => result.is_truthy().unwrap(),
740
Err(_) => false,
741
}
742
})
743
}
744
}
745
746
/// Total equality is the same as `PartialEq` for `ObjectValue`.
impl TotalEq for ObjectValue {
    fn tot_eq(&self, other: &Self) -> bool {
        PartialEq::eq(self, other)
    }
}
751
752
impl TotalHash for ObjectValue {
753
fn tot_hash<H>(&self, state: &mut H)
754
where
755
H: Hasher,
756
{
757
self.hash(state);
758
}
759
}
760
761
impl Display for ObjectValue {
762
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
763
write!(f, "{}", self.inner)
764
}
765
}
766
767
/// Registers `ObjectValue` as a Polars object type named `"object"`.
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    fn type_name() -> &'static str {
        "object"
    }
}
773
774
impl From<Py<PyAny>> for ObjectValue {
775
fn from(p: Py<PyAny>) -> Self {
776
Self { inner: p }
777
}
778
}
779
780
impl<'py> FromPyObject<'py> for ObjectValue {
781
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
782
Ok(ObjectValue {
783
inner: ob.to_owned().unbind(),
784
})
785
}
786
}
787
788
/// Reinterprets a `PolarsObjectSafe` trait object reference as an `ObjectValue` reference.
///
/// # Safety
///
/// The caller is responsible for checking that val is Object otherwise UB
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        // Drop the vtable half of the fat pointer and keep only the data pointer.
        let data = val as *const dyn PolarsObjectSafe as *const ObjectValue;
        // SAFETY: not checked here; the caller guarantees `val` really is an `ObjectValue`.
        unsafe { &*data }
    }
}
797
798
impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
799
type Target = PyAny;
800
type Output = Borrowed<'a, 'py, Self::Target>;
801
type Error = std::convert::Infallible;
802
803
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
804
Ok(self.inner.bind_borrowed(py))
805
}
806
}
807
808
impl Default for ObjectValue {
809
fn default() -> Self {
810
Python::attach(|py| ObjectValue { inner: py.None() })
811
}
812
}
813
814
impl<'py, T: NativeType + FromPyObject<'py>> FromPyObject<'py> for Wrap<Vec<T>> {
815
fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {
816
let seq = obj.downcast::<PySequence>()?;
817
let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
818
for item in seq.try_iter()? {
819
v.push(item?.extract::<T>()?);
820
}
821
Ok(Wrap(v))
822
}
823
}
824
825
/// Parses the asof-join `strategy` string; unknown values raise `ValueError`.
#[cfg(feature = "asof_join")]
impl<'py> FromPyObject<'py> for Wrap<AsofStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*raw {
            "backward" => AsofStrategy::Backward,
            "forward" => AsofStrategy::Forward,
            "nearest" => AsofStrategy::Nearest,
            v => {
                return Err(PyValueError::new_err(format!(
                    "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
                )));
            },
        }))
    }
}
841
842
impl<'py> FromPyObject<'py> for Wrap<InterpolationMethod> {
843
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
844
let parsed = match &*(ob.extract::<PyBackedStr>()?) {
845
"linear" => InterpolationMethod::Linear,
846
"nearest" => InterpolationMethod::Nearest,
847
v => {
848
return Err(PyValueError::new_err(format!(
849
"interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
850
)));
851
},
852
};
853
Ok(Wrap(parsed))
854
}
855
}
856
857
/// Parses the avro `compression` string; `"uncompressed"` maps to `None` and unknown values
/// raise `ValueError`.
#[cfg(feature = "avro")]
impl<'py> FromPyObject<'py> for Wrap<Option<AvroCompression>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*raw {
            "uncompressed" => None,
            "snappy" => Some(AvroCompression::Snappy),
            "deflate" => Some(AvroCompression::Deflate),
            v => {
                return Err(PyValueError::new_err(format!(
                    "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
                )));
            },
        }))
    }
}
873
874
impl<'py> FromPyObject<'py> for Wrap<StartBy> {
875
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
876
let parsed = match &*ob.extract::<PyBackedStr>()? {
877
"window" => StartBy::WindowBound,
878
"datapoint" => StartBy::DataPoint,
879
"monday" => StartBy::Monday,
880
"tuesday" => StartBy::Tuesday,
881
"wednesday" => StartBy::Wednesday,
882
"thursday" => StartBy::Thursday,
883
"friday" => StartBy::Friday,
884
"saturday" => StartBy::Saturday,
885
"sunday" => StartBy::Sunday,
886
v => {
887
return Err(PyValueError::new_err(format!(
888
"`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
889
)));
890
},
891
};
892
Ok(Wrap(parsed))
893
}
894
}
895
896
impl<'py> FromPyObject<'py> for Wrap<ClosedWindow> {
897
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
898
let parsed = match &*ob.extract::<PyBackedStr>()? {
899
"left" => ClosedWindow::Left,
900
"right" => ClosedWindow::Right,
901
"both" => ClosedWindow::Both,
902
"none" => ClosedWindow::None,
903
v => {
904
return Err(PyValueError::new_err(format!(
905
"`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
906
)));
907
},
908
};
909
Ok(Wrap(parsed))
910
}
911
}
912
913
impl<'py> FromPyObject<'py> for Wrap<RoundMode> {
914
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
915
let parsed = match &*ob.extract::<PyBackedStr>()? {
916
"half_to_even" => RoundMode::HalfToEven,
917
"half_away_from_zero" => RoundMode::HalfAwayFromZero,
918
v => {
919
return Err(PyValueError::new_err(format!(
920
"`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
921
)));
922
},
923
};
924
Ok(Wrap(parsed))
925
}
926
}
927
928
/// Parses the csv `encoding` string; unknown values raise `ValueError`.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<CsvEncoding> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*raw {
            "utf8" => CsvEncoding::Utf8,
            "utf8-lossy" => CsvEncoding::LossyUtf8,
            v => {
                return Err(PyValueError::new_err(format!(
                    "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
                )));
            },
        }))
    }
}
943
944
/// Parses the ipc `compression` string; `"uncompressed"` maps to `None`, `"zstd"` uses the
/// default level, and unknown values raise `ValueError`.
#[cfg(feature = "ipc")]
impl<'py> FromPyObject<'py> for Wrap<Option<IpcCompression>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*raw {
            "uncompressed" => None,
            "lz4" => Some(IpcCompression::LZ4),
            "zstd" => Some(IpcCompression::ZSTD(Default::default())),
            v => {
                return Err(PyValueError::new_err(format!(
                    "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
                )));
            },
        }))
    }
}
960
961
impl<'py> FromPyObject<'py> for Wrap<JoinType> {
962
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
963
let parsed = match &*ob.extract::<PyBackedStr>()? {
964
"inner" => JoinType::Inner,
965
"left" => JoinType::Left,
966
"right" => JoinType::Right,
967
"full" => JoinType::Full,
968
"semi" => JoinType::Semi,
969
"anti" => JoinType::Anti,
970
#[cfg(feature = "cross_join")]
971
"cross" => JoinType::Cross,
972
v => {
973
return Err(PyValueError::new_err(format!(
974
"`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
975
)));
976
},
977
};
978
Ok(Wrap(parsed))
979
}
980
}
981
982
impl<'py> FromPyObject<'py> for Wrap<Label> {
983
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
984
let parsed = match &*ob.extract::<PyBackedStr>()? {
985
"left" => Label::Left,
986
"right" => Label::Right,
987
"datapoint" => Label::DataPoint,
988
v => {
989
return Err(PyValueError::new_err(format!(
990
"`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
991
)));
992
},
993
};
994
Ok(Wrap(parsed))
995
}
996
}
997
998
impl<'py> FromPyObject<'py> for Wrap<ListToStructWidthStrategy> {
999
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1000
let parsed = match &*ob.extract::<PyBackedStr>()? {
1001
"first_non_null" => ListToStructWidthStrategy::FirstNonNull,
1002
"max_width" => ListToStructWidthStrategy::MaxWidth,
1003
v => {
1004
return Err(PyValueError::new_err(format!(
1005
"`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
1006
)));
1007
},
1008
};
1009
Ok(Wrap(parsed))
1010
}
1011
}
1012
1013
impl<'py> FromPyObject<'py> for Wrap<NonExistent> {
1014
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1015
let parsed = match &*ob.extract::<PyBackedStr>()? {
1016
"null" => NonExistent::Null,
1017
"raise" => NonExistent::Raise,
1018
v => {
1019
return Err(PyValueError::new_err(format!(
1020
"`non_existent` must be one of {{'null', 'raise'}}, got {v}",
1021
)));
1022
},
1023
};
1024
Ok(Wrap(parsed))
1025
}
1026
}
1027
1028
impl<'py> FromPyObject<'py> for Wrap<NullBehavior> {
1029
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1030
let parsed = match &*ob.extract::<PyBackedStr>()? {
1031
"drop" => NullBehavior::Drop,
1032
"ignore" => NullBehavior::Ignore,
1033
v => {
1034
return Err(PyValueError::new_err(format!(
1035
"`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
1036
)));
1037
},
1038
};
1039
Ok(Wrap(parsed))
1040
}
1041
}
1042
1043
impl<'py> FromPyObject<'py> for Wrap<NullStrategy> {
1044
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1045
let parsed = match &*ob.extract::<PyBackedStr>()? {
1046
"ignore" => NullStrategy::Ignore,
1047
"propagate" => NullStrategy::Propagate,
1048
v => {
1049
return Err(PyValueError::new_err(format!(
1050
"`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
1051
)));
1052
},
1053
};
1054
Ok(Wrap(parsed))
1055
}
1056
}
1057
1058
/// Parses a Python string into a [`ParallelStrategy`].
///
/// Accepted values are `"auto"`, `"columns"`, `"row_groups"`, `"prefiltered"` and `"none"`;
/// any other string yields a `ValueError`. Errors from extracting `ob` as a string are
/// propagated unchanged.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<ParallelStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let text = ob.extract::<PyBackedStr>()?;
        match &*text {
            "auto" => Ok(Wrap(ParallelStrategy::Auto)),
            "columns" => Ok(Wrap(ParallelStrategy::Columns)),
            "row_groups" => Ok(Wrap(ParallelStrategy::RowGroups)),
            "prefiltered" => Ok(Wrap(ParallelStrategy::Prefiltered)),
            "none" => Ok(Wrap(ParallelStrategy::None)),
            v => Err(PyValueError::new_err(format!(
                "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
            ))),
        }
    }
}
1076
1077
impl<'py> FromPyObject<'py> for Wrap<IndexOrder> {
1078
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1079
let parsed = match &*ob.extract::<PyBackedStr>()? {
1080
"fortran" => IndexOrder::Fortran,
1081
"c" => IndexOrder::C,
1082
v => {
1083
return Err(PyValueError::new_err(format!(
1084
"`order` must be one of {{'fortran', 'c'}}, got {v}",
1085
)));
1086
},
1087
};
1088
Ok(Wrap(parsed))
1089
}
1090
}
1091
1092
impl<'py> FromPyObject<'py> for Wrap<QuantileMethod> {
1093
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1094
let parsed = match &*ob.extract::<PyBackedStr>()? {
1095
"lower" => QuantileMethod::Lower,
1096
"higher" => QuantileMethod::Higher,
1097
"nearest" => QuantileMethod::Nearest,
1098
"linear" => QuantileMethod::Linear,
1099
"midpoint" => QuantileMethod::Midpoint,
1100
"equiprobable" => QuantileMethod::Equiprobable,
1101
v => {
1102
return Err(PyValueError::new_err(format!(
1103
"`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1104
)));
1105
},
1106
};
1107
Ok(Wrap(parsed))
1108
}
1109
}
1110
1111
impl<'py> FromPyObject<'py> for Wrap<RankMethod> {
1112
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1113
let parsed = match &*ob.extract::<PyBackedStr>()? {
1114
"min" => RankMethod::Min,
1115
"max" => RankMethod::Max,
1116
"average" => RankMethod::Average,
1117
"dense" => RankMethod::Dense,
1118
"ordinal" => RankMethod::Ordinal,
1119
"random" => RankMethod::Random,
1120
v => {
1121
return Err(PyValueError::new_err(format!(
1122
"rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1123
)));
1124
},
1125
};
1126
Ok(Wrap(parsed))
1127
}
1128
}
1129
1130
impl<'py> FromPyObject<'py> for Wrap<RollingRankMethod> {
1131
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1132
let parsed = match &*ob.extract::<PyBackedStr>()? {
1133
"min" => RollingRankMethod::Min,
1134
"max" => RollingRankMethod::Max,
1135
"average" => RollingRankMethod::Average,
1136
"dense" => RollingRankMethod::Dense,
1137
"random" => RollingRankMethod::Random,
1138
v => {
1139
return Err(PyValueError::new_err(format!(
1140
"rank `method` must be one of {{'min', 'max', 'average', 'dense', 'random'}}, got {v}",
1141
)));
1142
},
1143
};
1144
Ok(Wrap(parsed))
1145
}
1146
}
1147
1148
impl<'py> FromPyObject<'py> for Wrap<Roll> {
1149
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1150
let parsed = match &*ob.extract::<PyBackedStr>()? {
1151
"raise" => Roll::Raise,
1152
"forward" => Roll::Forward,
1153
"backward" => Roll::Backward,
1154
v => {
1155
return Err(PyValueError::new_err(format!(
1156
"`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1157
)));
1158
},
1159
};
1160
Ok(Wrap(parsed))
1161
}
1162
}
1163
1164
impl<'py> FromPyObject<'py> for Wrap<TimeUnit> {
1165
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1166
let parsed = match &*ob.extract::<PyBackedStr>()? {
1167
"ns" => TimeUnit::Nanoseconds,
1168
"us" => TimeUnit::Microseconds,
1169
"ms" => TimeUnit::Milliseconds,
1170
v => {
1171
return Err(PyValueError::new_err(format!(
1172
"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1173
)));
1174
},
1175
};
1176
Ok(Wrap(parsed))
1177
}
1178
}
1179
1180
impl<'py> FromPyObject<'py> for Wrap<UniqueKeepStrategy> {
1181
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1182
let parsed = match &*ob.extract::<PyBackedStr>()? {
1183
"first" => UniqueKeepStrategy::First,
1184
"last" => UniqueKeepStrategy::Last,
1185
"none" => UniqueKeepStrategy::None,
1186
"any" => UniqueKeepStrategy::Any,
1187
v => {
1188
return Err(PyValueError::new_err(format!(
1189
"`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1190
)));
1191
},
1192
};
1193
Ok(Wrap(parsed))
1194
}
1195
}
1196
1197
/// Parses a Python string into a [`SearchSortedSide`].
///
/// Accepted values are `"any"`, `"left"` and `"right"`; any other string yields a
/// `ValueError`. Errors from extracting `ob` as a string are propagated unchanged.
#[cfg(feature = "search_sorted")]
impl<'py> FromPyObject<'py> for Wrap<SearchSortedSide> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let text = ob.extract::<PyBackedStr>()?;
        let v: &str = &text;
        let side = match v {
            "any" => Some(SearchSortedSide::Any),
            "left" => Some(SearchSortedSide::Left),
            "right" => Some(SearchSortedSide::Right),
            _ => None,
        };
        side.map(Wrap).ok_or_else(|| {
            PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            ))
        })
    }
}
1213
1214
impl<'py> FromPyObject<'py> for Wrap<ClosedInterval> {
1215
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1216
let parsed = match &*ob.extract::<PyBackedStr>()? {
1217
"both" => ClosedInterval::Both,
1218
"left" => ClosedInterval::Left,
1219
"right" => ClosedInterval::Right,
1220
"none" => ClosedInterval::None,
1221
v => {
1222
return Err(PyValueError::new_err(format!(
1223
"`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1224
)));
1225
},
1226
};
1227
Ok(Wrap(parsed))
1228
}
1229
}
1230
1231
impl<'py> FromPyObject<'py> for Wrap<WindowMapping> {
1232
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1233
let parsed = match &*ob.extract::<PyBackedStr>()? {
1234
"group_to_rows" => WindowMapping::GroupsToRows,
1235
"join" => WindowMapping::Join,
1236
"explode" => WindowMapping::Explode,
1237
v => {
1238
return Err(PyValueError::new_err(format!(
1239
"`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1240
)));
1241
},
1242
};
1243
Ok(Wrap(parsed))
1244
}
1245
}
1246
1247
impl<'py> FromPyObject<'py> for Wrap<JoinValidation> {
1248
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1249
let parsed = match &*ob.extract::<PyBackedStr>()? {
1250
"1:1" => JoinValidation::OneToOne,
1251
"1:m" => JoinValidation::OneToMany,
1252
"m:m" => JoinValidation::ManyToMany,
1253
"m:1" => JoinValidation::ManyToOne,
1254
v => {
1255
return Err(PyValueError::new_err(format!(
1256
"`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1257
)));
1258
},
1259
};
1260
Ok(Wrap(parsed))
1261
}
1262
}
1263
1264
impl<'py> FromPyObject<'py> for Wrap<MaintainOrderJoin> {
1265
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1266
let parsed = match &*ob.extract::<PyBackedStr>()? {
1267
"none" => MaintainOrderJoin::None,
1268
"left" => MaintainOrderJoin::Left,
1269
"right" => MaintainOrderJoin::Right,
1270
"left_right" => MaintainOrderJoin::LeftRight,
1271
"right_left" => MaintainOrderJoin::RightLeft,
1272
v => {
1273
return Err(PyValueError::new_err(format!(
1274
"`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1275
)));
1276
},
1277
};
1278
Ok(Wrap(parsed))
1279
}
1280
}
1281
1282
/// Parses a Python string into a [`QuoteStyle`].
///
/// Accepted values are `"always"`, `"necessary"`, `"non_numeric"` and `"never"`; any other
/// string yields a `ValueError`. Errors from extracting `ob` as a string are propagated
/// unchanged.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<QuoteStyle> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let text = ob.extract::<PyBackedStr>()?;
        match &*text {
            "always" => Ok(Wrap(QuoteStyle::Always)),
            "necessary" => Ok(Wrap(QuoteStyle::Necessary)),
            "non_numeric" => Ok(Wrap(QuoteStyle::NonNumeric)),
            "never" => Ok(Wrap(QuoteStyle::Never)),
            v => Err(PyValueError::new_err(format!(
                "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
            ))),
        }
    }
}
1299
1300
/// Builds [`CloudOptions`] from untyped `(key, value)` string pairs.
///
/// The pairs are handed to `CloudOptions::from_untyped_config` together with the optional
/// `cloud_scheme`. A failure there is converted through [`PyPolarsErr`] into a Python error.
#[cfg(feature = "cloud")]
pub(crate) fn parse_cloud_options(
    cloud_scheme: Option<CloudScheme>,
    keys_and_values: impl IntoIterator<Item = (String, String)>,
) -> PyResult<CloudOptions> {
    let mut pairs = keys_and_values.into_iter();
    // Pass the iterator as a trait object, exactly as the callee is invoked elsewhere.
    let iter: &mut dyn Iterator<Item = _> = &mut pairs;
    match CloudOptions::from_untyped_config(cloud_scheme, iter) {
        Ok(options) => Ok(options),
        Err(err) => Err(PyPolarsErr::from(err).into()),
    }
}
1309
1310
/// Parses a Python string into a [`SetOperation`].
///
/// Accepted values are `"union"`, `"difference"`, `"intersection"` and
/// `"symmetric_difference"`; any other string yields a `ValueError`. Errors from extracting
/// `ob` as a string are propagated unchanged.
#[cfg(feature = "list_sets")]
impl<'py> FromPyObject<'py> for Wrap<SetOperation> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let text = ob.extract::<PyBackedStr>()?;
        match &*text {
            "union" => Ok(Wrap(SetOperation::Union)),
            "difference" => Ok(Wrap(SetOperation::Difference)),
            "intersection" => Ok(Wrap(SetOperation::Intersection)),
            "symmetric_difference" => Ok(Wrap(SetOperation::SymmetricDifference)),
            v => Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            ))),
        }
    }
}
1327
1328
// Conversion from ScanCastOptions class from the Python side.
1329
impl<'py> FromPyObject<'py> for Wrap<CastColumnsPolicy> {
1330
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1331
if ob.is_none() {
1332
// Initialize the default ScanCastOptions from Python.
1333
static DEFAULT: PyOnceLock<Wrap<CastColumnsPolicy>> = PyOnceLock::new();
1334
1335
let out = DEFAULT.get_or_try_init(ob.py(), || {
1336
let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1337
.unwrap()
1338
.getattr("ScanCastOptions")
1339
.unwrap()
1340
.call_method0("_default")
1341
.unwrap();
1342
1343
let out = Self::extract_bound(&ob)?;
1344
1345
// The default policy should match ERROR_ON_MISMATCH (but this can change).
1346
debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1347
1348
PyResult::Ok(out)
1349
})?;
1350
1351
return Ok(out.clone());
1352
}
1353
1354
let py = ob.py();
1355
1356
let integer_upcast = match &*ob
1357
.getattr(intern!(py, "integer_cast"))?
1358
.extract::<PyBackedStr>()?
1359
{
1360
"upcast" => true,
1361
"forbid" => false,
1362
v => {
1363
return Err(PyValueError::new_err(format!(
1364
"unknown option for integer_cast: {v}"
1365
)));
1366
},
1367
};
1368
1369
let mut float_upcast = false;
1370
let mut float_downcast = false;
1371
1372
let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1373
1374
parse_multiple_options("float_cast", float_cast_object, |v| {
1375
match v {
1376
"forbid" => {},
1377
"upcast" => float_upcast = true,
1378
"downcast" => float_downcast = true,
1379
v => {
1380
return Err(PyValueError::new_err(format!(
1381
"unknown option for float_cast: {v}"
1382
)));
1383
},
1384
}
1385
1386
Ok(())
1387
})?;
1388
1389
let mut datetime_nanoseconds_downcast = false;
1390
let mut datetime_convert_timezone = false;
1391
1392
let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1393
1394
parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1395
match v {
1396
"forbid" => {},
1397
"nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1398
"convert-timezone" => datetime_convert_timezone = true,
1399
v => {
1400
return Err(PyValueError::new_err(format!(
1401
"unknown option for datetime_cast: {v}"
1402
)));
1403
},
1404
};
1405
1406
Ok(())
1407
})?;
1408
1409
let missing_struct_fields = match &*ob
1410
.getattr(intern!(py, "missing_struct_fields"))?
1411
.extract::<PyBackedStr>()?
1412
{
1413
"insert" => MissingColumnsPolicy::Insert,
1414
"raise" => MissingColumnsPolicy::Raise,
1415
v => {
1416
return Err(PyValueError::new_err(format!(
1417
"unknown option for missing_struct_fields: {v}"
1418
)));
1419
},
1420
};
1421
1422
let extra_struct_fields = match &*ob
1423
.getattr(intern!(py, "extra_struct_fields"))?
1424
.extract::<PyBackedStr>()?
1425
{
1426
"ignore" => ExtraColumnsPolicy::Ignore,
1427
"raise" => ExtraColumnsPolicy::Raise,
1428
v => {
1429
return Err(PyValueError::new_err(format!(
1430
"unknown option for extra_struct_fields: {v}"
1431
)));
1432
},
1433
};
1434
1435
let categorical_to_string = match &*ob
1436
.getattr(intern!(py, "categorical_to_string"))?
1437
.extract::<PyBackedStr>()?
1438
{
1439
"allow" => true,
1440
"forbid" => false,
1441
v => {
1442
return Err(PyValueError::new_err(format!(
1443
"unknown option for categorical_to_string: {v}"
1444
)));
1445
},
1446
};
1447
1448
return Ok(Wrap(CastColumnsPolicy {
1449
integer_upcast,
1450
float_upcast,
1451
float_downcast,
1452
datetime_nanoseconds_downcast,
1453
datetime_microseconds_downcast: false,
1454
datetime_convert_timezone,
1455
null_upcast: true,
1456
categorical_to_string,
1457
missing_struct_fields,
1458
extra_struct_fields,
1459
}));
1460
1461
fn parse_multiple_options(
1462
parameter_name: &'static str,
1463
py_object: Bound<'_, PyAny>,
1464
mut parser_func: impl FnMut(&str) -> PyResult<()>,
1465
) -> PyResult<()> {
1466
if let Ok(v) = py_object.extract::<PyBackedStr>() {
1467
parser_func(&v)?;
1468
} else if let Ok(v) = py_object.try_iter() {
1469
for v in v {
1470
parser_func(&v?.extract::<PyBackedStr>()?)?;
1471
}
1472
} else {
1473
return Err(PyValueError::new_err(format!(
1474
"unknown type for {parameter_name}: {py_object}"
1475
)));
1476
}
1477
1478
Ok(())
1479
}
1480
}
1481
}
1482
1483
pub(crate) fn parse_fill_null_strategy(
1484
strategy: &str,
1485
limit: FillNullLimit,
1486
) -> PyResult<FillNullStrategy> {
1487
let parsed = match strategy {
1488
"forward" => FillNullStrategy::Forward(limit),
1489
"backward" => FillNullStrategy::Backward(limit),
1490
"min" => FillNullStrategy::Min,
1491
"max" => FillNullStrategy::Max,
1492
"mean" => FillNullStrategy::Mean,
1493
"zero" => FillNullStrategy::Zero,
1494
"one" => FillNullStrategy::One,
1495
e => {
1496
return Err(PyValueError::new_err(format!(
1497
"`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1498
)));
1499
},
1500
};
1501
Ok(parsed)
1502
}
1503
1504
/// Maps a compression name and optional level onto a [`ParquetCompression`].
///
/// `compression_level` is only consulted for `"gzip"`, `"brotli"` and `"zstd"`; it is ignored
/// for `"uncompressed"`, `"snappy"` and `"lz4"`.
///
/// # Errors
///
/// Returns a `ValueError` when `compression` is not a known name, when the level does not fit
/// the integer type the codec expects, or when the codec's level constructor rejects it.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    // A plain `as u8` cast would wrap (e.g. 265 -> 9) and silently accept an
                    // out-of-range level, so convert with an explicit range check.
                    u8::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "invalid gzip compression level: {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            GzipLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    // Reject negative levels up front instead of wrapping them to huge values.
                    u32::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "invalid brotli compression level: {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            BrotliLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1544
1545
pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1546
where
1547
I: IntoIterator<Item = S>,
1548
S: AsRef<str>,
1549
{
1550
container
1551
.into_iter()
1552
.map(|s| PlSmallStr::from_str(s.as_ref()))
1553
.collect()
1554
}
1555
1556
#[derive(Debug, Copy, Clone)]
1557
pub struct PyCompatLevel(pub CompatLevel);
1558
1559
impl<'py> FromPyObject<'py> for PyCompatLevel {
1560
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1561
Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1562
if let Ok(compat_level) = CompatLevel::with_level(level) {
1563
compat_level
1564
} else {
1565
return Err(PyValueError::new_err("invalid compat level"));
1566
}
1567
} else if let Ok(future) = ob.extract::<bool>() {
1568
if future {
1569
CompatLevel::newest()
1570
} else {
1571
CompatLevel::oldest()
1572
}
1573
} else {
1574
return Err(PyTypeError::new_err(
1575
"'compat_level' argument accepts int or bool",
1576
));
1577
}))
1578
}
1579
}
1580
1581
/// Parses a Python string into a [`UnicodeForm`].
///
/// Accepted values are `"NFC"`, `"NFKC"`, `"NFD"` and `"NFKD"`; any other string yields a
/// `ValueError`. Errors from extracting `ob` as a string are propagated unchanged.
#[cfg(feature = "string_normalize")]
impl<'py> FromPyObject<'py> for Wrap<UnicodeForm> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let text = ob.extract::<PyBackedStr>()?;
        match &*text {
            "NFC" => Ok(Wrap(UnicodeForm::NFC)),
            "NFKC" => Ok(Wrap(UnicodeForm::NFKC)),
            "NFD" => Ok(Wrap(UnicodeForm::NFD)),
            "NFKD" => Ok(Wrap(UnicodeForm::NFKD)),
            v => Err(PyValueError::new_err(format!(
                "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
            ))),
        }
    }
}
1598
1599
/// Extracts optional Parquet key-value metadata from a Python object.
///
/// `None` maps to `None`. A sequence of `(str, str)` pairs is passed to
/// `KeyValueMetadata::from_static`; any other object is kept as a Python object and passed to
/// `KeyValueMetadata::from_py_function`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<Option<KeyValueMetadata>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // Variants are tried in declaration order by the derived extractor.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(Py<PyAny>),
        }

        let key_value_metadata = match ob.extract::<Option<Metadata>>()? {
            None => None,
            Some(Metadata::Static(kv)) => Some(KeyValueMetadata::from_static(kv)),
            Some(Metadata::Dynamic(func)) => Some(KeyValueMetadata::from_py_function(func)),
        };
        Ok(Wrap(key_value_metadata))
    }
}
1616
1617
impl<'py> FromPyObject<'py> for Wrap<Option<TimeZone>> {
1618
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1619
let tz = Option::<Wrap<PlSmallStr>>::extract_bound(ob)?;
1620
1621
let tz = tz.map(|x| x.0);
1622
1623
Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1624
}
1625
}
1626
1627
impl<'py> FromPyObject<'py> for Wrap<UpcastOrForbid> {
1628
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1629
let parsed = match &*ob.extract::<PyBackedStr>()? {
1630
"upcast" => UpcastOrForbid::Upcast,
1631
"forbid" => UpcastOrForbid::Forbid,
1632
v => {
1633
return Err(PyValueError::new_err(format!(
1634
"cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1635
)));
1636
},
1637
};
1638
Ok(Wrap(parsed))
1639
}
1640
}
1641
1642
impl<'py> FromPyObject<'py> for Wrap<ExtraColumnsPolicy> {
1643
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1644
let parsed = match &*ob.extract::<PyBackedStr>()? {
1645
"ignore" => ExtraColumnsPolicy::Ignore,
1646
"raise" => ExtraColumnsPolicy::Raise,
1647
v => {
1648
return Err(PyValueError::new_err(format!(
1649
"extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1650
)));
1651
},
1652
};
1653
Ok(Wrap(parsed))
1654
}
1655
}
1656
1657
impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicy> {
1658
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1659
let parsed = match &*ob.extract::<PyBackedStr>()? {
1660
"insert" => MissingColumnsPolicy::Insert,
1661
"raise" => MissingColumnsPolicy::Raise,
1662
v => {
1663
return Err(PyValueError::new_err(format!(
1664
"missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1665
)));
1666
},
1667
};
1668
Ok(Wrap(parsed))
1669
}
1670
}
1671
1672
impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicyOrExpr> {
1673
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1674
if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1675
return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1676
}
1677
1678
let parsed = match &*ob.extract::<PyBackedStr>()? {
1679
"insert" => MissingColumnsPolicyOrExpr::Insert,
1680
"raise" => MissingColumnsPolicyOrExpr::Raise,
1681
v => {
1682
return Err(PyValueError::new_err(format!(
1683
"missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1684
)));
1685
},
1686
};
1687
Ok(Wrap(parsed))
1688
}
1689
}
1690
1691
impl<'py> FromPyObject<'py> for Wrap<ColumnMapping> {
1692
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1693
let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1694
1695
Ok(Wrap(match &*column_mapping_type {
1696
"iceberg-column-mapping" => {
1697
let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
1698
ColumnMapping::Iceberg(Arc::new(
1699
IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
1700
))
1701
},
1702
1703
v => {
1704
return Err(PyValueError::new_err(format!(
1705
"unknown column mapping type: {v}"
1706
)));
1707
},
1708
}))
1709
}
1710
}
1711
1712
impl<'py> FromPyObject<'py> for Wrap<DeletionFilesList> {
1713
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1714
let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1715
1716
Ok(Wrap(match &*deletion_file_type {
1717
"iceberg-position-delete" => {
1718
let dict: Bound<'_, PyDict> = ob.extract()?;
1719
1720
let mut out = PlIndexMap::new();
1721
1722
for (k, v) in dict
1723
.try_iter()?
1724
.zip(dict.call_method0("values")?.try_iter()?)
1725
{
1726
let k: usize = k?.extract()?;
1727
let v: Bound<'_, PyAny> = v?.extract()?;
1728
1729
let files = v
1730
.try_iter()?
1731
.map(|x| {
1732
x.and_then(|x| {
1733
let x: String = x.extract()?;
1734
Ok(x)
1735
})
1736
})
1737
.collect::<PyResult<Arc<[String]>>>()?;
1738
1739
if !files.is_empty() {
1740
out.insert(k, files);
1741
}
1742
}
1743
1744
DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1745
},
1746
1747
v => {
1748
return Err(PyValueError::new_err(format!(
1749
"unknown deletion file type: {v}"
1750
)));
1751
},
1752
}))
1753
}
1754
}
1755
1756
impl<'py> FromPyObject<'py> for Wrap<DefaultFieldValues> {
1757
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1758
let (default_values_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1759
1760
Ok(Wrap(match &*default_values_type {
1761
"iceberg" => {
1762
let dict: Bound<'_, PyDict> = ob.extract()?;
1763
1764
let mut out = PlIndexMap::new();
1765
1766
for (k, v) in dict
1767
.try_iter()?
1768
.zip(dict.call_method0("values")?.try_iter()?)
1769
{
1770
let k: u32 = k?.extract()?;
1771
let v = v?;
1772
1773
let v: Result<Column, String> = if let Ok(s) = get_series(&v) {
1774
Ok(s.into_column())
1775
} else {
1776
let err_msg: String = v.extract()?;
1777
Err(err_msg)
1778
};
1779
1780
out.insert(k, v);
1781
}
1782
1783
DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
1784
out,
1785
)))
1786
},
1787
1788
v => {
1789
return Err(PyValueError::new_err(format!(
1790
"unknown deletion file type: {v}"
1791
)));
1792
},
1793
}))
1794
}
1795
}
1796
1797
impl<'py> FromPyObject<'py> for Wrap<PlPath> {
1798
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1799
if let Ok(path) = ob.extract::<PyBackedStr>() {
1800
Ok(Wrap(PlPath::new(&path)))
1801
} else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
1802
Ok(Wrap(PlPath::Local(path.into())))
1803
} else {
1804
Err(
1805
PyTypeError::new_err(format!("PlPath cannot be formed from '{}'", ob.get_type()))
1806
.into(),
1807
)
1808
}
1809
}
1810
}
1811
1812
impl<'py> IntoPyObject<'py> for Wrap<PlPath> {
1813
type Target = PyString;
1814
type Output = Bound<'py, Self::Target>;
1815
type Error = Infallible;
1816
1817
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
1818
self.0.to_str().into_pyobject(py)
1819
}
1820
}
1821
1822