Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/dsl/file_scan/python_dataset.rs
8407 views
1
use std::fmt::Debug;
2
use std::sync::OnceLock;
3
4
use polars_core::error::PolarsResult;
5
use polars_core::schema::SchemaRef;
6
use polars_utils::pl_str::PlSmallStr;
7
use polars_utils::python_function::PythonObject;
8
9
use crate::dsl::DslPlan;
10
11
/// Injected by `polars-python` so that the implementation can live in that crate:
/// * The impls for converting from Python objects are there.
13
///
/// Starts out empty (`OnceLock::new()`); readers in this file go through
/// [`dataset_provider_vtable`], which reports an error while it is still unset.
pub static DATASET_PROVIDER_VTABLE: OnceLock<PythonDatasetProviderVTable> = OnceLock::new();
14
15
pub struct PythonDatasetProviderVTable {
16
pub name: fn(dataset_object: &PythonObject) -> PlSmallStr,
17
18
pub schema: fn(dataset_object: &PythonObject) -> PolarsResult<SchemaRef>,
19
20
#[expect(clippy::type_complexity)]
21
pub to_dataset_scan: fn(
22
dataset_object: &PythonObject,
23
existing_resolved_version_key: Option<&str>,
24
limit: Option<usize>,
25
projection: Option<&[PlSmallStr]>,
26
filter_columns: Option<&[PlSmallStr]>,
27
pyarrow_predicate: Option<&str>,
28
) -> PolarsResult<Option<(DslPlan, PlSmallStr)>>,
29
}
30
31
pub fn dataset_provider_vtable() -> Result<&'static PythonDatasetProviderVTable, &'static str> {
32
DATASET_PROVIDER_VTABLE
33
.get()
34
.ok_or("DATASET_PROVIDER_VTABLE not initialized")
35
}
36
37
/// Currently intended only for Iceberg support
38
#[derive(Debug)]
39
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
40
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
41
pub struct PythonDatasetProvider {
42
dataset_object: PythonObject,
43
}
44
45
impl PythonDatasetProvider {
46
pub fn new(dataset_object: PythonObject) -> Self {
47
Self { dataset_object }
48
}
49
50
pub fn name(&self) -> PlSmallStr {
51
(dataset_provider_vtable().unwrap().name)(&self.dataset_object)
52
}
53
54
pub fn schema(&self) -> PolarsResult<SchemaRef> {
55
(dataset_provider_vtable().unwrap().schema)(&self.dataset_object)
56
}
57
58
pub fn to_dataset_scan(
59
&self,
60
existing_resolved_version_key: Option<&str>,
61
limit: Option<usize>,
62
projection: Option<&[PlSmallStr]>,
63
filter_columns: Option<&[PlSmallStr]>,
64
pyarrow_predicate: Option<&str>,
65
) -> PolarsResult<Option<(DslPlan, PlSmallStr)>> {
66
(dataset_provider_vtable().unwrap().to_dataset_scan)(
67
&self.dataset_object,
68
existing_resolved_version_key,
69
limit,
70
projection,
71
filter_columns,
72
pyarrow_predicate,
73
)
74
}
75
}
76
77