Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/dsl/file_scan/python_dataset.rs
6940 views
1
use std::fmt::Debug;
2
use std::sync::OnceLock;
3
4
use polars_core::error::PolarsResult;
5
use polars_core::schema::SchemaRef;
6
use polars_utils::pl_str::PlSmallStr;
7
use polars_utils::python_function::PythonObject;
8
9
use crate::dsl::DslPlan;
10
11
/// This is for `polars-python` to inject so that the implementation can be done there:
12
/// * The impls for converting from Python objects are there.
13
pub static DATASET_PROVIDER_VTABLE: OnceLock<PythonDatasetProviderVTable> = OnceLock::new();
14
15
pub struct PythonDatasetProviderVTable {
16
pub name: fn(dataset_object: &PythonObject) -> PlSmallStr,
17
18
pub schema: fn(dataset_object: &PythonObject) -> PolarsResult<SchemaRef>,
19
20
#[expect(clippy::type_complexity)]
21
pub to_dataset_scan: fn(
22
dataset_object: &PythonObject,
23
existing_resolved_version_key: Option<&str>,
24
limit: Option<usize>,
25
projection: Option<&[PlSmallStr]>,
26
) -> PolarsResult<Option<(DslPlan, PlSmallStr)>>,
27
}
28
29
pub fn dataset_provider_vtable() -> Result<&'static PythonDatasetProviderVTable, &'static str> {
30
DATASET_PROVIDER_VTABLE
31
.get()
32
.ok_or("DATASET_PROVIDER_VTABLE not initialized")
33
}
34
35
/// Currently intended only for Iceberg support
36
#[derive(Debug)]
37
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
38
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
39
pub struct PythonDatasetProvider {
40
dataset_object: PythonObject,
41
}
42
43
impl PythonDatasetProvider {
44
pub fn new(dataset_object: PythonObject) -> Self {
45
Self { dataset_object }
46
}
47
48
pub fn name(&self) -> PlSmallStr {
49
(dataset_provider_vtable().unwrap().name)(&self.dataset_object)
50
}
51
52
pub fn schema(&self) -> PolarsResult<SchemaRef> {
53
(dataset_provider_vtable().unwrap().schema)(&self.dataset_object)
54
}
55
56
pub fn to_dataset_scan(
57
&self,
58
existing_resolved_version_key: Option<&str>,
59
limit: Option<usize>,
60
projection: Option<&[PlSmallStr]>,
61
) -> PolarsResult<Option<(DslPlan, PlSmallStr)>> {
62
(dataset_provider_vtable().unwrap().to_dataset_scan)(
63
&self.dataset_object,
64
existing_resolved_version_key,
65
limit,
66
projection,
67
)
68
}
69
}
70
71