Path: blob/main/crates/polars-plan/src/dsl/file_scan/python_dataset.rs
6940 views
use std::fmt::Debug;1use std::sync::OnceLock;23use polars_core::error::PolarsResult;4use polars_core::schema::SchemaRef;5use polars_utils::pl_str::PlSmallStr;6use polars_utils::python_function::PythonObject;78use crate::dsl::DslPlan;910/// This is for `polars-python` to inject so that the implementation can be done there:11/// * The impls for converting from Python objects are there.12pub static DATASET_PROVIDER_VTABLE: OnceLock<PythonDatasetProviderVTable> = OnceLock::new();1314pub struct PythonDatasetProviderVTable {15pub name: fn(dataset_object: &PythonObject) -> PlSmallStr,1617pub schema: fn(dataset_object: &PythonObject) -> PolarsResult<SchemaRef>,1819#[expect(clippy::type_complexity)]20pub to_dataset_scan: fn(21dataset_object: &PythonObject,22existing_resolved_version_key: Option<&str>,23limit: Option<usize>,24projection: Option<&[PlSmallStr]>,25) -> PolarsResult<Option<(DslPlan, PlSmallStr)>>,26}2728pub fn dataset_provider_vtable() -> Result<&'static PythonDatasetProviderVTable, &'static str> {29DATASET_PROVIDER_VTABLE30.get()31.ok_or("DATASET_PROVIDER_VTABLE not initialized")32}3334/// Currently intended only for Iceberg support35#[derive(Debug)]36#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]37#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]38pub struct PythonDatasetProvider {39dataset_object: PythonObject,40}4142impl PythonDatasetProvider {43pub fn new(dataset_object: PythonObject) -> Self {44Self { dataset_object }45}4647pub fn name(&self) -> PlSmallStr {48(dataset_provider_vtable().unwrap().name)(&self.dataset_object)49}5051pub fn schema(&self) -> PolarsResult<SchemaRef> {52(dataset_provider_vtable().unwrap().schema)(&self.dataset_object)53}5455pub fn to_dataset_scan(56&self,57existing_resolved_version_key: Option<&str>,58limit: Option<usize>,59projection: Option<&[PlSmallStr]>,60) -> PolarsResult<Option<(DslPlan, PlSmallStr)>> {61(dataset_provider_vtable().unwrap().to_dataset_scan)(62&self.dataset_object,63existing_resolved_version_key,64limit,65projection,66)67}68}697071