// Path: blob/main/crates/polars-plan/src/dsl/file_scan/python_dataset.rs
// 8407 views
use std::fmt::Debug;1use std::sync::OnceLock;23use polars_core::error::PolarsResult;4use polars_core::schema::SchemaRef;5use polars_utils::pl_str::PlSmallStr;6use polars_utils::python_function::PythonObject;78use crate::dsl::DslPlan;910/// This is for `polars-python` to inject so that the implementation can be done there:11/// * The impls for converting from Python objects are there.12pub static DATASET_PROVIDER_VTABLE: OnceLock<PythonDatasetProviderVTable> = OnceLock::new();1314pub struct PythonDatasetProviderVTable {15pub name: fn(dataset_object: &PythonObject) -> PlSmallStr,1617pub schema: fn(dataset_object: &PythonObject) -> PolarsResult<SchemaRef>,1819#[expect(clippy::type_complexity)]20pub to_dataset_scan: fn(21dataset_object: &PythonObject,22existing_resolved_version_key: Option<&str>,23limit: Option<usize>,24projection: Option<&[PlSmallStr]>,25filter_columns: Option<&[PlSmallStr]>,26pyarrow_predicate: Option<&str>,27) -> PolarsResult<Option<(DslPlan, PlSmallStr)>>,28}2930pub fn dataset_provider_vtable() -> Result<&'static PythonDatasetProviderVTable, &'static str> {31DATASET_PROVIDER_VTABLE32.get()33.ok_or("DATASET_PROVIDER_VTABLE not initialized")34}3536/// Currently intended only for Iceberg support37#[derive(Debug)]38#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]39#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]40pub struct PythonDatasetProvider {41dataset_object: PythonObject,42}4344impl PythonDatasetProvider {45pub fn new(dataset_object: PythonObject) -> Self {46Self { dataset_object }47}4849pub fn name(&self) -> PlSmallStr {50(dataset_provider_vtable().unwrap().name)(&self.dataset_object)51}5253pub fn schema(&self) -> PolarsResult<SchemaRef> {54(dataset_provider_vtable().unwrap().schema)(&self.dataset_object)55}5657pub fn to_dataset_scan(58&self,59existing_resolved_version_key: Option<&str>,60limit: Option<usize>,61projection: Option<&[PlSmallStr]>,62filter_columns: 
Option<&[PlSmallStr]>,63pyarrow_predicate: Option<&str>,64) -> PolarsResult<Option<(DslPlan, PlSmallStr)>> {65(dataset_provider_vtable().unwrap().to_dataset_scan)(66&self.dataset_object,67existing_resolved_version_key,68limit,69projection,70filter_columns,71pyarrow_predicate,72)73}74}757677