Path: blob/main/crates/polars-python/src/io/scan_options.rs
7889 views
use std::sync::Arc;12use polars::prelude::default_values::DefaultFieldValues;3use polars::prelude::deletion::DeletionFilesList;4use polars::prelude::{5CastColumnsPolicy, CloudScheme, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy,6PlSmallStr, Schema, TableStatistics, UnifiedScanArgs,7};8use polars_io::{HiveOptions, RowIndex};9use polars_utils::IdxSize;10use polars_utils::slice_enum::Slice;11use pyo3::pybacked::PyBackedStr;12use pyo3::types::PyAnyMethods;13use pyo3::{Bound, FromPyObject, Py, PyAny, PyResult, intern};1415use crate::PyDataFrame;16use crate::functions::parse_cloud_options;17use crate::prelude::Wrap;1819/// Interface to `class ScanOptions` on the Python side20pub struct PyScanOptions<'py>(Bound<'py, pyo3::PyAny>);2122impl<'py> FromPyObject<'py> for PyScanOptions<'py> {23fn extract_bound(ob: &Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {24Ok(Self(ob.clone()))25}26}2728impl<'py> FromPyObject<'py> for Wrap<TableStatistics> {29fn extract_bound(ob: &Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {30let py = ob.py();31Ok(Wrap(TableStatistics(Arc::new(32PyDataFrame::extract_bound(&ob.getattr(intern!(py, "_df"))?)?33.df34.into_inner(),35))))36}37}3839impl PyScanOptions<'_> {40pub fn extract_unified_scan_args(41&self,42cloud_scheme: Option<CloudScheme>,43) -> PyResult<UnifiedScanArgs> {44#[derive(FromPyObject)]45struct Extract {46row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,47pre_slice: Option<(i64, usize)>,48cast_options: Wrap<CastColumnsPolicy>,49extra_columns: Wrap<ExtraColumnsPolicy>,50missing_columns: Wrap<MissingColumnsPolicy>,51include_file_paths: Option<Wrap<PlSmallStr>>,52glob: bool,53hidden_file_prefix: Option<Vec<PyBackedStr>>,54column_mapping: Option<Wrap<ColumnMapping>>,55default_values: Option<Wrap<DefaultFieldValues>>,56hive_partitioning: Option<bool>,57hive_schema: Option<Wrap<Schema>>,58try_parse_hive_dates: bool,59rechunk: bool,60cache: bool,61storage_options: Option<Vec<(String, String)>>,62credential_provider: Option<Py<PyAny>>,63retries: usize,64deletion_files: Option<Wrap<DeletionFilesList>>,65table_statistics: Option<Wrap<TableStatistics>>,66row_count: Option<(u64, u64)>,67}6869let Extract {70row_index,71pre_slice,72cast_options,73extra_columns,74missing_columns,75include_file_paths,76column_mapping,77default_values,78glob,79hidden_file_prefix,80hive_partitioning,81hive_schema,82try_parse_hive_dates,83rechunk,84cache,85storage_options,86credential_provider,87retries,88deletion_files,89table_statistics,90row_count,91} = self.0.extract()?;9293let cloud_options =94parse_cloud_options(cloud_scheme, storage_options, credential_provider, retries)?;9596let hive_schema = hive_schema.map(|s| Arc::new(s.0));9798let row_index = row_index.map(|(name, offset)| RowIndex {99name: name.0,100offset,101});102103let hive_options = HiveOptions {104enabled: hive_partitioning,105hive_start_idx: 0,106schema: hive_schema,107try_parse_dates: try_parse_hive_dates,108};109110let unified_scan_args = UnifiedScanArgs {111// Schema is currently still stored inside the options per scan type, but we do eventually112// want to put it here instead.113schema: None,114cloud_options,115hive_options,116rechunk,117cache,118glob,119hidden_file_prefix: hidden_file_prefix120.map(|x| x.into_iter().map(|x| (*x).into()).collect()),121projection: None,122column_mapping: column_mapping.map(|x| x.0),123default_values: default_values124.map(|x| x.0)125.filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),126row_index,127pre_slice: pre_slice.map(Slice::from),128cast_columns_policy: cast_options.0,129missing_columns_policy: missing_columns.0,130extra_columns_policy: extra_columns.0,131include_file_paths: include_file_paths.map(|x| x.0),132deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),133table_statistics: table_statistics.map(|x| x.0),134row_count,135};136137Ok(unified_scan_args)138}139}140141142