Path: blob/main/crates/polars-python/src/io/scan_options.rs
8383 views
use std::sync::Arc;12use polars::prelude::default_values::DefaultFieldValues;3use polars::prelude::deletion::DeletionFilesList;4use polars::prelude::{5CastColumnsPolicy, CloudScheme, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy,6PlSmallStr, Schema, TableStatistics, UnifiedScanArgs,7};8use polars_io::{HiveOptions, RowIndex};9use polars_utils::IdxSize;10use polars_utils::slice_enum::Slice;11use pyo3::intern;12use pyo3::prelude::*;13use pyo3::pybacked::PyBackedStr;1415use crate::PyDataFrame;16use crate::io::cloud_options::OptPyCloudOptions;17use crate::prelude::Wrap;1819/// Interface to `class ScanOptions` on the Python side20pub struct PyScanOptions<'py>(Bound<'py, PyAny>);2122impl<'a, 'py> FromPyObject<'a, 'py> for PyScanOptions<'py> {23type Error = PyErr;2425fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {26Ok(Self(ob.to_owned()))27}28}2930impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<TableStatistics> {31type Error = PyErr;3233fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {34let py = ob.py();35let attr = ob.getattr(intern!(py, "_df"))?;36Ok(Wrap(TableStatistics(Arc::new(37PyDataFrame::extract(attr.as_borrowed())?.df.into_inner(),38))))39}40}4142impl PyScanOptions<'_> {43pub fn extract_unified_scan_args(44&self,45cloud_scheme: Option<CloudScheme>,46) -> PyResult<UnifiedScanArgs> {47#[derive(FromPyObject)]48struct Extract<'a> {49row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,50pre_slice: Option<(i64, usize)>,51cast_options: Wrap<CastColumnsPolicy>,52extra_columns: Wrap<ExtraColumnsPolicy>,53missing_columns: Wrap<MissingColumnsPolicy>,54include_file_paths: Option<Wrap<PlSmallStr>>,55glob: bool,56hidden_file_prefix: Option<Vec<PyBackedStr>>,57column_mapping: Option<Wrap<ColumnMapping>>,58default_values: Option<Wrap<DefaultFieldValues>>,59hive_partitioning: Option<bool>,60hive_schema: Option<Wrap<Schema>>,61try_parse_hive_dates: bool,62rechunk: bool,63cache: bool,64storage_options: OptPyCloudOptions<'a>,65credential_provider: Option<Py<PyAny>>,66deletion_files: Option<Wrap<DeletionFilesList>>,67table_statistics: Option<Wrap<TableStatistics>>,68row_count: Option<(u64, u64)>,69}7071let Extract {72row_index,73pre_slice,74cast_options,75extra_columns,76missing_columns,77include_file_paths,78column_mapping,79default_values,80glob,81hidden_file_prefix,82hive_partitioning,83hive_schema,84try_parse_hive_dates,85rechunk,86cache,87storage_options,88credential_provider,89deletion_files,90table_statistics,91row_count,92} = self.0.extract()?;9394let cloud_options =95storage_options.extract_opt_cloud_options(cloud_scheme, credential_provider)?;9697let hive_schema = hive_schema.map(|s| Arc::new(s.0));9899let row_index = row_index.map(|(name, offset)| RowIndex {100name: name.0,101offset,102});103104let hive_options = HiveOptions {105enabled: hive_partitioning,106hive_start_idx: 0,107schema: hive_schema,108try_parse_dates: try_parse_hive_dates,109};110111let unified_scan_args = UnifiedScanArgs {112// Schema is currently still stored inside the options per scan type, but we do eventually113// want to put it here instead.114schema: None,115cloud_options,116hive_options,117rechunk,118cache,119glob,120hidden_file_prefix: hidden_file_prefix121.map(|x| x.into_iter().map(|x| (*x).into()).collect()),122projection: None,123column_mapping: column_mapping.map(|x| x.0),124default_values: default_values125.map(|x| x.0)126.filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),127row_index,128pre_slice: pre_slice.map(Slice::from),129cast_columns_policy: cast_options.0,130missing_columns_policy: missing_columns.0,131extra_columns_policy: extra_columns.0,132include_file_paths: include_file_paths.map(|x| x.0),133deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),134table_statistics: table_statistics.map(|x| x.0),135row_count,136};137138Ok(unified_scan_args)139}140}141142143