Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-python/src/io/scan_options.rs
7889 views
1
use std::sync::Arc;
2
3
use polars::prelude::default_values::DefaultFieldValues;
4
use polars::prelude::deletion::DeletionFilesList;
5
use polars::prelude::{
6
CastColumnsPolicy, CloudScheme, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy,
7
PlSmallStr, Schema, TableStatistics, UnifiedScanArgs,
8
};
9
use polars_io::{HiveOptions, RowIndex};
10
use polars_utils::IdxSize;
11
use polars_utils::slice_enum::Slice;
12
use pyo3::pybacked::PyBackedStr;
13
use pyo3::types::PyAnyMethods;
14
use pyo3::{Bound, FromPyObject, Py, PyAny, PyResult, intern};
15
16
use crate::PyDataFrame;
17
use crate::functions::parse_cloud_options;
18
use crate::prelude::Wrap;
19
20
/// Interface to `class ScanOptions` on the Python side
21
pub struct PyScanOptions<'py>(Bound<'py, pyo3::PyAny>);
22
23
impl<'py> FromPyObject<'py> for PyScanOptions<'py> {
24
fn extract_bound(ob: &Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {
25
Ok(Self(ob.clone()))
26
}
27
}
28
29
impl<'py> FromPyObject<'py> for Wrap<TableStatistics> {
30
fn extract_bound(ob: &Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {
31
let py = ob.py();
32
Ok(Wrap(TableStatistics(Arc::new(
33
PyDataFrame::extract_bound(&ob.getattr(intern!(py, "_df"))?)?
34
.df
35
.into_inner(),
36
))))
37
}
38
}
39
40
impl PyScanOptions<'_> {
41
pub fn extract_unified_scan_args(
42
&self,
43
cloud_scheme: Option<CloudScheme>,
44
) -> PyResult<UnifiedScanArgs> {
45
#[derive(FromPyObject)]
46
struct Extract {
47
row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,
48
pre_slice: Option<(i64, usize)>,
49
cast_options: Wrap<CastColumnsPolicy>,
50
extra_columns: Wrap<ExtraColumnsPolicy>,
51
missing_columns: Wrap<MissingColumnsPolicy>,
52
include_file_paths: Option<Wrap<PlSmallStr>>,
53
glob: bool,
54
hidden_file_prefix: Option<Vec<PyBackedStr>>,
55
column_mapping: Option<Wrap<ColumnMapping>>,
56
default_values: Option<Wrap<DefaultFieldValues>>,
57
hive_partitioning: Option<bool>,
58
hive_schema: Option<Wrap<Schema>>,
59
try_parse_hive_dates: bool,
60
rechunk: bool,
61
cache: bool,
62
storage_options: Option<Vec<(String, String)>>,
63
credential_provider: Option<Py<PyAny>>,
64
retries: usize,
65
deletion_files: Option<Wrap<DeletionFilesList>>,
66
table_statistics: Option<Wrap<TableStatistics>>,
67
row_count: Option<(u64, u64)>,
68
}
69
70
let Extract {
71
row_index,
72
pre_slice,
73
cast_options,
74
extra_columns,
75
missing_columns,
76
include_file_paths,
77
column_mapping,
78
default_values,
79
glob,
80
hidden_file_prefix,
81
hive_partitioning,
82
hive_schema,
83
try_parse_hive_dates,
84
rechunk,
85
cache,
86
storage_options,
87
credential_provider,
88
retries,
89
deletion_files,
90
table_statistics,
91
row_count,
92
} = self.0.extract()?;
93
94
let cloud_options =
95
parse_cloud_options(cloud_scheme, storage_options, credential_provider, retries)?;
96
97
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
98
99
let row_index = row_index.map(|(name, offset)| RowIndex {
100
name: name.0,
101
offset,
102
});
103
104
let hive_options = HiveOptions {
105
enabled: hive_partitioning,
106
hive_start_idx: 0,
107
schema: hive_schema,
108
try_parse_dates: try_parse_hive_dates,
109
};
110
111
let unified_scan_args = UnifiedScanArgs {
112
// Schema is currently still stored inside the options per scan type, but we do eventually
113
// want to put it here instead.
114
schema: None,
115
cloud_options,
116
hive_options,
117
rechunk,
118
cache,
119
glob,
120
hidden_file_prefix: hidden_file_prefix
121
.map(|x| x.into_iter().map(|x| (*x).into()).collect()),
122
projection: None,
123
column_mapping: column_mapping.map(|x| x.0),
124
default_values: default_values
125
.map(|x| x.0)
126
.filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
127
row_index,
128
pre_slice: pre_slice.map(Slice::from),
129
cast_columns_policy: cast_options.0,
130
missing_columns_policy: missing_columns.0,
131
extra_columns_policy: extra_columns.0,
132
include_file_paths: include_file_paths.map(|x| x.0),
133
deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
134
table_statistics: table_statistics.map(|x| x.0),
135
row_count,
136
};
137
138
Ok(unified_scan_args)
139
}
140
}
141
142