Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-python/src/io/scan_options.rs
8383 views
1
use std::sync::Arc;
2
3
use polars::prelude::default_values::DefaultFieldValues;
4
use polars::prelude::deletion::DeletionFilesList;
5
use polars::prelude::{
6
CastColumnsPolicy, CloudScheme, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy,
7
PlSmallStr, Schema, TableStatistics, UnifiedScanArgs,
8
};
9
use polars_io::{HiveOptions, RowIndex};
10
use polars_utils::IdxSize;
11
use polars_utils::slice_enum::Slice;
12
use pyo3::intern;
13
use pyo3::prelude::*;
14
use pyo3::pybacked::PyBackedStr;
15
16
use crate::PyDataFrame;
17
use crate::io::cloud_options::OptPyCloudOptions;
18
use crate::prelude::Wrap;
19
20
/// Interface to `class ScanOptions` on the Python side
21
///
/// This is a newtype around a `Bound<'py, PyAny>` handle. Constructing it via
/// `FromPyObject` stores the object as-is; its attributes are only read when
/// `extract_unified_scan_args` is called.
pub struct PyScanOptions<'py>(Bound<'py, PyAny>);
22
23
impl<'a, 'py> FromPyObject<'a, 'py> for PyScanOptions<'py> {
24
type Error = PyErr;
25
26
fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
27
Ok(Self(ob.to_owned()))
28
}
29
}
30
31
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<TableStatistics> {
32
type Error = PyErr;
33
34
fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
35
let py = ob.py();
36
let attr = ob.getattr(intern!(py, "_df"))?;
37
Ok(Wrap(TableStatistics(Arc::new(
38
PyDataFrame::extract(attr.as_borrowed())?.df.into_inner(),
39
))))
40
}
41
}
42
43
impl PyScanOptions<'_> {
44
pub fn extract_unified_scan_args(
45
&self,
46
cloud_scheme: Option<CloudScheme>,
47
) -> PyResult<UnifiedScanArgs> {
48
#[derive(FromPyObject)]
49
struct Extract<'a> {
50
row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,
51
pre_slice: Option<(i64, usize)>,
52
cast_options: Wrap<CastColumnsPolicy>,
53
extra_columns: Wrap<ExtraColumnsPolicy>,
54
missing_columns: Wrap<MissingColumnsPolicy>,
55
include_file_paths: Option<Wrap<PlSmallStr>>,
56
glob: bool,
57
hidden_file_prefix: Option<Vec<PyBackedStr>>,
58
column_mapping: Option<Wrap<ColumnMapping>>,
59
default_values: Option<Wrap<DefaultFieldValues>>,
60
hive_partitioning: Option<bool>,
61
hive_schema: Option<Wrap<Schema>>,
62
try_parse_hive_dates: bool,
63
rechunk: bool,
64
cache: bool,
65
storage_options: OptPyCloudOptions<'a>,
66
credential_provider: Option<Py<PyAny>>,
67
deletion_files: Option<Wrap<DeletionFilesList>>,
68
table_statistics: Option<Wrap<TableStatistics>>,
69
row_count: Option<(u64, u64)>,
70
}
71
72
let Extract {
73
row_index,
74
pre_slice,
75
cast_options,
76
extra_columns,
77
missing_columns,
78
include_file_paths,
79
column_mapping,
80
default_values,
81
glob,
82
hidden_file_prefix,
83
hive_partitioning,
84
hive_schema,
85
try_parse_hive_dates,
86
rechunk,
87
cache,
88
storage_options,
89
credential_provider,
90
deletion_files,
91
table_statistics,
92
row_count,
93
} = self.0.extract()?;
94
95
let cloud_options =
96
storage_options.extract_opt_cloud_options(cloud_scheme, credential_provider)?;
97
98
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
99
100
let row_index = row_index.map(|(name, offset)| RowIndex {
101
name: name.0,
102
offset,
103
});
104
105
let hive_options = HiveOptions {
106
enabled: hive_partitioning,
107
hive_start_idx: 0,
108
schema: hive_schema,
109
try_parse_dates: try_parse_hive_dates,
110
};
111
112
let unified_scan_args = UnifiedScanArgs {
113
// Schema is currently still stored inside the options per scan type, but we do eventually
114
// want to put it here instead.
115
schema: None,
116
cloud_options,
117
hive_options,
118
rechunk,
119
cache,
120
glob,
121
hidden_file_prefix: hidden_file_prefix
122
.map(|x| x.into_iter().map(|x| (*x).into()).collect()),
123
projection: None,
124
column_mapping: column_mapping.map(|x| x.0),
125
default_values: default_values
126
.map(|x| x.0)
127
.filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
128
row_index,
129
pre_slice: pre_slice.map(Slice::from),
130
cast_columns_policy: cast_options.0,
131
missing_columns_policy: missing_columns.0,
132
extra_columns_policy: extra_columns.0,
133
include_file_paths: include_file_paths.map(|x| x.0),
134
deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
135
table_statistics: table_statistics.map(|x| x.0),
136
row_count,
137
};
138
139
Ok(unified_scan_args)
140
}
141
}
142
143