Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-io/src/parquet/read/options.rs
6940 views
1
use polars_core::schema::SchemaRef;
2
#[cfg(feature = "serde")]
3
use serde::{Deserialize, Serialize};
4
5
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
6
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
7
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
8
pub struct ParquetOptions {
9
pub schema: Option<SchemaRef>,
10
pub parallel: ParallelStrategy,
11
pub low_memory: bool,
12
pub use_statistics: bool,
13
}
14
15
impl Default for ParquetOptions {
16
fn default() -> Self {
17
Self {
18
schema: None,
19
parallel: ParallelStrategy::default(),
20
low_memory: false,
21
use_statistics: true,
22
}
23
}
24
}
25
26
/// Strategy that selects the unit of work over which a Parquet read is
/// parallelized.
///
/// The default variant is [`ParallelStrategy::Auto`].
#[derive(Copy, Clone, Debug, Eq, PartialEq, Default, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum ParallelStrategy {
    /// Don't parallelize.
    None,
    /// Parallelize over the columns.
    Columns,
    /// Parallelize over the row groups.
    RowGroups,
    /// First evaluates the pushed-down predicates in parallel and determines a mask of which rows
    /// to read. Then, it parallelizes over both the columns and the row groups while filtering out
    /// rows that do not need to be read. This can provide significant speedups for large files
    /// (i.e. many row-groups) with a predicate that filters clustered rows or filters heavily. In
    /// other cases, this may slow down the scan compared to other strategies.
    ///
    /// If no predicate is given, this falls back to [`ParallelStrategy::Auto`].
    Prefiltered,
    /// Automatically determine over which unit to parallelize.
    ///
    /// This will choose the most occurring unit.
    #[default]
    Auto,
}
49
50