CoCalc -- options.rs

GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-io/src/parquet/read/options.rs
⁶⁹⁴⁰ views
1
use polars_core::schema::SchemaRef;
2
#[cfg(feature = "serde")]
3
use serde::{Deserialize, Serialize};
4

5
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
6
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
7
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
8
pub struct ParquetOptions {
9
    pub schema: Option<SchemaRef>,
10
    pub parallel: ParallelStrategy,
11
    pub low_memory: bool,
12
    pub use_statistics: bool,
13
}
14

15
impl Default for ParquetOptions {
16
    fn default() -> Self {
17
        Self {
18
            schema: None,
19
            parallel: ParallelStrategy::default(),
20
            low_memory: false,
21
            use_statistics: true,
22
        }
23
    }
24
}
25

26
/// The unit over which a Parquet read is parallelized.
///
/// The default is [`ParallelStrategy::Auto`].
#[derive(Copy, Clone, Debug, Eq, PartialEq, Default, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum ParallelStrategy {
    /// Don't parallelize.
    None,
    /// Parallelize over the columns.
    Columns,
    /// Parallelize over the row groups.
    RowGroups,
    /// First evaluates the pushed-down predicates in parallel and determines a mask of which rows
    /// to read. Then, it parallelizes over both the columns and the row groups while filtering out
    /// rows that do not need to be read. This can provide significant speedups for large files
    /// (i.e. many row-groups) with a predicate that filters clustered rows or filters heavily. In
    /// other cases, this may slow down the scan compared to other strategies.
    ///
    /// If no predicate is given, this falls back to [`ParallelStrategy::Auto`].
    Prefiltered,
    /// Automatically determine over which unit to parallelize.
    /// This will choose the most occurring unit.
    #[default]
    Auto,
}
49

50
Product

Resources

Company