Path: blob/main/crates/polars-io/src/parquet/read/options.rs
6940 views
use polars_core::schema::SchemaRef;1#[cfg(feature = "serde")]2use serde::{Deserialize, Serialize};34#[derive(Clone, Debug, PartialEq, Eq, Hash)]5#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]6#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]7pub struct ParquetOptions {8pub schema: Option<SchemaRef>,9pub parallel: ParallelStrategy,10pub low_memory: bool,11pub use_statistics: bool,12}1314impl Default for ParquetOptions {15fn default() -> Self {16Self {17schema: None,18parallel: ParallelStrategy::default(),19low_memory: false,20use_statistics: true,21}22}23}2425#[derive(Copy, Clone, Debug, Eq, PartialEq, Default, Hash)]26#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]27#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]28pub enum ParallelStrategy {29/// Don't parallelize30None,31/// Parallelize over the columns32Columns,33/// Parallelize over the row groups34RowGroups,35/// First evaluates the pushed-down predicates in parallel and determines a mask of which rows36/// to read. Then, it parallelizes over both the columns and the row groups while filtering out37/// rows that do not need to be read. This can provide significant speedups for large files38/// (i.e. many row-groups) with a predicate that filters clustered rows or filters heavily. In39/// other cases, this may slow down the scan compared other strategies.40///41/// If no predicate is given, this falls back to back to [`ParallelStrategy::Auto`].42Prefiltered,43/// Automatically determine over which unit to parallelize44/// This will choose the most occurring unit.45#[default]46Auto,47}484950