Path: blob/main/crates/polars-io/src/parquet/write/options.rs
6940 views
use polars_error::PolarsResult;1use polars_parquet::write::{2BrotliLevel as BrotliLevelParquet, CompressionOptions, GzipLevel as GzipLevelParquet,3StatisticsOptions, ZstdLevel as ZstdLevelParquet,4};5use polars_utils::pl_str::PlSmallStr;6#[cfg(feature = "serde")]7use serde::{Deserialize, Serialize};89use super::KeyValueMetadata;1011#[derive(Clone, Debug, PartialEq, Eq, Default, Hash)]12#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]13#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]14pub struct ParquetWriteOptions {15/// Data page compression16pub compression: ParquetCompression,17/// Compute and write column statistics.18pub statistics: StatisticsOptions,19/// If `None` will be all written to a single row group.20pub row_group_size: Option<usize>,21/// if `None` will be 1024^2 bytes22pub data_page_size: Option<usize>,23/// Custom file-level key value metadata24pub key_value_metadata: Option<KeyValueMetadata>,2526/// Per-field overwrites for writing properties.27pub field_overwrites: Vec<ParquetFieldOverwrites>,28}2930#[derive(Clone, Debug, PartialEq, Eq, Hash)]31#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]32#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]33pub enum ChildFieldOverwrites {34/// Flat datatypes35None,36/// List / Array37ListLike(Box<ParquetFieldOverwrites>),38Struct(Vec<ParquetFieldOverwrites>),39}4041#[derive(Clone, Debug, PartialEq, Eq, Hash)]42#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]43#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]44pub struct MetadataKeyValue {45pub key: PlSmallStr,46pub value: Option<PlSmallStr>,47}4849#[derive(Clone, Debug, PartialEq, Eq, Hash)]50#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]51#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]52pub struct ParquetFieldOverwrites {53pub name: Option<PlSmallStr>,54pub children: ChildFieldOverwrites,5556pub required: Option<bool>,57pub field_id: Option<i32>,58pub metadata: Option<Vec<MetadataKeyValue>>,59}6061/// The compression strategy to use for writing Parquet files.62#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]63#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]64#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]65pub enum ParquetCompression {66Uncompressed,67Snappy,68Gzip(Option<GzipLevel>),69Lzo,70Brotli(Option<BrotliLevel>),71Zstd(Option<ZstdLevel>),72Lz4Raw,73}7475impl Default for ParquetCompression {76fn default() -> Self {77Self::Zstd(None)78}79}8081/// A valid Gzip compression level.82#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]83#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]84#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]85pub struct GzipLevel(u8);8687impl GzipLevel {88pub fn try_new(level: u8) -> PolarsResult<Self> {89GzipLevelParquet::try_new(level)?;90Ok(GzipLevel(level))91}92}9394/// A valid Brotli compression level.95#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]96#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]97#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]98pub struct BrotliLevel(u32);99100impl BrotliLevel {101pub fn try_new(level: u32) -> PolarsResult<Self> {102BrotliLevelParquet::try_new(level)?;103Ok(BrotliLevel(level))104}105}106107/// A valid Zstandard compression level.108#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]109#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]110#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]111pub struct ZstdLevel(i32);112113impl ZstdLevel {114pub fn try_new(level: i32) -> PolarsResult<Self> {115ZstdLevelParquet::try_new(level)?;116Ok(ZstdLevel(level))117}118}119120impl From<ParquetCompression> for CompressionOptions {121fn from(value: ParquetCompression) -> Self {122use ParquetCompression::*;123match value {124Uncompressed => CompressionOptions::Uncompressed,125Snappy => CompressionOptions::Snappy,126Gzip(level) => {127CompressionOptions::Gzip(level.map(|v| GzipLevelParquet::try_new(v.0).unwrap()))128},129Lzo => CompressionOptions::Lzo,130Brotli(level) => {131CompressionOptions::Brotli(level.map(|v| BrotliLevelParquet::try_new(v.0).unwrap()))132},133Lz4Raw => CompressionOptions::Lz4Raw,134Zstd(level) => {135CompressionOptions::Zstd(level.map(|v| ZstdLevelParquet::try_new(v.0).unwrap()))136},137}138}139}140141142