GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-io/src/parquet/write/options.rs
use polars_error::PolarsResult;
use polars_parquet::write::{
    BrotliLevel as BrotliLevelParquet, CompressionOptions, GzipLevel as GzipLevelParquet,
    StatisticsOptions, ZstdLevel as ZstdLevelParquet,
};
use polars_utils::pl_str::PlSmallStr;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use super::KeyValueMetadata;

#[derive(Clone, Debug, PartialEq, Eq, Default, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct ParquetWriteOptions {
    /// Data page compression.
    pub compression: ParquetCompression,
    /// Compute and write column statistics.
    pub statistics: StatisticsOptions,
    /// If `None`, all data is written to a single row group.
    pub row_group_size: Option<usize>,
    /// If `None`, the data page size defaults to 1024^2 bytes.
    pub data_page_size: Option<usize>,
    /// Custom file-level key-value metadata.
    pub key_value_metadata: Option<KeyValueMetadata>,

    /// Per-field overwrites for writing properties.
    pub field_overwrites: Vec<ParquetFieldOverwrites>,
}

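// Illustrative usage sketch (example values, not part of the original file):
// override only the compression, letting the derived `Default` fill in the
// remaining fields. The Zstd level `3` is an arbitrary example.
//
//     fn example_options() -> PolarsResult<ParquetWriteOptions> {
//         Ok(ParquetWriteOptions {
//             compression: ParquetCompression::Zstd(Some(ZstdLevel::try_new(3)?)),
//             ..Default::default()
//         })
//     }
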
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum ChildFieldOverwrites {
    /// Flat datatypes
    None,
    /// List / Array
    ListLike(Box<ParquetFieldOverwrites>),
    /// Struct fields
    Struct(Vec<ParquetFieldOverwrites>),
}

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct MetadataKeyValue {
    pub key: PlSmallStr,
    pub value: Option<PlSmallStr>,
}

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct ParquetFieldOverwrites {
    pub name: Option<PlSmallStr>,
    pub children: ChildFieldOverwrites,

    pub required: Option<bool>,
    pub field_id: Option<i32>,
    pub metadata: Option<Vec<MetadataKeyValue>>,
}

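// Illustrative sketch (example values and names, not from this file): a
// per-field overwrite for a flat column, attaching a Parquet field id and
// one key-value metadata entry.
//
//     let overwrite = ParquetFieldOverwrites {
//         name: Some(PlSmallStr::from_static("user_id")),
//         children: ChildFieldOverwrites::None,
//         required: Some(true),
//         field_id: Some(1),
//         metadata: Some(vec![MetadataKeyValue {
//             key: PlSmallStr::from_static("origin"),
//             value: Some(PlSmallStr::from_static("crm")),
//         }]),
//     };
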
/// The compression strategy to use for writing Parquet files.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum ParquetCompression {
    Uncompressed,
    Snappy,
    Gzip(Option<GzipLevel>),
    Lzo,
    Brotli(Option<BrotliLevel>),
    Zstd(Option<ZstdLevel>),
    Lz4Raw,
}

impl Default for ParquetCompression {
    fn default() -> Self {
        Self::Zstd(None)
    }
}

/// A valid Gzip compression level.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct GzipLevel(u8);

impl GzipLevel {
    pub fn try_new(level: u8) -> PolarsResult<Self> {
        // Validate the level against the underlying parquet writer; an
        // out-of-range level surfaces as an error here.
        GzipLevelParquet::try_new(level)?;
        Ok(GzipLevel(level))
    }
}

/// A valid Brotli compression level.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct BrotliLevel(u32);

impl BrotliLevel {
    pub fn try_new(level: u32) -> PolarsResult<Self> {
        BrotliLevelParquet::try_new(level)?;
        Ok(BrotliLevel(level))
    }
}

/// A valid Zstandard compression level.
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct ZstdLevel(i32);

impl ZstdLevel {
    pub fn try_new(level: i32) -> PolarsResult<Self> {
        ZstdLevelParquet::try_new(level)?;
        Ok(ZstdLevel(level))
    }
}

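// Illustrative sketch (example value, not from this file): compression levels
// are built through `try_new`, so an unsupported level is rejected up front
// rather than at write time.
//
//     fn example_compression() -> PolarsResult<ParquetCompression> {
//         let level = BrotliLevel::try_new(5)?;
//         Ok(ParquetCompression::Brotli(Some(level)))
//     }
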
impl From<ParquetCompression> for CompressionOptions {
    fn from(value: ParquetCompression) -> Self {
        use ParquetCompression::*;
        // Map the user-facing compression enum onto the parquet writer's
        // options, re-wrapping any explicit levels in the writer's own types.
        match value {
            Uncompressed => CompressionOptions::Uncompressed,
            Snappy => CompressionOptions::Snappy,
            Gzip(level) => {
                CompressionOptions::Gzip(level.map(|v| GzipLevelParquet::try_new(v.0).unwrap()))
            },
            Lzo => CompressionOptions::Lzo,
            Brotli(level) => {
                CompressionOptions::Brotli(level.map(|v| BrotliLevelParquet::try_new(v.0).unwrap()))
            },
            Lz4Raw => CompressionOptions::Lz4Raw,
            Zstd(level) => {
                CompressionOptions::Zstd(level.map(|v| ZstdLevelParquet::try_new(v.0).unwrap()))
            },
        }
    }
}
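
// Illustrative sketch (not from this file): the `From` impl above lets a
// `ParquetCompression` be handed to the parquet writer with a plain `.into()`.
//
//     let compression = ParquetCompression::default(); // Zstd(None)
//     let writer_options: CompressionOptions = compression.into();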