Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-io/src/options.rs
8424 views
1
use polars_core::schema::SchemaRef;
2
use polars_error::{PolarsError, PolarsResult};
3
use polars_utils::IdxSize;
4
use polars_utils::pl_str::PlSmallStr;
5
#[cfg(feature = "serde")]
6
use serde::{Deserialize, Serialize};
7
8
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
9
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
10
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
11
pub struct RowIndex {
12
pub name: PlSmallStr,
13
pub offset: IdxSize,
14
}
15
16
/// Options for Hive partitioning.
17
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
18
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
19
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
20
pub struct HiveOptions {
21
/// This can be `None` to automatically enable for single directory scans
22
/// and disable otherwise. However it should be initialized if it is inside
23
/// a DSL / IR plan.
24
pub enabled: Option<bool>,
25
pub hive_start_idx: usize,
26
pub schema: Option<SchemaRef>,
27
pub try_parse_dates: bool,
28
}
29
30
impl HiveOptions {
31
pub fn new_enabled() -> Self {
32
Self {
33
enabled: Some(true),
34
hive_start_idx: 0,
35
schema: None,
36
try_parse_dates: true,
37
}
38
}
39
40
pub fn new_disabled() -> Self {
41
Self {
42
enabled: Some(false),
43
hive_start_idx: 0,
44
schema: None,
45
try_parse_dates: false,
46
}
47
}
48
}
49
50
impl Default for HiveOptions {
51
fn default() -> Self {
52
Self::new_enabled()
53
}
54
}
55
56
/// Compression options for files whose compression is expressed externally, like CSV and NDJSON.
///
/// "Externally" does not mean that an external tool performs the compression; it means the
/// compression does not happen inside the file format itself, as it does for Parquet and IPC.
#[derive(Copy, Clone, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum ExternalCompression {
    /// No compression; this is the default variant.
    #[default]
    Uncompressed,
    /// Gzip compression with an optional compression level.
    Gzip {
        level: Option<u32>,
    },
    /// Zstd compression with an optional compression level.
    Zstd {
        level: Option<u32>,
    },
}
75
76
impl ExternalCompression {
77
/// Returns the expected file suffix associated with the compression format.
78
pub fn file_suffix(self) -> Option<&'static str> {
79
match self {
80
Self::Uncompressed => None,
81
Self::Gzip { .. } => Some(".gz"),
82
Self::Zstd { .. } => Some(".zst"),
83
}
84
}
85
86
pub fn try_from(value: &str, level: Option<u32>) -> PolarsResult<Self> {
87
match value {
88
"uncompressed" => Ok(Self::Uncompressed),
89
"gzip" => Ok(Self::Gzip { level }),
90
"zstd" => Ok(Self::Zstd { level }),
91
_ => Err(PolarsError::InvalidOperation(
92
format!("Invalid compression format: ({value})").into(),
93
)),
94
}
95
}
96
}
97
98