Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/parquet/metadata/column_descriptor.rs
6940 views
1
use std::ops::Deref;
2
use std::sync::Arc;
3
4
use polars_utils::pl_str::PlSmallStr;
5
#[cfg(feature = "serde")]
6
use serde::{Deserialize, Serialize};
7
8
use crate::parquet::schema::types::{ParquetType, PrimitiveType};
9
10
/// A descriptor of a parquet column. It contains the necessary information to deserialize
11
/// a parquet column.
12
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
13
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
14
pub struct Descriptor {
15
/// The [`PrimitiveType`] of this column
16
pub primitive_type: PrimitiveType,
17
18
/// The maximum definition level
19
pub max_def_level: i16,
20
21
/// The maximum repetition level
22
pub max_rep_level: i16,
23
}
24
25
#[derive(Debug, Clone)]
26
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
27
pub enum BaseType {
28
Owned(ParquetType),
29
Arc(Arc<ParquetType>),
30
}
31
32
impl BaseType {
33
pub fn into_arc(self) -> Self {
34
match self {
35
BaseType::Owned(t) => Self::Arc(Arc::new(t)),
36
BaseType::Arc(t) => Self::Arc(t),
37
}
38
}
39
}
40
41
impl PartialEq for BaseType {
42
fn eq(&self, other: &Self) -> bool {
43
self.deref() == other.deref()
44
}
45
}
46
47
impl Deref for BaseType {
48
type Target = ParquetType;
49
50
fn deref(&self) -> &Self::Target {
51
match self {
52
BaseType::Owned(i) => i,
53
BaseType::Arc(i) => i.as_ref(),
54
}
55
}
56
}
57
58
/// A descriptor for leaf-level primitive columns.
59
/// This encapsulates information such as definition and repetition levels and is used to
60
/// re-assemble nested data.
61
#[derive(Debug, PartialEq, Clone)]
62
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
63
pub struct ColumnDescriptor {
64
/// The descriptor this columns' leaf.
65
pub descriptor: Descriptor,
66
67
/// The path of this column. For instance, "a.b.c.d".
68
pub path_in_schema: Vec<PlSmallStr>,
69
70
/// The [`ParquetType`] this descriptor is a leaf of
71
pub base_type: BaseType,
72
}
73
74
impl ColumnDescriptor {
75
/// Creates new descriptor for leaf-level column.
76
pub fn new(
77
descriptor: Descriptor,
78
path_in_schema: Vec<PlSmallStr>,
79
base_type: BaseType,
80
) -> Self {
81
Self {
82
descriptor,
83
path_in_schema,
84
base_type,
85
}
86
}
87
}
88
89