Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/parquet/schema/io_thrift/from_thrift.rs
8512 views
1
use polars_parquet_format::SchemaElement;
2
use polars_utils::pl_str::PlSmallStr;
3
4
use super::super::types::ParquetType;
5
use crate::parquet::error::{ParquetError, ParquetResult};
6
use crate::parquet::schema::types::FieldInfo;
7
8
impl ParquetType {
9
/// Method to convert from Thrift.
10
pub fn try_from_thrift(elements: &[SchemaElement]) -> ParquetResult<ParquetType> {
11
let mut index = 0;
12
let mut schema_nodes = Vec::new();
13
while index < elements.len() {
14
let t = from_thrift_helper(elements, index)?;
15
index = t.0;
16
schema_nodes.push(t.1);
17
}
18
if schema_nodes.len() != 1 {
19
return Err(ParquetError::oos(format!(
20
"Expected exactly one root node, but found {}",
21
schema_nodes.len()
22
)));
23
}
24
25
Ok(schema_nodes.remove(0))
26
}
27
}
28
29
/// Constructs a new Type from the `elements`, starting at index `index`.
30
/// The first result is the starting index for the next Type after this one. If it is
31
/// equal to `elements.len()`, then this Type is the last one.
32
/// The second result is the result Type.
33
fn from_thrift_helper(
34
elements: &[SchemaElement],
35
index: usize,
36
) -> ParquetResult<(usize, ParquetType)> {
37
// Whether or not the current node is root (message type).
38
// There is only one message type node in the schema tree.
39
let is_root_node = index == 0;
40
41
let element = elements
42
.get(index)
43
.ok_or_else(|| ParquetError::oos(format!("index {index} on SchemaElement is not valid")))?;
44
let name = PlSmallStr::from_str(element.name.as_str());
45
let converted_type = element.converted_type;
46
47
let id = element.field_id;
48
match element.num_children {
49
// empty root
50
None | Some(0) if is_root_node => {
51
let fields = vec![];
52
let tp = ParquetType::new_root(name, fields);
53
Ok((index + 1, tp))
54
},
55
56
// From parquet-format:
57
// The children count is used to construct the nested relationship.
58
// This field is not set when the element is a primitive type
59
// Sometimes parquet-cpp sets num_children field to 0 for primitive types, so we
60
// have to handle this case too.
61
None | Some(0) => {
62
// primitive type
63
let repetition = element
64
.repetition_type
65
.ok_or_else(|| {
66
ParquetError::oos("Repetition level must be defined for a primitive type")
67
})?
68
.try_into()?;
69
let physical_type = element.type_.ok_or_else(|| {
70
ParquetError::oos("Physical type must be defined for a primitive type")
71
})?;
72
73
let converted_type = converted_type
74
.map(|converted_type| {
75
let maybe_decimal = match (element.precision, element.scale) {
76
(Some(precision), Some(scale)) => Some((precision, scale)),
77
(None, None) => None,
78
_ => {
79
return Err(ParquetError::oos(
80
"When precision or scale are defined, both must be defined",
81
));
82
},
83
};
84
(converted_type, maybe_decimal).try_into()
85
})
86
.transpose()?;
87
88
let logical_type = element
89
.logical_type
90
.clone()
91
.map(|x| x.try_into())
92
.transpose()?;
93
94
let tp = ParquetType::try_from_primitive(
95
name,
96
(physical_type, element.type_length).try_into()?,
97
repetition,
98
converted_type,
99
logical_type,
100
id,
101
)?;
102
103
Ok((index + 1, tp))
104
},
105
Some(n) => {
106
let mut fields = vec![];
107
let mut next_index = index + 1;
108
for _ in 0..n {
109
let child_result = from_thrift_helper(elements, next_index)?;
110
next_index = child_result.0;
111
fields.push(child_result.1);
112
}
113
114
let tp = if is_root_node {
115
ParquetType::new_root(name, fields)
116
} else {
117
let repetition = if let Some(repetition) = element.repetition_type {
118
repetition.try_into()?
119
} else {
120
return Err(ParquetError::oos(
121
"The repetition level of a non-root must be non-null",
122
));
123
};
124
125
let converted_type = converted_type.map(|x| x.try_into()).transpose()?;
126
127
let logical_type = element
128
.logical_type
129
.clone()
130
.map(|x| x.try_into())
131
.transpose()?;
132
133
ParquetType::GroupType {
134
field_info: FieldInfo {
135
name,
136
repetition,
137
id,
138
},
139
fields,
140
converted_type,
141
logical_type,
142
}
143
};
144
Ok((next_index, tp))
145
},
146
}
147
}
148
149