Path: blob/main/crates/polars-parquet/src/parquet/schema/io_thrift/from_thrift.rs
8512 views
use polars_parquet_format::SchemaElement;1use polars_utils::pl_str::PlSmallStr;23use super::super::types::ParquetType;4use crate::parquet::error::{ParquetError, ParquetResult};5use crate::parquet::schema::types::FieldInfo;67impl ParquetType {8/// Method to convert from Thrift.9pub fn try_from_thrift(elements: &[SchemaElement]) -> ParquetResult<ParquetType> {10let mut index = 0;11let mut schema_nodes = Vec::new();12while index < elements.len() {13let t = from_thrift_helper(elements, index)?;14index = t.0;15schema_nodes.push(t.1);16}17if schema_nodes.len() != 1 {18return Err(ParquetError::oos(format!(19"Expected exactly one root node, but found {}",20schema_nodes.len()21)));22}2324Ok(schema_nodes.remove(0))25}26}2728/// Constructs a new Type from the `elements`, starting at index `index`.29/// The first result is the starting index for the next Type after this one. If it is30/// equal to `elements.len()`, then this Type is the last one.31/// The second result is the result Type.32fn from_thrift_helper(33elements: &[SchemaElement],34index: usize,35) -> ParquetResult<(usize, ParquetType)> {36// Whether or not the current node is root (message type).37// There is only one message type node in the schema tree.38let is_root_node = index == 0;3940let element = elements41.get(index)42.ok_or_else(|| ParquetError::oos(format!("index {index} on SchemaElement is not valid")))?;43let name = PlSmallStr::from_str(element.name.as_str());44let converted_type = element.converted_type;4546let id = element.field_id;47match element.num_children {48// empty root49None | Some(0) if is_root_node => {50let fields = vec![];51let tp = ParquetType::new_root(name, fields);52Ok((index + 1, tp))53},5455// From parquet-format:56// The children count is used to construct the nested relationship.57// This field is not set when the element is a primitive type58// Sometimes parquet-cpp sets num_children field to 0 for primitive types, so we59// have to handle this case too.60None | Some(0) => {61// primitive type62let repetition = element63.repetition_type64.ok_or_else(|| {65ParquetError::oos("Repetition level must be defined for a primitive type")66})?67.try_into()?;68let physical_type = element.type_.ok_or_else(|| {69ParquetError::oos("Physical type must be defined for a primitive type")70})?;7172let converted_type = converted_type73.map(|converted_type| {74let maybe_decimal = match (element.precision, element.scale) {75(Some(precision), Some(scale)) => Some((precision, scale)),76(None, None) => None,77_ => {78return Err(ParquetError::oos(79"When precision or scale are defined, both must be defined",80));81},82};83(converted_type, maybe_decimal).try_into()84})85.transpose()?;8687let logical_type = element88.logical_type89.clone()90.map(|x| x.try_into())91.transpose()?;9293let tp = ParquetType::try_from_primitive(94name,95(physical_type, element.type_length).try_into()?,96repetition,97converted_type,98logical_type,99id,100)?;101102Ok((index + 1, tp))103},104Some(n) => {105let mut fields = vec![];106let mut next_index = index + 1;107for _ in 0..n {108let child_result = from_thrift_helper(elements, next_index)?;109next_index = child_result.0;110fields.push(child_result.1);111}112113let tp = if is_root_node {114ParquetType::new_root(name, fields)115} else {116let repetition = if let Some(repetition) = element.repetition_type {117repetition.try_into()?118} else {119return Err(ParquetError::oos(120"The repetition level of a non-root must be non-null",121));122};123124let converted_type = converted_type.map(|x| x.try_into()).transpose()?;125126let logical_type = element127.logical_type128.clone()129.map(|x| x.try_into())130.transpose()?;131132ParquetType::GroupType {133field_info: FieldInfo {134name,135repetition,136id,137},138fields,139converted_type,140logical_type,141}142};143Ok((next_index, tp))144},145}146}147148149