Path: blob/main/crates/polars-parquet/src/arrow/write/pages.rs
6940 views
use std::fmt::Debug;12use arrow::array::{Array, FixedSizeListArray, ListArray, MapArray, StructArray};3use arrow::bitmap::{Bitmap, MutableBitmap};4use arrow::datatypes::PhysicalType;5use arrow::offset::{Offset, OffsetsBuffer};6use polars_error::{PolarsResult, polars_bail};78use super::{ColumnWriteOptions, WriteOptions, array_to_pages};9use crate::arrow::read::schema::is_nullable;10use crate::parquet::page::Page;11use crate::parquet::schema::types::{ParquetType, PrimitiveType as ParquetPrimitiveType};12use crate::write::DynIter;1314#[derive(Debug, Clone, PartialEq)]15pub struct PrimitiveNested {16pub is_optional: bool,17pub validity: Option<Bitmap>,18pub length: usize,19}2021#[derive(Debug, Clone, PartialEq)]22pub struct ListNested<O: Offset> {23pub is_optional: bool,24pub offsets: OffsetsBuffer<O>,25pub validity: Option<Bitmap>,26}2728#[derive(Debug, Clone, PartialEq)]29pub struct FixedSizeListNested {30pub validity: Option<Bitmap>,31pub is_optional: bool,32pub width: usize,33pub length: usize,34}3536#[derive(Debug, Clone, PartialEq)]37pub struct StructNested {38pub is_optional: bool,39pub validity: Option<Bitmap>,40pub length: usize,41}4243impl<O: Offset> ListNested<O> {44pub fn new(offsets: OffsetsBuffer<O>, validity: Option<Bitmap>, is_optional: bool) -> Self {45Self {46is_optional,47offsets,48validity,49}50}51}5253/// Descriptor of nested information of a field54#[derive(Debug, Clone, PartialEq)]55pub enum Nested {56/// a primitive (leaf or parquet column)57Primitive(PrimitiveNested),58List(ListNested<i32>),59LargeList(ListNested<i64>),60FixedSizeList(FixedSizeListNested),61Struct(StructNested),62}6364impl Nested {65/// Returns the length (number of rows) of the element66pub fn len(&self) -> usize {67match self {68Nested::Primitive(nested) => nested.length,69Nested::List(nested) => nested.offsets.len_proxy(),70Nested::LargeList(nested) => nested.offsets.len_proxy(),71Nested::FixedSizeList(nested) => nested.length,72Nested::Struct(nested) => nested.length,73}74}7576pub fn primitive(validity: Option<Bitmap>, is_optional: bool, length: usize) -> Self {77Self::Primitive(PrimitiveNested {78validity,79is_optional,80length,81})82}8384pub fn list(validity: Option<Bitmap>, is_optional: bool, offsets: OffsetsBuffer<i32>) -> Self {85Self::List(ListNested {86validity,87is_optional,88offsets,89})90}9192pub fn large_list(93validity: Option<Bitmap>,94is_optional: bool,95offsets: OffsetsBuffer<i64>,96) -> Self {97Self::LargeList(ListNested {98validity,99is_optional,100offsets,101})102}103104pub fn fixed_size_list(105validity: Option<Bitmap>,106is_optional: bool,107width: usize,108length: usize,109) -> Self {110Self::FixedSizeList(FixedSizeListNested {111validity,112is_optional,113width,114length,115})116}117118pub fn structure(validity: Option<Bitmap>, is_optional: bool, length: usize) -> Self {119Self::Struct(StructNested {120validity,121is_optional,122length,123})124}125}126127/// Constructs the necessary `Vec<Vec<Nested>>` to write the rep and def levels of `array` to parquet128pub fn to_nested(array: &dyn Array, type_: &ParquetType) -> PolarsResult<Vec<Vec<Nested>>> {129let mut nested = vec![];130131to_nested_recursive(array, type_, &mut nested, vec![])?;132Ok(nested)133}134135fn to_nested_recursive(136array: &dyn Array,137type_: &ParquetType,138nested: &mut Vec<Vec<Nested>>,139mut parents: Vec<Nested>,140) -> PolarsResult<()> {141let is_optional = is_nullable(type_.get_field_info());142143if !is_optional && array.null_count() > 0 {144polars_bail!(InvalidOperation: "writing a missing value to required field '{}'", type_.name());145}146147use PhysicalType::*;148match array.dtype().to_physical_type() {149Struct => {150let array = array.as_any().downcast_ref::<StructArray>().unwrap();151let fields = if let ParquetType::GroupType { fields, .. } = type_ {152fields153} else {154polars_bail!(InvalidOperation:155"Parquet type must be a group for a struct array",156)157};158159parents.push(Nested::Struct(StructNested {160is_optional,161validity: array.validity().cloned(),162length: array.len(),163}));164165for (type_, array) in fields.iter().zip(array.values()) {166to_nested_recursive(array.as_ref(), type_, nested, parents.clone())?;167}168},169FixedSizeList => {170let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();171let type_ = if let ParquetType::GroupType { fields, .. } = type_ {172if let ParquetType::GroupType { fields, .. } = &fields[0] {173&fields[0]174} else {175polars_bail!(InvalidOperation:176"Parquet type must be a group for a list array",177)178}179} else {180polars_bail!(InvalidOperation:181"Parquet type must be a group for a list array",182)183};184185parents.push(Nested::FixedSizeList(FixedSizeListNested {186validity: array.validity().cloned(),187length: array.len(),188width: array.size(),189is_optional,190}));191to_nested_recursive(array.values().as_ref(), type_, nested, parents)?;192},193List => {194let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();195let type_ = if let ParquetType::GroupType { fields, .. } = type_ {196if let ParquetType::GroupType { fields, .. } = &fields[0] {197&fields[0]198} else {199polars_bail!(InvalidOperation:200"Parquet type must be a group for a list array",201)202}203} else {204polars_bail!(InvalidOperation:205"Parquet type must be a group for a list array",206)207};208209parents.push(Nested::List(ListNested::new(210array.offsets().clone(),211array.validity().cloned(),212is_optional,213)));214to_nested_recursive(array.values().as_ref(), type_, nested, parents)?;215},216LargeList => {217let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();218let type_ = if let ParquetType::GroupType { fields, .. } = type_ {219if let ParquetType::GroupType { fields, .. } = &fields[0] {220&fields[0]221} else {222polars_bail!(InvalidOperation:223"Parquet type must be a group for a list array",224)225}226} else {227polars_bail!(InvalidOperation:228"Parquet type must be a group for a list array",229)230};231232parents.push(Nested::LargeList(ListNested::new(233array.offsets().clone(),234array.validity().cloned(),235is_optional,236)));237to_nested_recursive(array.values().as_ref(), type_, nested, parents)?;238},239Map => {240let array = array.as_any().downcast_ref::<MapArray>().unwrap();241let type_ = if let ParquetType::GroupType { fields, .. } = type_ {242if let ParquetType::GroupType { fields, .. } = &fields[0] {243&fields[0]244} else {245polars_bail!(InvalidOperation:246"Parquet type must be a group for a map array",247)248}249} else {250polars_bail!(InvalidOperation:251"Parquet type must be a group for a map array",252)253};254255parents.push(Nested::List(ListNested::new(256array.offsets().clone(),257array.validity().cloned(),258is_optional,259)));260to_nested_recursive(array.field().as_ref(), type_, nested, parents)?;261},262_ => {263parents.push(Nested::Primitive(PrimitiveNested {264validity: array.validity().cloned(),265is_optional,266length: array.len(),267}));268nested.push(parents)269},270}271Ok(())272}273274fn expand_list_validity<'a, O: Offset>(275array: &'a ListArray<O>,276validity: BitmapState,277array_stack: &mut Vec<(&'a dyn Array, BitmapState)>,278) {279let BitmapState::SomeSet(list_validity) = validity else {280array_stack.push((281array.values().as_ref(),282match validity {283BitmapState::AllSet => BitmapState::AllSet,284BitmapState::SomeSet(_) => unreachable!(),285BitmapState::AllUnset(_) => BitmapState::AllUnset(array.values().len()),286},287));288return;289};290291let offsets = array.offsets().buffer();292let mut validity = MutableBitmap::with_capacity(array.values().len());293let mut list_validity_iter = list_validity.iter();294295// @NOTE: We need to take into account here that the list might only point to a slice of the296// values, therefore we need to extend the validity mask with dummy values to match the length297// of the values array.298299let mut idx = 0;300validity.extend_constant(offsets[0].to_usize(), false);301while list_validity_iter.num_remaining() > 0 {302let num_ones = list_validity_iter.take_leading_ones();303let num_elements = offsets[idx + num_ones] - offsets[idx];304validity.extend_constant(num_elements.to_usize(), true);305306idx += num_ones;307308let num_zeros = list_validity_iter.take_leading_zeros();309let num_elements = offsets[idx + num_zeros] - offsets[idx];310validity.extend_constant(num_elements.to_usize(), false);311312idx += num_zeros;313}314validity.extend_constant(array.values().len() - validity.len(), false);315316debug_assert_eq!(idx, array.len());317let validity = validity.freeze();318319debug_assert_eq!(validity.len(), array.values().len());320array_stack.push((array.values().as_ref(), BitmapState::SomeSet(validity)));321}322323#[derive(Clone)]324enum BitmapState {325AllSet,326SomeSet(Bitmap),327AllUnset(usize),328}329330impl From<Option<&Bitmap>> for BitmapState {331fn from(bm: Option<&Bitmap>) -> Self {332let Some(bm) = bm else {333return Self::AllSet;334};335336let null_count = bm.unset_bits();337338if null_count == 0 {339Self::AllSet340} else if null_count == bm.len() {341Self::AllUnset(bm.len())342} else {343Self::SomeSet(bm.clone())344}345}346}347348impl From<BitmapState> for Option<Bitmap> {349fn from(bms: BitmapState) -> Self {350match bms {351BitmapState::AllSet => None,352BitmapState::SomeSet(bm) => Some(bm),353BitmapState::AllUnset(len) => Some(Bitmap::new_zeroed(len)),354}355}356}357358impl std::ops::BitAnd for &BitmapState {359type Output = BitmapState;360361fn bitand(self, rhs: Self) -> Self::Output {362use BitmapState as B;363match (self, rhs) {364(B::AllSet, B::AllSet) => B::AllSet,365(B::AllSet, B::SomeSet(v)) | (B::SomeSet(v), B::AllSet) => B::SomeSet(v.clone()),366(B::SomeSet(lhs), B::SomeSet(rhs)) => {367let result = lhs & rhs;368let null_count = result.unset_bits();369370if null_count == 0 {371B::AllSet372} else if null_count == result.len() {373B::AllUnset(result.len())374} else {375B::SomeSet(result)376}377},378(B::AllUnset(len), _) | (_, B::AllUnset(len)) => B::AllUnset(*len),379}380}381}382383/// Convert [`Array`] to a `Vec<Box<dyn Array>>` leaves in DFS order.384///385/// Each leaf array has the validity propagated from the nesting levels above.386pub fn to_leaves(array: &dyn Array, leaves: &mut Vec<Box<dyn Array>>) {387use PhysicalType as P;388389leaves.clear();390let mut array_stack: Vec<(&dyn Array, BitmapState)> = Vec::new();391392array_stack.push((array, BitmapState::AllSet));393394while let Some((array, inherited_validity)) = array_stack.pop() {395let child_validity = BitmapState::from(array.validity());396let validity = (&child_validity) & (&inherited_validity);397398match array.dtype().to_physical_type() {399P::Struct => {400let array = array.as_any().downcast_ref::<StructArray>().unwrap();401402leaves.reserve(array.len().saturating_sub(1));403array404.values()405.iter()406.rev()407.for_each(|field| array_stack.push((field.as_ref(), validity.clone())));408},409P::List => {410let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();411expand_list_validity(array, validity, &mut array_stack);412},413P::LargeList => {414let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();415expand_list_validity(array, validity, &mut array_stack);416},417P::FixedSizeList => {418let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();419420let BitmapState::SomeSet(fsl_validity) = validity else {421array_stack.push((422array.values().as_ref(),423match validity {424BitmapState::AllSet => BitmapState::AllSet,425BitmapState::SomeSet(_) => unreachable!(),426BitmapState::AllUnset(_) => BitmapState::AllUnset(array.values().len()),427},428));429continue;430};431432let num_values = array.values().len();433let size = array.size();434435let mut validity = MutableBitmap::with_capacity(num_values);436let mut fsl_validity_iter = fsl_validity.iter();437438let mut idx = 0;439while fsl_validity_iter.num_remaining() > 0 {440let num_ones = fsl_validity_iter.take_leading_ones();441let num_elements = num_ones * size;442validity.extend_constant(num_elements, true);443444idx += num_ones;445446let num_zeros = fsl_validity_iter.take_leading_zeros();447let num_elements = num_zeros * size;448validity.extend_constant(num_elements, false);449450idx += num_zeros;451}452453debug_assert_eq!(idx, array.len());454455let validity = BitmapState::SomeSet(validity.freeze());456457array_stack.push((array.values().as_ref(), validity));458},459P::Map => {460let array = array.as_any().downcast_ref::<MapArray>().unwrap();461array_stack.push((array.field().as_ref(), validity));462},463P::Null464| P::Boolean465| P::Primitive(_)466| P::Binary467| P::FixedSizeBinary468| P::LargeBinary469| P::Utf8470| P::LargeUtf8471| P::Dictionary(_)472| P::BinaryView473| P::Utf8View => {474leaves.push(array.with_validity(validity.into()));475},476477other => todo!("Writing {:?} to parquet not yet implemented", other),478}479}480}481482/// Convert `ParquetType` to `Vec<ParquetPrimitiveType>` leaves in DFS order.483pub fn to_parquet_leaves(type_: ParquetType) -> Vec<ParquetPrimitiveType> {484let mut leaves = vec![];485to_parquet_leaves_recursive(type_, &mut leaves);486leaves487}488489fn to_parquet_leaves_recursive(type_: ParquetType, leaves: &mut Vec<ParquetPrimitiveType>) {490match type_ {491ParquetType::PrimitiveType(primitive) => leaves.push(primitive),492ParquetType::GroupType { fields, .. } => {493fields494.into_iter()495.for_each(|type_| to_parquet_leaves_recursive(type_, leaves));496},497}498}499500/// Returns a vector of iterators of [`Page`], one per leaf column in the array501pub fn array_to_columns<A: AsRef<dyn Array> + Send + Sync>(502array: A,503type_: ParquetType,504column_options: &ColumnWriteOptions,505options: WriteOptions,506) -> PolarsResult<Vec<DynIter<'static, PolarsResult<Page>>>> {507let array = array.as_ref();508509let nested = to_nested(array, &type_)?;510let types = to_parquet_leaves(type_);511512let mut values = Vec::new();513to_leaves(array, &mut values);514515let mut field_options = Vec::with_capacity(types.len());516column_options.to_leaves(&mut field_options);517518assert_eq!(field_options.len(), types.len());519520values521.iter()522.zip(nested)523.zip(types)524.zip(field_options)525.map(|(((values, nested), type_), field_options)| {526array_to_pages(values.as_ref(), type_, &nested, options, field_options)527})528.collect()529}530531pub fn arrays_to_columns<A: AsRef<dyn Array> + Send + Sync>(532arrays: &[A],533type_: ParquetType,534options: WriteOptions,535column_options: &ColumnWriteOptions,536) -> PolarsResult<Vec<DynIter<'static, PolarsResult<Page>>>> {537let array = arrays[0].as_ref();538let nested = to_nested(array, &type_)?;539540let types = to_parquet_leaves(type_);541542let mut field_options = Vec::with_capacity(types.len());543column_options.to_leaves(&mut field_options);544545// leaves; index level is nesting depth.546// index i: has a vec because we have multiple chunks.547let mut leaves = vec![];548549// Ensure we transpose the leaves. So that all the leaves from the same columns are at the same level vec.550let mut scratch = vec![];551for arr in arrays {552to_leaves(arr.as_ref(), &mut scratch);553for (i, leave) in std::mem::take(&mut scratch).into_iter().enumerate() {554while i < leaves.len() {555leaves.push(vec![]);556}557leaves[i].push(leave);558}559}560561leaves562.into_iter()563.zip(nested)564.zip(types)565.zip(field_options)566.map(move |(((values, nested), type_), column_options)| {567let iter = values.into_iter().map(|leave_values| {568array_to_pages(569leave_values.as_ref(),570type_.clone(),571&nested,572options,573column_options,574)575});576577// Need a scratch to bubble up the error :/578let mut scratch = Vec::with_capacity(iter.size_hint().0);579for v in iter {580scratch.push(v?)581}582Ok(DynIter::new(scratch.into_iter().flatten()))583})584.collect::<PolarsResult<Vec<_>>>()585}586587#[cfg(test)]588mod tests {589use arrow::array::*;590use arrow::datatypes::*;591592use super::super::{FieldInfo, ParquetPhysicalType};593use super::*;594use crate::parquet::schema::Repetition;595use crate::parquet::schema::types::{596GroupLogicalType, PrimitiveConvertedType, PrimitiveLogicalType,597};598599#[test]600fn test_struct() {601let boolean = BooleanArray::from_slice([false, false, true, true]).boxed();602let int = Int32Array::from_slice([42, 28, 19, 31]).boxed();603604let fields = vec![605Field::new("b".into(), ArrowDataType::Boolean, false),606Field::new("c".into(), ArrowDataType::Int32, false),607];608609let array = StructArray::new(610ArrowDataType::Struct(fields),6114,612vec![boolean.clone(), int.clone()],613Some(Bitmap::from([true, true, false, true])),614);615616let type_ = ParquetType::GroupType {617field_info: FieldInfo {618name: "a".into(),619repetition: Repetition::Optional,620id: None,621},622logical_type: None,623converted_type: None,624fields: vec![625ParquetType::PrimitiveType(ParquetPrimitiveType {626field_info: FieldInfo {627name: "b".into(),628repetition: Repetition::Required,629id: None,630},631logical_type: None,632converted_type: None,633physical_type: ParquetPhysicalType::Boolean,634}),635ParquetType::PrimitiveType(ParquetPrimitiveType {636field_info: FieldInfo {637name: "c".into(),638repetition: Repetition::Required,639id: None,640},641logical_type: None,642converted_type: None,643physical_type: ParquetPhysicalType::Int32,644}),645],646};647let a = to_nested(&array, &type_).unwrap();648649assert_eq!(650a,651vec![652vec![653Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),654Nested::primitive(None, false, 4),655],656vec![657Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),658Nested::primitive(None, false, 4),659],660]661);662}663664#[test]665fn test_struct_struct() {666let boolean = BooleanArray::from_slice([false, false, true, true]).boxed();667let int = Int32Array::from_slice([42, 28, 19, 31]).boxed();668669let fields = vec![670Field::new("b".into(), ArrowDataType::Boolean, false),671Field::new("c".into(), ArrowDataType::Int32, false),672];673674let array = StructArray::new(675ArrowDataType::Struct(fields),6764,677vec![boolean.clone(), int.clone()],678Some(Bitmap::from([true, true, false, true])),679);680681let fields = vec![682Field::new("b".into(), array.dtype().clone(), true),683Field::new("c".into(), array.dtype().clone(), true),684];685686let array = StructArray::new(687ArrowDataType::Struct(fields),6884,689vec![Box::new(array.clone()), Box::new(array)],690None,691);692693let type_ = ParquetType::GroupType {694field_info: FieldInfo {695name: "a".into(),696repetition: Repetition::Optional,697id: None,698},699logical_type: None,700converted_type: None,701fields: vec![702ParquetType::PrimitiveType(ParquetPrimitiveType {703field_info: FieldInfo {704name: "b".into(),705repetition: Repetition::Required,706id: None,707},708logical_type: None,709converted_type: None,710physical_type: ParquetPhysicalType::Boolean,711}),712ParquetType::PrimitiveType(ParquetPrimitiveType {713field_info: FieldInfo {714name: "c".into(),715repetition: Repetition::Required,716id: None,717},718logical_type: None,719converted_type: None,720physical_type: ParquetPhysicalType::Int32,721}),722],723};724725let type_ = ParquetType::GroupType {726field_info: FieldInfo {727name: "a".into(),728repetition: Repetition::Required,729id: None,730},731logical_type: None,732converted_type: None,733fields: vec![type_.clone(), type_],734};735736let a = to_nested(&array, &type_).unwrap();737738assert_eq!(739a,740vec![741// a.b.b742vec![743Nested::structure(None, false, 4),744Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),745Nested::primitive(None, false, 4),746],747// a.b.c748vec![749Nested::structure(None, false, 4),750Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),751Nested::primitive(None, false, 4),752],753// a.c.b754vec![755Nested::structure(None, false, 4),756Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),757Nested::primitive(None, false, 4),758],759// a.c.c760vec![761Nested::structure(None, false, 4),762Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),763Nested::primitive(None, false, 4),764],765]766);767}768769#[test]770fn test_list_struct() {771let boolean = BooleanArray::from_slice([false, false, true, true]).boxed();772let int = Int32Array::from_slice([42, 28, 19, 31]).boxed();773774let fields = vec![775Field::new("b".into(), ArrowDataType::Boolean, false),776Field::new("c".into(), ArrowDataType::Int32, false),777];778779let array = StructArray::new(780ArrowDataType::Struct(fields),7814,782vec![boolean.clone(), int.clone()],783Some(Bitmap::from([true, true, false, true])),784);785786let array = ListArray::new(787ArrowDataType::List(Box::new(Field::new(788"l".into(),789array.dtype().clone(),790true,791))),792vec![0i32, 2, 4].try_into().unwrap(),793Box::new(array),794None,795);796797let type_ = ParquetType::GroupType {798field_info: FieldInfo {799name: "a".into(),800repetition: Repetition::Optional,801id: None,802},803logical_type: None,804converted_type: None,805fields: vec![806ParquetType::PrimitiveType(ParquetPrimitiveType {807field_info: FieldInfo {808name: "b".into(),809repetition: Repetition::Required,810id: None,811},812logical_type: None,813converted_type: None,814physical_type: ParquetPhysicalType::Boolean,815}),816ParquetType::PrimitiveType(ParquetPrimitiveType {817field_info: FieldInfo {818name: "c".into(),819repetition: Repetition::Required,820id: None,821},822logical_type: None,823converted_type: None,824physical_type: ParquetPhysicalType::Int32,825}),826],827};828829let type_ = ParquetType::GroupType {830field_info: FieldInfo {831name: "l".into(),832repetition: Repetition::Required,833id: None,834},835logical_type: None,836converted_type: None,837fields: vec![ParquetType::GroupType {838field_info: FieldInfo {839name: "list".into(),840repetition: Repetition::Repeated,841id: None,842},843logical_type: None,844converted_type: None,845fields: vec![type_],846}],847};848849let a = to_nested(&array, &type_).unwrap();850851assert_eq!(852a,853vec![854vec![855Nested::List(ListNested::<i32> {856is_optional: false,857offsets: vec![0, 2, 4].try_into().unwrap(),858validity: None,859}),860Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),861Nested::primitive(None, false, 4),862],863vec![864Nested::List(ListNested::<i32> {865is_optional: false,866offsets: vec![0, 2, 4].try_into().unwrap(),867validity: None,868}),869Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),870Nested::primitive(None, false, 4),871],872]873);874}875876#[test]877fn test_map() {878let kv_type = ArrowDataType::Struct(vec![879Field::new("k".into(), ArrowDataType::Utf8, false),880Field::new("v".into(), ArrowDataType::Int32, false),881]);882let kv_field = Field::new("kv".into(), kv_type.clone(), false);883let map_type = ArrowDataType::Map(Box::new(kv_field), false);884885let key_array = Utf8Array::<i32>::from_slice(["k1", "k2", "k3", "k4", "k5", "k6"]).boxed();886let val_array = Int32Array::from_slice([42, 28, 19, 31, 21, 17]).boxed();887let kv_array = StructArray::try_new(kv_type, 6, vec![key_array, val_array], None)888.unwrap()889.boxed();890let offsets = OffsetsBuffer::try_from(vec![0, 2, 3, 4, 6]).unwrap();891892let array = MapArray::try_new(map_type, offsets, kv_array, None).unwrap();893894let type_ = ParquetType::GroupType {895field_info: FieldInfo {896name: "kv".into(),897repetition: Repetition::Optional,898id: None,899},900logical_type: None,901converted_type: None,902fields: vec![903ParquetType::PrimitiveType(ParquetPrimitiveType {904field_info: FieldInfo {905name: "k".into(),906repetition: Repetition::Required,907id: None,908},909logical_type: Some(PrimitiveLogicalType::String),910converted_type: Some(PrimitiveConvertedType::Utf8),911physical_type: ParquetPhysicalType::ByteArray,912}),913ParquetType::PrimitiveType(ParquetPrimitiveType {914field_info: FieldInfo {915name: "v".into(),916repetition: Repetition::Required,917id: None,918},919logical_type: None,920converted_type: None,921physical_type: ParquetPhysicalType::Int32,922}),923],924};925926let type_ = ParquetType::GroupType {927field_info: FieldInfo {928name: "m".into(),929repetition: Repetition::Required,930id: None,931},932logical_type: Some(GroupLogicalType::Map),933converted_type: None,934fields: vec![ParquetType::GroupType {935field_info: FieldInfo {936name: "map".into(),937repetition: Repetition::Repeated,938id: None,939},940logical_type: None,941converted_type: None,942fields: vec![type_],943}],944};945946let a = to_nested(&array, &type_).unwrap();947948assert_eq!(949a,950vec![951vec![952Nested::List(ListNested::<i32> {953is_optional: false,954offsets: vec![0, 2, 3, 4, 6].try_into().unwrap(),955validity: None,956}),957Nested::structure(None, true, 6),958Nested::primitive(None, false, 6),959],960vec![961Nested::List(ListNested::<i32> {962is_optional: false,963offsets: vec![0, 2, 3, 4, 6].try_into().unwrap(),964validity: None,965}),966Nested::structure(None, true, 6),967Nested::primitive(None, false, 6),968],969]970);971}972}973974975