Path: blob/main/crates/polars-parquet/src/arrow/write/pages.rs
8480 views
use std::fmt::Debug;12use arrow::array::{Array, FixedSizeListArray, ListArray, MapArray, StructArray};3use arrow::bitmap::{Bitmap, MutableBitmap};4use arrow::datatypes::{ArrowDataType, PhysicalType};5use arrow::offset::{Offset, OffsetsBuffer};6use polars_error::{PolarsResult, polars_bail};78use super::{Encoding, WriteOptions, array_to_pages};9use crate::arrow::read::schema::is_nullable;10use crate::parquet::page::Page;11use crate::parquet::schema::types::{ParquetType, PrimitiveType as ParquetPrimitiveType};12use crate::write::DynIter;1314#[derive(Debug, Clone, PartialEq)]15pub struct PrimitiveNested {16pub is_optional: bool,17pub validity: Option<Bitmap>,18pub length: usize,19}2021#[derive(Debug, Clone, PartialEq)]22pub struct ListNested<O: Offset> {23pub is_optional: bool,24pub offsets: OffsetsBuffer<O>,25pub validity: Option<Bitmap>,26}2728#[derive(Debug, Clone, PartialEq)]29pub struct FixedSizeListNested {30pub validity: Option<Bitmap>,31pub is_optional: bool,32pub width: usize,33pub length: usize,34}3536#[derive(Debug, Clone, PartialEq)]37pub struct StructNested {38pub is_optional: bool,39pub validity: Option<Bitmap>,40pub length: usize,41}4243impl<O: Offset> ListNested<O> {44pub fn new(offsets: OffsetsBuffer<O>, validity: Option<Bitmap>, is_optional: bool) -> Self {45Self {46is_optional,47offsets,48validity,49}50}51}5253/// Descriptor of nested information of a field54#[derive(Debug, Clone, PartialEq)]55pub enum Nested {56/// a primitive (leaf or parquet column)57Primitive(PrimitiveNested),58List(ListNested<i32>),59LargeList(ListNested<i64>),60FixedSizeList(FixedSizeListNested),61Struct(StructNested),62}6364impl Nested {65/// Returns the length (number of rows) of the element66pub fn len(&self) -> usize {67match self {68Nested::Primitive(nested) => nested.length,69Nested::List(nested) => nested.offsets.len_proxy(),70Nested::LargeList(nested) => nested.offsets.len_proxy(),71Nested::FixedSizeList(nested) => nested.length,72Nested::Struct(nested) => nested.length,73}74}7576pub fn primitive(validity: Option<Bitmap>, is_optional: bool, length: usize) -> Self {77Self::Primitive(PrimitiveNested {78validity,79is_optional,80length,81})82}8384pub fn list(validity: Option<Bitmap>, is_optional: bool, offsets: OffsetsBuffer<i32>) -> Self {85Self::List(ListNested {86validity,87is_optional,88offsets,89})90}9192pub fn large_list(93validity: Option<Bitmap>,94is_optional: bool,95offsets: OffsetsBuffer<i64>,96) -> Self {97Self::LargeList(ListNested {98validity,99is_optional,100offsets,101})102}103104pub fn fixed_size_list(105validity: Option<Bitmap>,106is_optional: bool,107width: usize,108length: usize,109) -> Self {110Self::FixedSizeList(FixedSizeListNested {111validity,112is_optional,113width,114length,115})116}117118pub fn structure(validity: Option<Bitmap>, is_optional: bool, length: usize) -> Self {119Self::Struct(StructNested {120validity,121is_optional,122length,123})124}125}126127/// Constructs the necessary `Vec<Vec<Nested>>` to write the rep and def levels of `array` to parquet128pub fn to_nested(array: &dyn Array, type_: &ParquetType) -> PolarsResult<Vec<Vec<Nested>>> {129let mut nested = vec![];130131to_nested_recursive(array, type_, &mut nested, vec![])?;132Ok(nested)133}134135fn to_nested_recursive(136array: &dyn Array,137type_: &ParquetType,138nested: &mut Vec<Vec<Nested>>,139mut parents: Vec<Nested>,140) -> PolarsResult<()> {141let is_optional = is_nullable(type_.get_field_info());142143if !is_optional && array.null_count() > 0 {144polars_bail!(InvalidOperation: "writing a missing value to required field '{}'", type_.name());145}146147use PhysicalType::*;148match array.dtype().to_physical_type() {149Struct => {150let array = array.as_any().downcast_ref::<StructArray>().unwrap();151let fields = if let ParquetType::GroupType { fields, .. } = type_ {152fields153} else {154// @NOTE: Support empty struct by mapping to Boolean array.155if let ArrowDataType::Struct(fs) = array.dtype()156&& fs.is_empty()157{158parents.push(Nested::Primitive(PrimitiveNested {159validity: array.validity().cloned(),160is_optional,161length: array.len(),162}));163nested.push(parents);164return Ok(());165}166167polars_bail!(InvalidOperation:168"Parquet type must be a group for a struct array",169)170};171172parents.push(Nested::Struct(StructNested {173is_optional,174validity: array.validity().cloned(),175length: array.len(),176}));177178for (type_, array) in fields.iter().zip(array.values()) {179to_nested_recursive(array.as_ref(), type_, nested, parents.clone())?;180}181},182FixedSizeList => {183let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();184let type_ = if let ParquetType::GroupType { fields, .. } = type_ {185if let ParquetType::GroupType { fields, .. } = &fields[0] {186&fields[0]187} else {188polars_bail!(InvalidOperation:189"Parquet type must be a group for a list array",190)191}192} else {193polars_bail!(InvalidOperation:194"Parquet type must be a group for a list array",195)196};197198parents.push(Nested::FixedSizeList(FixedSizeListNested {199validity: array.validity().cloned(),200length: array.len(),201width: array.size(),202is_optional,203}));204to_nested_recursive(array.values().as_ref(), type_, nested, parents)?;205},206List => {207let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();208let type_ = if let ParquetType::GroupType { fields, .. } = type_ {209if let ParquetType::GroupType { fields, .. } = &fields[0] {210&fields[0]211} else {212polars_bail!(InvalidOperation:213"Parquet type must be a group for a list array",214)215}216} else {217polars_bail!(InvalidOperation:218"Parquet type must be a group for a list array",219)220};221222parents.push(Nested::List(ListNested::new(223array.offsets().clone(),224array.validity().cloned(),225is_optional,226)));227to_nested_recursive(array.values().as_ref(), type_, nested, parents)?;228},229LargeList => {230let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();231let type_ = if let ParquetType::GroupType { fields, .. } = type_ {232if let ParquetType::GroupType { fields, .. } = &fields[0] {233&fields[0]234} else {235polars_bail!(InvalidOperation:236"Parquet type must be a group for a list array",237)238}239} else {240polars_bail!(InvalidOperation:241"Parquet type must be a group for a list array",242)243};244245parents.push(Nested::LargeList(ListNested::new(246array.offsets().clone(),247array.validity().cloned(),248is_optional,249)));250to_nested_recursive(array.values().as_ref(), type_, nested, parents)?;251},252Map => {253let array = array.as_any().downcast_ref::<MapArray>().unwrap();254let type_ = if let ParquetType::GroupType { fields, .. } = type_ {255if let ParquetType::GroupType { fields, .. } = &fields[0] {256&fields[0]257} else {258polars_bail!(InvalidOperation:259"Parquet type must be a group for a map array",260)261}262} else {263polars_bail!(InvalidOperation:264"Parquet type must be a group for a map array",265)266};267268parents.push(Nested::List(ListNested::new(269array.offsets().clone(),270array.validity().cloned(),271is_optional,272)));273to_nested_recursive(array.field().as_ref(), type_, nested, parents)?;274},275_ => {276parents.push(Nested::Primitive(PrimitiveNested {277validity: array.validity().cloned(),278is_optional,279length: array.len(),280}));281nested.push(parents)282},283}284Ok(())285}286287fn expand_list_validity<'a, O: Offset>(288array: &'a ListArray<O>,289validity: BitmapState,290array_stack: &mut Vec<(&'a dyn Array, BitmapState)>,291) {292let BitmapState::SomeSet(list_validity) = validity else {293array_stack.push((294array.values().as_ref(),295match validity {296BitmapState::AllSet => BitmapState::AllSet,297BitmapState::SomeSet(_) => unreachable!(),298BitmapState::AllUnset(_) => BitmapState::AllUnset(array.values().len()),299},300));301return;302};303304let offsets = array.offsets().buffer();305let mut validity = MutableBitmap::with_capacity(array.values().len());306let mut list_validity_iter = list_validity.iter();307308// @NOTE: We need to take into account here that the list might only point to a slice of the309// values, therefore we need to extend the validity mask with dummy values to match the length310// of the values array.311312let mut idx = 0;313validity.extend_constant(offsets[0].to_usize(), false);314while list_validity_iter.num_remaining() > 0 {315let num_ones = list_validity_iter.take_leading_ones();316let num_elements = offsets[idx + num_ones] - offsets[idx];317validity.extend_constant(num_elements.to_usize(), true);318319idx += num_ones;320321let num_zeros = list_validity_iter.take_leading_zeros();322let num_elements = offsets[idx + num_zeros] - offsets[idx];323validity.extend_constant(num_elements.to_usize(), false);324325idx += num_zeros;326}327validity.extend_constant(array.values().len() - validity.len(), false);328329debug_assert_eq!(idx, array.len());330let validity = validity.freeze();331332debug_assert_eq!(validity.len(), array.values().len());333array_stack.push((array.values().as_ref(), BitmapState::SomeSet(validity)));334}335336#[derive(Clone)]337enum BitmapState {338AllSet,339SomeSet(Bitmap),340AllUnset(usize),341}342343impl From<Option<&Bitmap>> for BitmapState {344fn from(bm: Option<&Bitmap>) -> Self {345let Some(bm) = bm else {346return Self::AllSet;347};348349let null_count = bm.unset_bits();350351if null_count == 0 {352Self::AllSet353} else if null_count == bm.len() {354Self::AllUnset(bm.len())355} else {356Self::SomeSet(bm.clone())357}358}359}360361impl From<BitmapState> for Option<Bitmap> {362fn from(bms: BitmapState) -> Self {363match bms {364BitmapState::AllSet => None,365BitmapState::SomeSet(bm) => Some(bm),366BitmapState::AllUnset(len) => Some(Bitmap::new_zeroed(len)),367}368}369}370371impl std::ops::BitAnd for &BitmapState {372type Output = BitmapState;373374fn bitand(self, rhs: Self) -> Self::Output {375use BitmapState as B;376match (self, rhs) {377(B::AllSet, B::AllSet) => B::AllSet,378(B::AllSet, B::SomeSet(v)) | (B::SomeSet(v), B::AllSet) => B::SomeSet(v.clone()),379(B::SomeSet(lhs), B::SomeSet(rhs)) => {380let result = lhs & rhs;381let null_count = result.unset_bits();382383if null_count == 0 {384B::AllSet385} else if null_count == result.len() {386B::AllUnset(result.len())387} else {388B::SomeSet(result)389}390},391(B::AllUnset(len), _) | (_, B::AllUnset(len)) => B::AllUnset(*len),392}393}394}395396/// Convert [`Array`] to a `Vec<Box<dyn Array>>` leaves in DFS order.397///398/// Each leaf array has the validity propagated from the nesting levels above.399pub fn to_leaves(array: &dyn Array, leaves: &mut Vec<Box<dyn Array>>) {400use PhysicalType as P;401402leaves.clear();403let mut array_stack: Vec<(&dyn Array, BitmapState)> = Vec::new();404405array_stack.push((array, BitmapState::AllSet));406407while let Some((array, inherited_validity)) = array_stack.pop() {408let child_validity = BitmapState::from(array.validity());409let validity = (&child_validity) & (&inherited_validity);410411match array.dtype().to_physical_type() {412P::Struct if !matches!(array.dtype(), ArrowDataType::Struct(fs) if fs.is_empty()) => {413let array = array.as_any().downcast_ref::<StructArray>().unwrap();414415leaves.reserve(array.len().saturating_sub(1));416array417.values()418.iter()419.rev()420.for_each(|field| array_stack.push((field.as_ref(), validity.clone())));421},422P::List => {423let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();424expand_list_validity(array, validity, &mut array_stack);425},426P::LargeList => {427let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();428expand_list_validity(array, validity, &mut array_stack);429},430P::FixedSizeList => {431let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();432433let BitmapState::SomeSet(fsl_validity) = validity else {434array_stack.push((435array.values().as_ref(),436match validity {437BitmapState::AllSet => BitmapState::AllSet,438BitmapState::SomeSet(_) => unreachable!(),439BitmapState::AllUnset(_) => BitmapState::AllUnset(array.values().len()),440},441));442continue;443};444445let num_values = array.values().len();446let size = array.size();447448let mut validity = MutableBitmap::with_capacity(num_values);449let mut fsl_validity_iter = fsl_validity.iter();450451let mut idx = 0;452while fsl_validity_iter.num_remaining() > 0 {453let num_ones = fsl_validity_iter.take_leading_ones();454let num_elements = num_ones * size;455validity.extend_constant(num_elements, true);456457idx += num_ones;458459let num_zeros = fsl_validity_iter.take_leading_zeros();460let num_elements = num_zeros * size;461validity.extend_constant(num_elements, false);462463idx += num_zeros;464}465466debug_assert_eq!(idx, array.len());467468let validity = BitmapState::SomeSet(validity.freeze());469470array_stack.push((array.values().as_ref(), validity));471},472P::Map => {473let array = array.as_any().downcast_ref::<MapArray>().unwrap();474array_stack.push((array.field().as_ref(), validity));475},476P::Null477| P::Boolean478| P::Primitive(_)479| P::Binary480| P::FixedSizeBinary481| P::LargeBinary482| P::Utf8483| P::LargeUtf8484| P::Dictionary(_)485| P::BinaryView486| P::Utf8View487| P::Struct => {488leaves.push(array.with_validity(validity.into()));489},490491other => todo!("Writing {:?} to parquet not yet implemented", other),492}493}494}495496/// Convert `ParquetType` to `Vec<ParquetPrimitiveType>` leaves in DFS order.497pub fn to_parquet_leaves(type_: ParquetType) -> Vec<ParquetPrimitiveType> {498let mut leaves = vec![];499to_parquet_leaves_recursive(type_, &mut leaves);500leaves501}502503fn to_parquet_leaves_recursive(type_: ParquetType, leaves: &mut Vec<ParquetPrimitiveType>) {504match type_ {505ParquetType::PrimitiveType(primitive) => leaves.push(primitive),506ParquetType::GroupType { fields, .. } => {507fields508.into_iter()509.for_each(|type_| to_parquet_leaves_recursive(type_, leaves));510},511}512}513514/// Returns a vector of iterators of [`Page`], one per leaf column in the array515pub fn array_to_columns<A: AsRef<dyn Array> + Send + Sync>(516array: A,517type_: ParquetType,518options: WriteOptions,519encoding: &[Encoding],520) -> PolarsResult<Vec<DynIter<'static, PolarsResult<Page>>>> {521let array = array.as_ref();522523let nested = to_nested(array, &type_)?;524525let types = to_parquet_leaves(type_);526527let mut values = Vec::new();528to_leaves(array, &mut values);529530assert_eq!(encoding.len(), types.len());531532let x = values533.iter()534.zip(nested)535.zip(types)536.zip(encoding.iter())537.map(|(((values, nested), type_), encoding)| {538array_to_pages(values.as_ref(), type_, &nested, options, *encoding)539})540.collect::<PolarsResult<Vec<DynIter<'static, PolarsResult<Page>>>>>()?;541Ok(x)542}543544pub fn arrays_to_columns<A: AsRef<dyn Array> + Send + Sync>(545arrays: &[A],546type_: ParquetType,547options: WriteOptions,548encoding: &[Encoding],549) -> PolarsResult<Vec<DynIter<'static, PolarsResult<Page>>>> {550let array = arrays[0].as_ref();551let nested = to_nested(array, &type_)?;552553let types = to_parquet_leaves(type_);554555// leaves; index level is nesting depth.556// index i: has a vec because we have multiple chunks.557let mut leaves = vec![];558559// Ensure we transpose the leaves. So that all the leaves from the same columns are at the same level vec.560let mut scratch = vec![];561for arr in arrays {562to_leaves(arr.as_ref(), &mut scratch);563for (i, leave) in std::mem::take(&mut scratch).into_iter().enumerate() {564while i < leaves.len() {565leaves.push(vec![]);566}567leaves[i].push(leave);568}569}570571leaves572.into_iter()573.zip(nested)574.zip(types)575.zip(encoding.iter())576.map(move |(((values, nested), type_), encoding)| {577let iter = values.into_iter().map(|leave_values| {578array_to_pages(579leave_values.as_ref(),580type_.clone(),581&nested,582options,583*encoding,584)585});586587// Need a scratch to bubble up the error :/588let mut scratch = Vec::with_capacity(iter.size_hint().0);589for v in iter {590scratch.push(v?)591}592Ok(DynIter::new(scratch.into_iter().flatten()))593})594.collect::<PolarsResult<Vec<_>>>()595}596597#[cfg(test)]598mod tests {599use arrow::array::*;600use arrow::datatypes::*;601602use super::super::{FieldInfo, ParquetPhysicalType};603use super::*;604use crate::parquet::schema::Repetition;605use crate::parquet::schema::types::{606GroupLogicalType, PrimitiveConvertedType, PrimitiveLogicalType,607};608609#[test]610fn test_struct() {611let boolean = BooleanArray::from_slice([false, false, true, true]).boxed();612let int = Int32Array::from_slice([42, 28, 19, 31]).boxed();613614let fields = vec![615Field::new("b".into(), ArrowDataType::Boolean, false),616Field::new("c".into(), ArrowDataType::Int32, false),617];618619let array = StructArray::new(620ArrowDataType::Struct(fields),6214,622vec![boolean.clone(), int.clone()],623Some(Bitmap::from([true, true, false, true])),624);625626let type_ = ParquetType::GroupType {627field_info: FieldInfo {628name: "a".into(),629repetition: Repetition::Optional,630id: None,631},632logical_type: None,633converted_type: None,634fields: vec![635ParquetType::PrimitiveType(ParquetPrimitiveType {636field_info: FieldInfo {637name: "b".into(),638repetition: Repetition::Required,639id: None,640},641logical_type: None,642converted_type: None,643physical_type: ParquetPhysicalType::Boolean,644}),645ParquetType::PrimitiveType(ParquetPrimitiveType {646field_info: FieldInfo {647name: "c".into(),648repetition: Repetition::Required,649id: None,650},651logical_type: None,652converted_type: None,653physical_type: ParquetPhysicalType::Int32,654}),655],656};657let a = to_nested(&array, &type_).unwrap();658659assert_eq!(660a,661vec![662vec![663Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),664Nested::primitive(None, false, 4),665],666vec![667Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),668Nested::primitive(None, false, 4),669],670]671);672}673674#[test]675fn test_struct_struct() {676let boolean = BooleanArray::from_slice([false, false, true, true]).boxed();677let int = Int32Array::from_slice([42, 28, 19, 31]).boxed();678679let fields = vec![680Field::new("b".into(), ArrowDataType::Boolean, false),681Field::new("c".into(), ArrowDataType::Int32, false),682];683684let array = StructArray::new(685ArrowDataType::Struct(fields),6864,687vec![boolean.clone(), int.clone()],688Some(Bitmap::from([true, true, false, true])),689);690691let fields = vec![692Field::new("b".into(), array.dtype().clone(), true),693Field::new("c".into(), array.dtype().clone(), true),694];695696let array = StructArray::new(697ArrowDataType::Struct(fields),6984,699vec![Box::new(array.clone()), Box::new(array)],700None,701);702703let type_ = ParquetType::GroupType {704field_info: FieldInfo {705name: "a".into(),706repetition: Repetition::Optional,707id: None,708},709logical_type: None,710converted_type: None,711fields: vec![712ParquetType::PrimitiveType(ParquetPrimitiveType {713field_info: FieldInfo {714name: "b".into(),715repetition: Repetition::Required,716id: None,717},718logical_type: None,719converted_type: None,720physical_type: ParquetPhysicalType::Boolean,721}),722ParquetType::PrimitiveType(ParquetPrimitiveType {723field_info: FieldInfo {724name: "c".into(),725repetition: Repetition::Required,726id: None,727},728logical_type: None,729converted_type: None,730physical_type: ParquetPhysicalType::Int32,731}),732],733};734735let type_ = ParquetType::GroupType {736field_info: FieldInfo {737name: "a".into(),738repetition: Repetition::Required,739id: None,740},741logical_type: None,742converted_type: None,743fields: vec![type_.clone(), type_],744};745746let a = to_nested(&array, &type_).unwrap();747748assert_eq!(749a,750vec![751// a.b.b752vec![753Nested::structure(None, false, 4),754Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),755Nested::primitive(None, false, 4),756],757// a.b.c758vec![759Nested::structure(None, false, 4),760Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),761Nested::primitive(None, false, 4),762],763// a.c.b764vec![765Nested::structure(None, false, 4),766Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),767Nested::primitive(None, false, 4),768],769// a.c.c770vec![771Nested::structure(None, false, 4),772Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),773Nested::primitive(None, false, 4),774],775]776);777}778779#[test]780fn test_list_struct() {781let boolean = BooleanArray::from_slice([false, false, true, true]).boxed();782let int = Int32Array::from_slice([42, 28, 19, 31]).boxed();783784let fields = vec![785Field::new("b".into(), ArrowDataType::Boolean, false),786Field::new("c".into(), ArrowDataType::Int32, false),787];788789let array = StructArray::new(790ArrowDataType::Struct(fields),7914,792vec![boolean.clone(), int.clone()],793Some(Bitmap::from([true, true, false, true])),794);795796let array = ListArray::new(797ArrowDataType::List(Box::new(Field::new(798"l".into(),799array.dtype().clone(),800true,801))),802vec![0i32, 2, 4].try_into().unwrap(),803Box::new(array),804None,805);806807let type_ = ParquetType::GroupType {808field_info: FieldInfo {809name: "a".into(),810repetition: Repetition::Optional,811id: None,812},813logical_type: None,814converted_type: None,815fields: vec![816ParquetType::PrimitiveType(ParquetPrimitiveType {817field_info: FieldInfo {818name: "b".into(),819repetition: Repetition::Required,820id: None,821},822logical_type: None,823converted_type: None,824physical_type: ParquetPhysicalType::Boolean,825}),826ParquetType::PrimitiveType(ParquetPrimitiveType {827field_info: FieldInfo {828name: "c".into(),829repetition: Repetition::Required,830id: None,831},832logical_type: None,833converted_type: None,834physical_type: ParquetPhysicalType::Int32,835}),836],837};838839let type_ = ParquetType::GroupType {840field_info: FieldInfo {841name: "l".into(),842repetition: Repetition::Required,843id: None,844},845logical_type: None,846converted_type: None,847fields: vec![ParquetType::GroupType {848field_info: FieldInfo {849name: "list".into(),850repetition: Repetition::Repeated,851id: None,852},853logical_type: None,854converted_type: None,855fields: vec![type_],856}],857};858859let a = to_nested(&array, &type_).unwrap();860861assert_eq!(862a,863vec![864vec![865Nested::List(ListNested::<i32> {866is_optional: false,867offsets: vec![0, 2, 4].try_into().unwrap(),868validity: None,869}),870Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),871Nested::primitive(None, false, 4),872],873vec![874Nested::List(ListNested::<i32> {875is_optional: false,876offsets: vec![0, 2, 4].try_into().unwrap(),877validity: None,878}),879Nested::structure(Some(Bitmap::from([true, true, false, true])), true, 4),880Nested::primitive(None, false, 4),881],882]883);884}885886#[test]887fn test_map() {888let kv_type = ArrowDataType::Struct(vec![889Field::new("k".into(), ArrowDataType::Utf8, false),890Field::new("v".into(), ArrowDataType::Int32, false),891]);892let kv_field = Field::new("kv".into(), kv_type.clone(), false);893let map_type = ArrowDataType::Map(Box::new(kv_field), false);894895let key_array = Utf8Array::<i32>::from_slice(["k1", "k2", "k3", "k4", "k5", "k6"]).boxed();896let val_array = Int32Array::from_slice([42, 28, 19, 31, 21, 17]).boxed();897let kv_array = StructArray::try_new(kv_type, 6, vec![key_array, val_array], None)898.unwrap()899.boxed();900let offsets = OffsetsBuffer::try_from(vec![0, 2, 3, 4, 6]).unwrap();901902let array = MapArray::try_new(map_type, offsets, kv_array, None).unwrap();903904let type_ = ParquetType::GroupType {905field_info: FieldInfo {906name: "kv".into(),907repetition: Repetition::Optional,908id: None,909},910logical_type: None,911converted_type: None,912fields: vec![913ParquetType::PrimitiveType(ParquetPrimitiveType {914field_info: FieldInfo {915name: "k".into(),916repetition: Repetition::Required,917id: None,918},919logical_type: Some(PrimitiveLogicalType::String),920converted_type: Some(PrimitiveConvertedType::Utf8),921physical_type: ParquetPhysicalType::ByteArray,922}),923ParquetType::PrimitiveType(ParquetPrimitiveType {924field_info: FieldInfo {925name: "v".into(),926repetition: Repetition::Required,927id: None,928},929logical_type: None,930converted_type: None,931physical_type: ParquetPhysicalType::Int32,932}),933],934};935936let type_ = ParquetType::GroupType {937field_info: FieldInfo {938name: "m".into(),939repetition: Repetition::Required,940id: None,941},942logical_type: Some(GroupLogicalType::Map),943converted_type: None,944fields: vec![ParquetType::GroupType {945field_info: FieldInfo {946name: "map".into(),947repetition: Repetition::Repeated,948id: None,949},950logical_type: None,951converted_type: None,952fields: vec![type_],953}],954};955956let a = to_nested(&array, &type_).unwrap();957958assert_eq!(959a,960vec![961vec![962Nested::List(ListNested::<i32> {963is_optional: false,964offsets: vec![0, 2, 3, 4, 6].try_into().unwrap(),965validity: None,966}),967Nested::structure(None, true, 6),968Nested::primitive(None, false, 6),969],970vec![971Nested::List(ListNested::<i32> {972is_optional: false,973offsets: vec![0, 2, 3, 4, 6].try_into().unwrap(),974validity: None,975}),976Nested::structure(None, true, 6),977Nested::primitive(None, false, 6),978],979]980);981}982}983984985