Path: blob/main/crates/polars-parquet/src/arrow/write/nested/mod.rs
6940 views
mod dremel;12pub use dremel::num_values;3use polars_error::PolarsResult;45use super::Nested;6use crate::parquet::encoding::hybrid_rle::encode;7use crate::parquet::read::levels::get_bit_width;8use crate::parquet::write::Version;910fn write_levels_v1<F: FnOnce(&mut Vec<u8>) -> PolarsResult<()>>(11buffer: &mut Vec<u8>,12encode: F,13) -> PolarsResult<()> {14buffer.extend_from_slice(&[0; 4]);15let start = buffer.len();1617encode(buffer)?;1819let end = buffer.len();20let length = end - start;2122// write the first 4 bytes as length23let length = (length as i32).to_le_bytes();24(0..4).for_each(|i| buffer[start - 4 + i] = length[i]);25Ok(())26}2728/// writes the rep levels to a `Vec<u8>`.29fn write_rep_levels(buffer: &mut Vec<u8>, nested: &[Nested], version: Version) -> PolarsResult<()> {30let max_level = max_rep_level(nested) as i16;31if max_level == 0 {32return Ok(());33}34let num_bits = get_bit_width(max_level);3536let levels = dremel::BufferedDremelIter::new(nested).map(|d| u32::from(d.rep));3738match version {39Version::V1 => {40write_levels_v1(buffer, |buffer: &mut Vec<u8>| {41encode::<u32, _, _>(buffer, levels, num_bits)?;42Ok(())43})?;44},45Version::V2 => {46encode::<u32, _, _>(buffer, levels, num_bits)?;47},48}4950Ok(())51}5253/// writes the def levels to a `Vec<u8>`.54fn write_def_levels(buffer: &mut Vec<u8>, nested: &[Nested], version: Version) -> PolarsResult<()> {55let max_level = max_def_level(nested) as i16;56if max_level == 0 {57return Ok(());58}59let num_bits = get_bit_width(max_level);6061let levels = dremel::BufferedDremelIter::new(nested).map(|d| u32::from(d.def));6263match version {64Version::V1 => write_levels_v1(buffer, move |buffer: &mut Vec<u8>| {65encode::<u32, _, _>(buffer, levels, num_bits)?;66Ok(())67}),68Version::V2 => Ok(encode::<u32, _, _>(buffer, levels, num_bits)?),69}70}7172fn max_def_level(nested: &[Nested]) -> usize {73nested74.iter()75.map(|nested| match nested {76Nested::Primitive(nested) => nested.is_optional as usize,77Nested::List(nested) => 1 + (nested.is_optional as usize),78Nested::LargeList(nested) => 1 + (nested.is_optional as usize),79Nested::Struct(nested) => nested.is_optional as usize,80Nested::FixedSizeList(nested) => 1 + nested.is_optional as usize,81})82.sum()83}8485fn max_rep_level(nested: &[Nested]) -> usize {86nested87.iter()88.map(|nested| match nested {89Nested::FixedSizeList(_) | Nested::LargeList(_) | Nested::List(_) => 1,90Nested::Primitive(_) | Nested::Struct(_) => 0,91})92.sum()93}9495/// Write `repetition_levels` and `definition_levels` to buffer.96pub fn write_rep_and_def(97page_version: Version,98nested: &[Nested],99buffer: &mut Vec<u8>,100) -> PolarsResult<(usize, usize)> {101write_rep_levels(buffer, nested, page_version)?;102let repetition_levels_byte_length = buffer.len();103104write_def_levels(buffer, nested, page_version)?;105let definition_levels_byte_length = buffer.len() - repetition_levels_byte_length;106107Ok((repetition_levels_byte_length, definition_levels_byte_length))108}109110111