Path: blob/main/crates/polars-parquet/src/arrow/read/deserialize/dictionary_encoded/required.rs
8509 views
use arrow::types::AlignedBytes;12use super::{IndexMapping, oob_dict_idx, required_skip_whole_chunks, verify_dict_indices};3use crate::parquet::encoding::hybrid_rle::{HybridRleChunk, HybridRleDecoder};4use crate::parquet::error::ParquetResult;56/// Decoding kernel for required dictionary encoded.7#[inline(never)]8pub fn decode<B: AlignedBytes, D: IndexMapping<Output = B>>(9mut values: HybridRleDecoder<'_>,10dict: D,11target: &mut Vec<B>,12mut num_rows_to_skip: usize,13) -> ParquetResult<()> {14debug_assert!(num_rows_to_skip <= values.len());1516let num_rows = values.len() - num_rows_to_skip;17let end_length = target.len() + num_rows;1819if num_rows == 0 {20return Ok(());21}2223target.reserve(num_rows);2425if dict.is_empty() {26return Err(oob_dict_idx());27}2829// Skip over whole HybridRleChunks30required_skip_whole_chunks(&mut values, &mut num_rows_to_skip)?;3132while let Some(chunk) = values.next_chunk()? {33debug_assert!(num_rows_to_skip < chunk.len() || chunk.len() == 0);3435match chunk {36HybridRleChunk::Rle(value, size) => {37if size == 0 {38continue;39}4041let Some(value) = dict.get(value) else {42return Err(oob_dict_idx());43};4445target.resize(target.len() + size - num_rows_to_skip, value);46},47HybridRleChunk::Bitpacked(mut decoder) => {48if num_rows_to_skip > 0 {49decoder.skip_chunks(num_rows_to_skip / 32);50num_rows_to_skip %= 32;5152if let Some((chunk, chunk_size)) = decoder.chunked().next_inexact() {53let chunk = &chunk[num_rows_to_skip..chunk_size];54verify_dict_indices(chunk, dict.len())?;55target.extend(chunk.iter().map(|&idx| {56// SAFETY: The dict indices were verified before.57unsafe { dict.get_unchecked(idx) }58}));59}60}6162let mut chunked = decoder.chunked();63for chunk in chunked.by_ref() {64verify_dict_indices(&chunk, dict.len())?;65target.extend(chunk.iter().map(|&idx| {66// SAFETY: The dict indices were verified before.67unsafe { dict.get_unchecked(idx) }68}));69}7071if let Some((chunk, chunk_size)) = chunked.remainder() {72verify_dict_indices(&chunk[..chunk_size], dict.len())?;73target.extend(chunk[..chunk_size].iter().map(|&idx| {74// SAFETY: The dict indices were verified before.75unsafe { dict.get_unchecked(idx) }76}));77}78},79}8081num_rows_to_skip = 0;82}8384debug_assert_eq!(target.len(), end_length);8586Ok(())87}888990