Path: blob/main/crates/polars-parquet/src/arrow/read/deserialize/dictionary_encoded/predicate.rs
8479 views
use arrow::bitmap::{Bitmap, BitmapBuilder};12use super::{oob_dict_idx, verify_dict_indices};3use crate::parquet::encoding::hybrid_rle::{HybridRleChunk, HybridRleDecoder};4use crate::parquet::error::ParquetResult;56#[inline(never)]7pub fn decode(8values: HybridRleDecoder<'_>,9dict_mask: &Bitmap,10pred_true_mask: &mut BitmapBuilder,11) -> ParquetResult<()> {12let num_filtered_dict_values = dict_mask.set_bits();1314let expected_pred_true_mask_len = pred_true_mask.len() + values.len();1516// @NOTE: this has to be changed when there are nulls null17if num_filtered_dict_values == 0 {18pred_true_mask.extend_constant(values.len(), false);19} else if num_filtered_dict_values == 1 {20let needle = dict_mask.leading_zeros();21decode_single(values, needle as u32, pred_true_mask)?;22} else {23decode_multiple(values, dict_mask, pred_true_mask)?;24}2526assert_eq!(expected_pred_true_mask_len, pred_true_mask.len());2728Ok(())29}3031#[inline(never)]32pub fn decode_single(33mut values: HybridRleDecoder<'_>,34needle: u32,35pred_true_mask: &mut BitmapBuilder,36) -> ParquetResult<()> {37pred_true_mask.reserve(values.len());3839let mut unpacked = [0u32; 32];40while let Some(chunk) = values.next_chunk()? {41match chunk {42HybridRleChunk::Rle(value, size) => {43pred_true_mask.extend_constant(size, value == needle);44},45HybridRleChunk::Bitpacked(mut decoder) => {46let size = decoder.len();47let mut chunked = decoder.chunked();4849for _ in 0..size / 32 {50let n = chunked.next_into(&mut unpacked).unwrap();51debug_assert_eq!(n, 32);5253let mut is_equal_mask = 0u64;54for (i, &v) in unpacked.iter().enumerate() {55is_equal_mask |= u64::from(v == needle) << i;56}5758// SAFETY: We reserved enough in the beginning of the function.59unsafe { pred_true_mask.push_word_with_len_unchecked(is_equal_mask, 32) };60}6162if let Some(n) = chunked.next_into(&mut unpacked) {63debug_assert_eq!(n, size % 32);6465let mut is_equal_mask = 0u64;66for (i, &v) in unpacked[..n].iter().enumerate() {67is_equal_mask |= u64::from(v == needle) << i;68}6970// SAFETY: We reserved enough in the beginning of the function.71unsafe { pred_true_mask.push_word_with_len_unchecked(is_equal_mask, n) };72}73},74}75}7677Ok(())78}7980#[inline(never)]81pub fn decode_multiple(82mut values: HybridRleDecoder<'_>,83dict_mask: &Bitmap,84pred_true_mask: &mut BitmapBuilder,85) -> ParquetResult<()> {86pred_true_mask.reserve(values.len());8788let mut unpacked = [0u32; 32];89while let Some(chunk) = values.next_chunk()? {90match chunk {91HybridRleChunk::Rle(value, size) => {92let is_pred_true = dict_mask.get(value as usize).ok_or_else(oob_dict_idx)?;93pred_true_mask.extend_constant(size, is_pred_true);94},95HybridRleChunk::Bitpacked(mut decoder) => {96let size = decoder.len();97let mut chunked = decoder.chunked();9899for _ in 0..size / 32 {100let n = chunked.next_into(&mut unpacked).unwrap();101debug_assert_eq!(n, 32);102103verify_dict_indices(&unpacked, dict_mask.len())?;104let mut is_pred_true_mask = 0u64;105for (i, &v) in unpacked.iter().enumerate() {106// SAFETY: We just verified the dictionary indices107let is_pred_true = unsafe { dict_mask.get_bit_unchecked(v as usize) };108is_pred_true_mask |= u64::from(is_pred_true) << i;109}110111// SAFETY: We reserved enough in the beginning of the function.112unsafe { pred_true_mask.push_word_with_len_unchecked(is_pred_true_mask, 32) };113}114115if let Some(n) = chunked.next_into(&mut unpacked) {116debug_assert_eq!(n, size % 32);117118verify_dict_indices(&unpacked[..n], dict_mask.len())?;119let mut is_pred_true_mask = 0u64;120for (i, &v) in unpacked[..n].iter().enumerate() {121// SAFETY: We just verified the dictionary indices122let is_pred_true = unsafe { dict_mask.get_bit_unchecked(v as usize) };123is_pred_true_mask |= u64::from(is_pred_true) << i;124}125126// SAFETY: We reserved enough in the beginning of the function.127unsafe { pred_true_mask.push_word_with_len_unchecked(is_pred_true_mask, n) };128}129},130}131}132133Ok(())134}135136137