Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/arrow/read/deserialize/dictionary_encoded/predicate.rs
8479 views
1
use arrow::bitmap::{Bitmap, BitmapBuilder};
2
3
use super::{oob_dict_idx, verify_dict_indices};
4
use crate::parquet::encoding::hybrid_rle::{HybridRleChunk, HybridRleDecoder};
5
use crate::parquet::error::ParquetResult;
6
7
#[inline(never)]
8
pub fn decode(
9
values: HybridRleDecoder<'_>,
10
dict_mask: &Bitmap,
11
pred_true_mask: &mut BitmapBuilder,
12
) -> ParquetResult<()> {
13
let num_filtered_dict_values = dict_mask.set_bits();
14
15
let expected_pred_true_mask_len = pred_true_mask.len() + values.len();
16
17
// @NOTE: this has to be changed when there are nulls null
18
if num_filtered_dict_values == 0 {
19
pred_true_mask.extend_constant(values.len(), false);
20
} else if num_filtered_dict_values == 1 {
21
let needle = dict_mask.leading_zeros();
22
decode_single(values, needle as u32, pred_true_mask)?;
23
} else {
24
decode_multiple(values, dict_mask, pred_true_mask)?;
25
}
26
27
assert_eq!(expected_pred_true_mask_len, pred_true_mask.len());
28
29
Ok(())
30
}
31
32
#[inline(never)]
33
pub fn decode_single(
34
mut values: HybridRleDecoder<'_>,
35
needle: u32,
36
pred_true_mask: &mut BitmapBuilder,
37
) -> ParquetResult<()> {
38
pred_true_mask.reserve(values.len());
39
40
let mut unpacked = [0u32; 32];
41
while let Some(chunk) = values.next_chunk()? {
42
match chunk {
43
HybridRleChunk::Rle(value, size) => {
44
pred_true_mask.extend_constant(size, value == needle);
45
},
46
HybridRleChunk::Bitpacked(mut decoder) => {
47
let size = decoder.len();
48
let mut chunked = decoder.chunked();
49
50
for _ in 0..size / 32 {
51
let n = chunked.next_into(&mut unpacked).unwrap();
52
debug_assert_eq!(n, 32);
53
54
let mut is_equal_mask = 0u64;
55
for (i, &v) in unpacked.iter().enumerate() {
56
is_equal_mask |= u64::from(v == needle) << i;
57
}
58
59
// SAFETY: We reserved enough in the beginning of the function.
60
unsafe { pred_true_mask.push_word_with_len_unchecked(is_equal_mask, 32) };
61
}
62
63
if let Some(n) = chunked.next_into(&mut unpacked) {
64
debug_assert_eq!(n, size % 32);
65
66
let mut is_equal_mask = 0u64;
67
for (i, &v) in unpacked[..n].iter().enumerate() {
68
is_equal_mask |= u64::from(v == needle) << i;
69
}
70
71
// SAFETY: We reserved enough in the beginning of the function.
72
unsafe { pred_true_mask.push_word_with_len_unchecked(is_equal_mask, n) };
73
}
74
},
75
}
76
}
77
78
Ok(())
79
}
80
81
#[inline(never)]
82
pub fn decode_multiple(
83
mut values: HybridRleDecoder<'_>,
84
dict_mask: &Bitmap,
85
pred_true_mask: &mut BitmapBuilder,
86
) -> ParquetResult<()> {
87
pred_true_mask.reserve(values.len());
88
89
let mut unpacked = [0u32; 32];
90
while let Some(chunk) = values.next_chunk()? {
91
match chunk {
92
HybridRleChunk::Rle(value, size) => {
93
let is_pred_true = dict_mask.get(value as usize).ok_or_else(oob_dict_idx)?;
94
pred_true_mask.extend_constant(size, is_pred_true);
95
},
96
HybridRleChunk::Bitpacked(mut decoder) => {
97
let size = decoder.len();
98
let mut chunked = decoder.chunked();
99
100
for _ in 0..size / 32 {
101
let n = chunked.next_into(&mut unpacked).unwrap();
102
debug_assert_eq!(n, 32);
103
104
verify_dict_indices(&unpacked, dict_mask.len())?;
105
let mut is_pred_true_mask = 0u64;
106
for (i, &v) in unpacked.iter().enumerate() {
107
// SAFETY: We just verified the dictionary indices
108
let is_pred_true = unsafe { dict_mask.get_bit_unchecked(v as usize) };
109
is_pred_true_mask |= u64::from(is_pred_true) << i;
110
}
111
112
// SAFETY: We reserved enough in the beginning of the function.
113
unsafe { pred_true_mask.push_word_with_len_unchecked(is_pred_true_mask, 32) };
114
}
115
116
if let Some(n) = chunked.next_into(&mut unpacked) {
117
debug_assert_eq!(n, size % 32);
118
119
verify_dict_indices(&unpacked[..n], dict_mask.len())?;
120
let mut is_pred_true_mask = 0u64;
121
for (i, &v) in unpacked[..n].iter().enumerate() {
122
// SAFETY: We just verified the dictionary indices
123
let is_pred_true = unsafe { dict_mask.get_bit_unchecked(v as usize) };
124
is_pred_true_mask |= u64::from(is_pred_true) << i;
125
}
126
127
// SAFETY: We reserved enough in the beginning of the function.
128
unsafe { pred_true_mask.push_word_with_len_unchecked(is_pred_true_mask, n) };
129
}
130
},
131
}
132
}
133
134
Ok(())
135
}
136
137