Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/parquet/encoding/hybrid_rle/bitmap.rs
7887 views
1
use std::io::Write;
2
3
/// Single-bit masks indexed by bit position, where position `0` is the
/// least-significant bit.
const BIT_MASK: [u8; 8] = [
    1 << 0,
    1 << 1,
    1 << 2,
    1 << 3,
    1 << 4,
    1 << 5,
    1 << 6,
    1 << 7,
];

/// Returns `byte` with the bit at position `i` switched on.
///
/// Position `0` is the least-significant bit; bits that are already set are
/// left untouched.
///
/// # Panics
/// Panics if `i >= 8`, because the mask table only has eight entries.
#[inline]
pub fn set(byte: u8, i: usize) -> u8 {
    let mask = BIT_MASK[i];
    byte | mask
}
10
11
/// An [`Iterator`] that walks a byte slice bit by bit and yields each bit as a `bool`,
/// starting with the least-significant bit of every byte.
/// This is a specialization of [`super::super::bitpacked::Decoder`] for `num_bits == 1`.
#[derive(Debug)]
pub struct BitmapIter<'a> {
    /// Bytes that have not been loaded into `current_byte` yet.
    iter: std::slice::Iter<'a, u8>,
    /// Byte the next bit is read from.
    current_byte: &'a u8,
    /// Number of bits that are still to be yielded.
    remaining: usize,
    /// Mask with exactly one bit set, selecting the next bit of `current_byte`.
    mask: u8,
}

impl<'a> BitmapIter<'a> {
    /// Builds a [`BitmapIter`] yielding `len` bits of `slice`, beginning at bit `offset`.
    ///
    /// When no byte exists at `offset / 8` (i.e. `offset / 8 == slice.len()`),
    /// a zero byte is used as the first byte.
    ///
    /// # Panics
    /// This function panics iff `offset / 8 > slice.len()`
    #[inline]
    pub fn new(slice: &'a [u8], offset: usize, len: usize) -> Self {
        let mut iter = slice[offset / 8..].iter();

        let current_byte = match iter.next() {
            Some(first) => first,
            None => &0,
        };

        // Only the position inside the first byte matters for the mask.
        let mask = 1u8 << (offset % 8);

        Self {
            iter,
            mask,
            remaining: len,
            current_byte,
        }
    }
}

impl Iterator for BitmapIter<'_> {
    type Item = bool;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // Bails out with `None` once every requested bit has been produced.
        self.remaining = self.remaining.checked_sub(1)?;

        let bit = *self.current_byte & self.mask != 0;

        if self.mask == 0b1000_0000 {
            // Last bit of this byte was just read: wrap the mask around and
            // move on to the next byte. If the input has run out, the current
            // byte is kept and will be read again.
            self.mask = 1;
            if let Some(byte) = self.iter.next() {
                self.current_byte = byte;
            }
        } else {
            self.mask <<= 1;
        }

        Some(bit)
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let left = self.remaining;
        (left, Some(left))
    }
}
69
70
/// Writes an iterator of bools into writer, with LSB first.
///
/// Every group of up to eight consecutive items is packed into one byte: the
/// first item of a group lands in bit 0, the second in bit 1, and so on. A
/// final group with fewer than eight items is padded with zero bits. An empty
/// iterator writes nothing.
///
/// The iterator is consumed directly rather than sized through
/// [`Iterator::size_hint`], so iterators with an inexact or missing upper
/// bound (e.g. the result of `filter` or `std::iter::from_fn`) are encoded
/// correctly instead of panicking or losing trailing items.
///
/// # Errors
/// Returns the first error reported by `writer.write_all`.
pub fn encode_bool<W: Write, I: Iterator<Item = bool>>(
    writer: &mut W,
    mut iterator: I,
) -> std::io::Result<()> {
    loop {
        let mut byte = 0u8;
        let mut filled = 0usize;

        // `take(8)` bounds `i` to `0..8`, so the shift below cannot overflow.
        for (i, value) in iterator.by_ref().take(8).enumerate() {
            if value {
                byte |= 1u8 << i;
            }
            filled = i + 1;
        }

        // Nothing was pulled: the previous byte (if any) was the last one.
        if filled == 0 {
            return Ok(());
        }

        writer.write_all(&[byte])?;

        // A partially filled byte means the iterator is exhausted.
        if filled < 8 {
            return Ok(());
        }
    }
}
103
104