Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/bitmap/aligned.rs
6939 views
1
use std::iter::Copied;
2
use std::slice::Iter;
3
4
use crate::bitmap::utils::BitChunk;
5
6
fn load_chunk_le<T: BitChunk>(src: &[u8]) -> T {
7
if let Ok(chunk) = src.try_into() {
8
return T::from_le_bytes(chunk);
9
}
10
11
let mut chunk = T::Bytes::default();
12
let len = src.len().min(chunk.as_ref().len());
13
chunk.as_mut()[..len].copy_from_slice(&src[..len]);
14
T::from_le_bytes(chunk)
15
}
16
17
/// Represents a bitmap split in three portions, a prefix, a suffix and an
18
/// aligned bulk section in the middle.
19
#[derive(Default, Clone, Debug)]
20
pub struct AlignedBitmapSlice<'a, T: BitChunk> {
21
prefix: T,
22
prefix_len: u32,
23
bulk: &'a [T],
24
suffix: T,
25
suffix_len: u32,
26
}
27
28
impl<'a, T: BitChunk> AlignedBitmapSlice<'a, T> {
29
#[inline(always)]
30
pub fn prefix(&self) -> T {
31
self.prefix
32
}
33
34
#[inline(always)]
35
pub fn bulk_iter(&self) -> Copied<Iter<'a, T>> {
36
self.bulk.iter().copied()
37
}
38
39
#[inline(always)]
40
pub fn bulk(&self) -> &'a [T] {
41
self.bulk
42
}
43
44
#[inline(always)]
45
pub fn suffix(&self) -> T {
46
self.suffix
47
}
48
49
/// The length (in bits) of the portion of the bitmap found in prefix.
50
#[inline(always)]
51
pub fn prefix_bitlen(&self) -> usize {
52
self.prefix_len as usize
53
}
54
55
/// The length (in bits) of the portion of the bitmap found in bulk.
56
#[inline(always)]
57
pub fn bulk_bitlen(&self) -> usize {
58
8 * size_of::<T>() * self.bulk.len()
59
}
60
61
/// The length (in bits) of the portion of the bitmap found in suffix.
62
#[inline(always)]
63
pub fn suffix_bitlen(&self) -> usize {
64
self.suffix_len as usize
65
}
66
67
pub fn new(mut bytes: &'a [u8], mut offset: usize, len: usize) -> Self {
68
if len == 0 {
69
return Self::default();
70
}
71
72
assert!(bytes.len() * 8 >= offset + len);
73
74
// Strip off useless bytes from start.
75
let start_byte_idx = offset / 8;
76
bytes = &bytes[start_byte_idx..];
77
offset %= 8;
78
79
// Fast-path: fits entirely in one chunk.
80
let chunk_len = size_of::<T>();
81
let chunk_len_bits = 8 * chunk_len;
82
if offset + len <= chunk_len_bits {
83
let mut prefix = load_chunk_le::<T>(bytes) >> offset;
84
if len < chunk_len_bits {
85
prefix &= (T::one() << len) - T::one();
86
}
87
return Self {
88
prefix,
89
prefix_len: len as u32,
90
..Self::default()
91
};
92
}
93
94
// Find how many bytes from the start our aligned section would start.
95
let mut align_offset = bytes.as_ptr().align_offset(chunk_len);
96
let mut align_offset_bits = 8 * align_offset;
97
98
// Oops, the original pointer was already aligned, but our offset means
99
// we can't start there, start one chunk later.
100
if offset > align_offset_bits {
101
align_offset_bits += chunk_len_bits;
102
align_offset += chunk_len;
103
}
104
105
// Calculate based on this the lengths of our sections (in bits).
106
let prefix_len = (align_offset_bits - offset).min(len);
107
let rest_len = len - prefix_len;
108
let suffix_len = rest_len % chunk_len_bits;
109
let bulk_len = rest_len - suffix_len;
110
debug_assert!(prefix_len < chunk_len_bits);
111
debug_assert!(bulk_len.is_multiple_of(chunk_len_bits));
112
debug_assert!(suffix_len < chunk_len_bits);
113
114
// Now we just have to load.
115
let (prefix_bytes, rest_bytes) = bytes.split_at(align_offset);
116
let (bulk_bytes, suffix_bytes) = rest_bytes.split_at(bulk_len / 8);
117
let mut prefix = load_chunk_le::<T>(prefix_bytes) >> offset;
118
let mut suffix = load_chunk_le::<T>(suffix_bytes);
119
prefix &= (T::one() << prefix_len) - T::one();
120
suffix &= (T::one() << suffix_len) - T::one();
121
Self {
122
prefix,
123
bulk: bytemuck::cast_slice(bulk_bytes),
124
suffix,
125
prefix_len: prefix_len as u32,
126
suffix_len: suffix_len as u32,
127
}
128
}
129
}
130
131