Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-row/src/variable/no_order.rs
6939 views
1
/// Row encoding for variable-width elements that does not preserve ordering.
///
/// Each element is prefixed with a sentinel byte.
///
/// If the sentinel value is:
/// - 0xFF: the element is None
/// - 0xFE: the element's length is encoded as 4 LE bytes following the sentinel
/// - 0x00 - 0xFD: the element's length is the sentinel value
///
/// The element's data follows the sentinel value (and the 4-byte length, if present).
11
use std::mem::MaybeUninit;
12
13
use arrow::array::{BinaryViewArray, MutableBinaryViewArray};
14
use arrow::bitmap::BitmapBuilder;
15
use polars_utils::slice::Slice2Uninit;
16
17
use crate::row::RowEncodingOptions;
18
19
/// Returns the number of bytes the no-order encoding uses for one element.
///
/// `value` is the byte length of the element, or `None` for a null element:
/// - a null is encoded as a single sentinel byte,
/// - a value shorter than 254 bytes is encoded as one length byte plus the data,
/// - any longer value is encoded as a marker byte, a 4-byte length, then the data.
///
/// `opt` is only checked (in debug builds) to contain `NO_ORDER`.
pub fn len_from_item(value: Option<usize>, opt: RowEncodingOptions) -> usize {
    debug_assert!(opt.contains(RowEncodingOptions::NO_ORDER));

    value.map_or(1, |len| {
        // Short values fit their length in the sentinel byte itself; longer ones need
        // the sentinel plus four extra length bytes.
        let header = if len < 254 { 1 } else { 5 };
        len + header
    })
}
28
29
/// Returns the total encoded size (header plus data) of the element at the start of `buffer`.
///
/// The first byte is the sentinel: `0xFF` means a null element (1 byte in total), `0xFE`
/// means a 4-byte little-endian length follows, and any other value is the data length
/// itself.
///
/// # Safety
///
/// `buffer` must contain at least one byte, and at least five bytes when its first byte
/// is `0xFE`; these reads are not bounds-checked.
pub unsafe fn len_from_buffer(buffer: &[u8], opt: RowEncodingOptions) -> usize {
    debug_assert!(opt.contains(RowEncodingOptions::NO_ORDER));

    // SAFETY: the caller guarantees `buffer` is non-empty.
    let sentinel = unsafe { *buffer.get_unchecked(0) };

    if sentinel == 0xFF {
        // Null: only the sentinel is present.
        return 1;
    }
    if sentinel != 0xFE {
        // Short form: the sentinel is the data length.
        return 1 + usize::from(sentinel);
    }

    // Long form: sentinel + 4 length bytes + data.
    let mut len_bytes = [0u8; 4];
    // SAFETY: the caller guarantees bytes 1..5 exist when the sentinel is 0xFE.
    len_bytes.copy_from_slice(unsafe { buffer.get_unchecked(1..5) });
    5 + u32::from_le_bytes(len_bytes) as usize
}
43
44
pub unsafe fn encode_variable_no_order<'a, I: Iterator<Item = Option<&'a [u8]>>>(
45
buffer: &mut [MaybeUninit<u8>],
46
input: I,
47
opt: RowEncodingOptions,
48
offsets: &mut [usize],
49
) {
50
debug_assert!(opt.contains(RowEncodingOptions::NO_ORDER));
51
52
for (offset, opt_value) in offsets.iter_mut().zip(input) {
53
let buffer = unsafe { buffer.get_unchecked_mut(*offset..) };
54
match opt_value {
55
None => {
56
*unsafe { buffer.get_unchecked_mut(0) } = MaybeUninit::new(0xFF);
57
*offset += 1;
58
},
59
Some(v) => {
60
if v.len() >= 254 {
61
unsafe {
62
*buffer.get_unchecked_mut(0) = MaybeUninit::new(0xFE);
63
buffer
64
.get_unchecked_mut(1..5)
65
.copy_from_slice((v.len() as u32).to_le_bytes().as_uninit());
66
buffer
67
.get_unchecked_mut(5..5 + v.len())
68
.copy_from_slice(v.as_uninit());
69
}
70
*offset += 5 + v.len();
71
} else {
72
unsafe {
73
*buffer.get_unchecked_mut(0) = MaybeUninit::new(v.len() as u8);
74
buffer
75
.get_unchecked_mut(1..1 + v.len())
76
.copy_from_slice(v.as_uninit());
77
}
78
*offset += 1 + v.len();
79
}
80
},
81
}
82
}
83
}
84
85
pub unsafe fn decode_variable_no_order(
86
rows: &mut [&[u8]],
87
opt: RowEncodingOptions,
88
) -> BinaryViewArray {
89
debug_assert!(opt.contains(RowEncodingOptions::NO_ORDER));
90
91
let num_rows = rows.len();
92
let mut array = MutableBinaryViewArray::<[u8]>::with_capacity(num_rows);
93
let mut validity = BitmapBuilder::new();
94
95
for row in rows.iter_mut() {
96
let sentinel = *unsafe { row.get_unchecked(0) };
97
*row = unsafe { row.get_unchecked(1..) };
98
if sentinel == 0xFF {
99
validity.reserve(num_rows);
100
validity.extend_constant(array.len(), true);
101
validity.push(false);
102
array.push_value_ignore_validity("");
103
break;
104
}
105
106
let length = if sentinel < 0xFE {
107
sentinel as usize
108
} else {
109
let length = u32::from_le_bytes(unsafe { row.get_unchecked(..4) }.try_into().unwrap());
110
*row = unsafe { row.get_unchecked(4..) };
111
length as usize
112
};
113
114
array.push_value_ignore_validity(unsafe { row.get_unchecked(..length) });
115
*row = unsafe { row.get_unchecked(length..) };
116
}
117
118
if validity.is_empty() {
119
return array.into();
120
}
121
122
for row in rows[array.len()..].iter_mut() {
123
let sentinel = *unsafe { row.get_unchecked(0) };
124
*row = unsafe { row.get_unchecked(1..) };
125
126
validity.push(sentinel != 0xFF);
127
if sentinel == 0xFF {
128
array.push_value_ignore_validity("");
129
continue;
130
}
131
132
let length = if sentinel < 0xFE {
133
sentinel as usize
134
} else {
135
let length = u32::from_le_bytes(unsafe { row.get_unchecked(..4) }.try_into().unwrap());
136
*row = unsafe { row.get_unchecked(4..) };
137
length as usize
138
};
139
140
array.push_value_ignore_validity(unsafe { row.get_unchecked(..length) });
141
*row = unsafe { row.get_unchecked(length..) };
142
}
143
144
let array = array.freeze();
145
array.with_validity(validity.into_opt_validity())
146
}
147
148