Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/physical_binary.rs
6939 views
1
use crate::bitmap::{BitmapBuilder, MutableBitmap};
2
use crate::offset::{Offset, Offsets};
3
4
/// # Safety
5
/// The caller must ensure that `iterator` is `TrustedLen`.
6
#[inline]
7
#[allow(clippy::type_complexity)]
8
pub(crate) unsafe fn try_trusted_len_unzip<E, I, P, O>(
9
iterator: I,
10
) -> std::result::Result<(Option<MutableBitmap>, Offsets<O>, Vec<u8>), E>
11
where
12
O: Offset,
13
P: AsRef<[u8]>,
14
I: Iterator<Item = std::result::Result<Option<P>, E>>,
15
{
16
let (_, upper) = iterator.size_hint();
17
let len = upper.expect("trusted_len_unzip requires an upper limit");
18
19
let mut null = BitmapBuilder::with_capacity(len);
20
let mut offsets = Vec::<O>::with_capacity(len + 1);
21
let mut values = Vec::<u8>::new();
22
23
let mut length = O::default();
24
let mut dst = offsets.as_mut_ptr();
25
std::ptr::write(dst, length);
26
dst = dst.add(1);
27
for item in iterator {
28
if let Some(item) = item? {
29
null.push_unchecked(true);
30
let s = item.as_ref();
31
length += O::from_as_usize(s.len());
32
values.extend_from_slice(s);
33
} else {
34
null.push_unchecked(false);
35
};
36
37
std::ptr::write(dst, length);
38
dst = dst.add(1);
39
}
40
assert_eq!(
41
dst.offset_from(offsets.as_ptr()) as usize,
42
len + 1,
43
"Trusted iterator length was not accurately reported"
44
);
45
offsets.set_len(len + 1);
46
47
Ok((
48
null.into_opt_mut_validity(),
49
Offsets::new_unchecked(offsets),
50
values,
51
))
52
}
53
54
/// Creates [`MutableBitmap`] and two [`Vec`]s from an iterator of `Option`.
55
/// The first buffer corresponds to a offset buffer, the second one
56
/// corresponds to a values buffer.
57
/// # Safety
58
/// The caller must ensure that `iterator` is `TrustedLen`.
59
#[inline]
60
pub(crate) unsafe fn trusted_len_unzip<O, I, P>(
61
iterator: I,
62
) -> (Option<MutableBitmap>, Offsets<O>, Vec<u8>)
63
where
64
O: Offset,
65
P: AsRef<[u8]>,
66
I: Iterator<Item = Option<P>>,
67
{
68
let (_, upper) = iterator.size_hint();
69
let len = upper.expect("trusted_len_unzip requires an upper limit");
70
71
let mut offsets = Offsets::<O>::with_capacity(len);
72
let mut values = Vec::<u8>::new();
73
let mut validity = MutableBitmap::new();
74
75
extend_from_trusted_len_iter(&mut offsets, &mut values, &mut validity, iterator);
76
77
let validity = if validity.unset_bits() > 0 {
78
Some(validity)
79
} else {
80
None
81
};
82
83
(validity, offsets, values)
84
}
85
86
/// Creates two [`Buffer`]s from an iterator of `&[u8]`.
87
/// The first buffer corresponds to a offset buffer, the second to a values buffer.
88
/// # Safety
89
/// The caller must ensure that `iterator` is [`TrustedLen`].
90
#[inline]
91
pub(crate) unsafe fn trusted_len_values_iter<O, I, P>(iterator: I) -> (Offsets<O>, Vec<u8>)
92
where
93
O: Offset,
94
P: AsRef<[u8]>,
95
I: Iterator<Item = P>,
96
{
97
let (_, upper) = iterator.size_hint();
98
let len = upper.expect("trusted_len_unzip requires an upper limit");
99
100
let mut offsets = Offsets::<O>::with_capacity(len);
101
let mut values = Vec::<u8>::new();
102
103
extend_from_trusted_len_values_iter(&mut offsets, &mut values, iterator);
104
105
(offsets, values)
106
}
107
108
// Populates `offsets` and `values` [`Vec`]s with information extracted
109
// from the incoming `iterator`.
110
// # Safety
111
// The caller must ensure the `iterator` is [`TrustedLen`]
112
#[inline]
113
pub(crate) unsafe fn extend_from_trusted_len_values_iter<I, P, O>(
114
offsets: &mut Offsets<O>,
115
values: &mut Vec<u8>,
116
iterator: I,
117
) where
118
O: Offset,
119
P: AsRef<[u8]>,
120
I: Iterator<Item = P>,
121
{
122
let lengths = iterator.map(|item| {
123
let s = item.as_ref();
124
// Push new entries for both `values` and `offsets` buffer
125
values.extend_from_slice(s);
126
s.len()
127
});
128
offsets.try_extend_from_lengths(lengths).unwrap();
129
}
130
131
// Populates `offsets` and `values` [`Vec`]s with information extracted
132
// from the incoming `iterator`.
133
// the return value indicates how many items were added.
134
#[inline]
135
pub(crate) fn extend_from_values_iter<I, P, O>(
136
offsets: &mut Offsets<O>,
137
values: &mut Vec<u8>,
138
iterator: I,
139
) -> usize
140
where
141
O: Offset,
142
P: AsRef<[u8]>,
143
I: Iterator<Item = P>,
144
{
145
let (size_hint, _) = iterator.size_hint();
146
147
offsets.reserve(size_hint);
148
149
let start_index = offsets.len_proxy();
150
151
for item in iterator {
152
let bytes = item.as_ref();
153
values.extend_from_slice(bytes);
154
offsets.try_push(bytes.len()).unwrap();
155
}
156
offsets.len_proxy() - start_index
157
}
158
159
// Populates `offsets`, `values`, and `validity` [`Vec`]s with
160
// information extracted from the incoming `iterator`.
161
//
162
// # Safety
163
// The caller must ensure that `iterator` is [`TrustedLen`]
164
#[inline]
165
pub(crate) unsafe fn extend_from_trusted_len_iter<O, I, P>(
166
offsets: &mut Offsets<O>,
167
values: &mut Vec<u8>,
168
validity: &mut MutableBitmap,
169
iterator: I,
170
) where
171
O: Offset,
172
P: AsRef<[u8]>,
173
I: Iterator<Item = Option<P>>,
174
{
175
let (_, upper) = iterator.size_hint();
176
let additional = upper.expect("extend_from_trusted_len_iter requires an upper limit");
177
178
offsets.reserve(additional);
179
validity.reserve(additional);
180
181
let lengths = iterator.map(|item| {
182
if let Some(item) = item {
183
let bytes = item.as_ref();
184
values.extend_from_slice(bytes);
185
validity.push_unchecked(true);
186
bytes.len()
187
} else {
188
validity.push_unchecked(false);
189
0
190
}
191
});
192
offsets.try_extend_from_lengths(lengths).unwrap();
193
}
194
195
/// Creates two [`Vec`]s from an iterator of `&[u8]`.
196
/// The first buffer corresponds to a offset buffer, the second to a values buffer.
197
#[inline]
198
pub(crate) fn values_iter<O, I, P>(iterator: I) -> (Offsets<O>, Vec<u8>)
199
where
200
O: Offset,
201
P: AsRef<[u8]>,
202
I: Iterator<Item = P>,
203
{
204
let (lower, _) = iterator.size_hint();
205
206
let mut offsets = Offsets::<O>::with_capacity(lower);
207
let mut values = Vec::<u8>::new();
208
209
for item in iterator {
210
let s = item.as_ref();
211
values.extend_from_slice(s);
212
offsets.try_push(s.len()).unwrap();
213
}
214
(offsets, values)
215
}
216
217
/// Extends `validity` with all items from `other`
218
pub(crate) fn extend_validity(
219
length: usize,
220
validity: &mut Option<MutableBitmap>,
221
other: &Option<MutableBitmap>,
222
) {
223
if let Some(other) = other {
224
if let Some(validity) = validity {
225
let slice = other.as_slice();
226
// SAFETY: invariant offset + length <= slice.len()
227
unsafe { validity.extend_from_slice_unchecked(slice, 0, other.len()) }
228
} else {
229
let mut new_validity = MutableBitmap::from_len_set(length);
230
new_validity.extend_from_slice(other.as_slice(), 0, other.len());
231
*validity = Some(new_validity);
232
}
233
}
234
}
235
236