Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-utils/src/idx_vec.rs
6939 views
1
use std::fmt::{Debug, Formatter};
2
use std::ops::Deref;
3
4
use crate::index::{IdxSize, NonZeroIdxSize};
5
6
pub type IdxVec = UnitVec<IdxSize>;
7
8
/// A type logically equivalent to `Vec<T>`, but which does not do a
9
/// memory allocation until at least two elements have been pushed, storing the
10
/// first element in the data pointer directly.
11
///
12
/// Uses IdxSize internally to store lengths, will panic if trying to reserve
13
/// for more elements.
14
#[derive(Eq)]
15
pub struct UnitVec<T> {
16
len: IdxSize,
17
capacity: NonZeroIdxSize,
18
data: *mut T,
19
}
20
21
unsafe impl<T: Send + Sync> Send for UnitVec<T> {}
22
unsafe impl<T: Send + Sync> Sync for UnitVec<T> {}
23
24
impl<T> UnitVec<T> {
25
#[inline(always)]
26
fn data_ptr_mut(&mut self) -> *mut T {
27
let external = self.data;
28
let inline = &mut self.data as *mut *mut T as *mut T;
29
if self.is_inline() { inline } else { external }
30
}
31
32
#[inline(always)]
33
fn data_ptr(&self) -> *const T {
34
let external = self.data;
35
let inline = &self.data as *const *mut T as *mut T;
36
if self.is_inline() { inline } else { external }
37
}
38
39
#[inline]
40
pub fn new() -> Self {
41
// This is optimized away, all const.
42
assert!(size_of::<T>() <= size_of::<*mut T>() && align_of::<T>() <= align_of::<*mut T>());
43
Self {
44
len: 0,
45
capacity: NonZeroIdxSize::new(1).unwrap(),
46
data: std::ptr::null_mut(),
47
}
48
}
49
50
pub fn is_inline(&self) -> bool {
51
self.capacity.get() == 1
52
}
53
54
#[inline(always)]
55
pub fn len(&self) -> usize {
56
self.len as usize
57
}
58
59
#[inline(always)]
60
pub fn is_empty(&self) -> bool {
61
self.len == 0
62
}
63
64
#[inline(always)]
65
pub fn capacity(&self) -> usize {
66
self.capacity.get() as usize
67
}
68
69
#[inline(always)]
70
pub fn clear(&mut self) {
71
self.len = 0;
72
}
73
74
#[inline(always)]
75
pub fn push(&mut self, idx: T) {
76
if self.len == self.capacity.get() {
77
self.reserve(1);
78
}
79
80
unsafe { self.push_unchecked(idx) }
81
}
82
83
#[inline(always)]
84
/// # Safety
85
/// Caller must ensure that `UnitVec` has enough capacity.
86
pub unsafe fn push_unchecked(&mut self, idx: T) {
87
unsafe {
88
self.data_ptr_mut().add(self.len as usize).write(idx);
89
self.len += 1;
90
}
91
}
92
93
#[cold]
94
#[inline(never)]
95
pub fn reserve(&mut self, additional: usize) {
96
let new_len = self
97
.len
98
.checked_add(additional.try_into().unwrap())
99
.unwrap();
100
if new_len > self.capacity.get() {
101
let double = self.capacity.get() * 2;
102
self.realloc(double.max(new_len).max(8));
103
}
104
}
105
106
/// # Panics
107
/// Panics if `new_cap <= 1` or `new_cap < self.len`
108
fn realloc(&mut self, new_cap: IdxSize) {
109
assert!(new_cap > 1 && new_cap >= self.len);
110
unsafe {
111
let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(new_cap as usize));
112
let buffer = me.as_mut_ptr();
113
std::ptr::copy(self.data_ptr(), buffer, self.len as usize);
114
self.dealloc();
115
self.data = buffer;
116
self.capacity = NonZeroIdxSize::new(new_cap).unwrap();
117
}
118
}
119
120
fn dealloc(&mut self) {
121
unsafe {
122
if !self.is_inline() {
123
let _ = Vec::from_raw_parts(self.data, self.len as usize, self.capacity());
124
self.capacity = NonZeroIdxSize::new(1).unwrap();
125
}
126
}
127
}
128
129
pub fn with_capacity(capacity: usize) -> Self {
130
if capacity <= 1 {
131
Self::new()
132
} else {
133
let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(capacity));
134
let data = me.as_mut_ptr();
135
Self {
136
len: 0,
137
capacity: NonZeroIdxSize::new(capacity.try_into().unwrap()).unwrap(),
138
data,
139
}
140
}
141
}
142
143
#[inline]
144
pub fn iter(&self) -> std::slice::Iter<'_, T> {
145
self.as_slice().iter()
146
}
147
148
#[inline]
149
pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, T> {
150
self.as_mut_slice().iter_mut()
151
}
152
153
#[inline]
154
pub fn as_slice(&self) -> &[T] {
155
self.as_ref()
156
}
157
158
#[inline]
159
pub fn as_mut_slice(&mut self) -> &mut [T] {
160
self.as_mut()
161
}
162
163
#[inline]
164
pub fn pop(&mut self) -> Option<T> {
165
if self.len == 0 {
166
None
167
} else {
168
unsafe {
169
self.len -= 1;
170
Some(std::ptr::read(self.as_ptr().add(self.len())))
171
}
172
}
173
}
174
}
175
176
impl<T> Extend<T> for UnitVec<T> {
177
fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
178
let iter = iter.into_iter();
179
self.reserve(iter.size_hint().0);
180
for v in iter {
181
self.push(v)
182
}
183
}
184
}
185
186
impl<T> Drop for UnitVec<T> {
187
fn drop(&mut self) {
188
self.dealloc()
189
}
190
}
191
192
impl<T> Clone for UnitVec<T> {
193
fn clone(&self) -> Self {
194
unsafe {
195
if self.is_inline() {
196
Self { ..*self }
197
} else {
198
let mut copy = Self::with_capacity(self.len as usize);
199
std::ptr::copy(self.data_ptr(), copy.data_ptr_mut(), self.len as usize);
200
copy.len = self.len;
201
copy
202
}
203
}
204
}
205
}
206
207
impl<T: Debug> Debug for UnitVec<T> {
208
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
209
write!(f, "UnitVec: {:?}", self.as_slice())
210
}
211
}
212
213
impl<T> Default for UnitVec<T> {
214
fn default() -> Self {
215
Self {
216
len: 0,
217
capacity: NonZeroIdxSize::new(1).unwrap(),
218
data: std::ptr::null_mut(),
219
}
220
}
221
}
222
223
impl<T> Deref for UnitVec<T> {
224
type Target = [T];
225
226
fn deref(&self) -> &Self::Target {
227
self.as_slice()
228
}
229
}
230
231
impl<T> AsRef<[T]> for UnitVec<T> {
232
fn as_ref(&self) -> &[T] {
233
unsafe { std::slice::from_raw_parts(self.data_ptr(), self.len as usize) }
234
}
235
}
236
237
impl<T> AsMut<[T]> for UnitVec<T> {
238
fn as_mut(&mut self) -> &mut [T] {
239
unsafe { std::slice::from_raw_parts_mut(self.data_ptr_mut(), self.len as usize) }
240
}
241
}
242
243
impl<T: PartialEq> PartialEq for UnitVec<T> {
244
fn eq(&self, other: &Self) -> bool {
245
self.as_slice() == other.as_slice()
246
}
247
}
248
249
impl<T> FromIterator<T> for UnitVec<T> {
250
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
251
let mut iter = iter.into_iter();
252
253
let Some(first) = iter.next() else {
254
return Self::new();
255
};
256
257
let Some(second) = iter.next() else {
258
let mut out = Self::new();
259
out.push(first);
260
return out;
261
};
262
263
let mut vec = Vec::with_capacity(iter.size_hint().0 + 2);
264
vec.push(first);
265
vec.push(second);
266
vec.extend(iter);
267
Self::from(vec)
268
}
269
}
270
271
impl<T> IntoIterator for UnitVec<T> {
272
type Item = T;
273
274
type IntoIter = IntoIter<T>;
275
276
fn into_iter(mut self) -> Self::IntoIter {
277
if self.is_inline() {
278
IntoIter::Inline(self.pop().into_iter())
279
} else {
280
IntoIter::External(Vec::from(self).into_iter())
281
}
282
}
283
}
284
285
pub enum IntoIter<T> {
286
Inline(std::option::IntoIter<T>),
287
External(std::vec::IntoIter<T>),
288
}
289
290
impl<T> Iterator for IntoIter<T> {
291
type Item = T;
292
293
fn next(&mut self) -> Option<Self::Item> {
294
match self {
295
IntoIter::Inline(it) => it.next(),
296
IntoIter::External(it) => it.next(),
297
}
298
}
299
}
300
301
impl<T> From<Vec<T>> for UnitVec<T> {
302
fn from(mut value: Vec<T>) -> Self {
303
if value.capacity() <= 1 {
304
let mut new = UnitVec::new();
305
if let Some(v) = value.pop() {
306
new.push(v)
307
}
308
new
309
} else {
310
let mut me = std::mem::ManuallyDrop::new(value);
311
UnitVec {
312
data: me.as_mut_ptr(),
313
capacity: NonZeroIdxSize::new(me.capacity().try_into().unwrap()).unwrap(),
314
len: me.len().try_into().unwrap(),
315
}
316
}
317
}
318
}
319
320
impl<T> From<UnitVec<T>> for Vec<T> {
321
fn from(mut value: UnitVec<T>) -> Self {
322
if value.is_inline() {
323
let mut out = Vec::with_capacity(value.len());
324
if let Some(item) = value.pop() {
325
out.push(item);
326
}
327
out
328
} else {
329
// SAFETY: when not inline, the data points to a buffer allocated by a Vec.
330
let out =
331
unsafe { Vec::from_raw_parts(value.data, value.len as usize, value.capacity()) };
332
// Prevent deallocating the buffer
333
std::mem::forget(value);
334
out
335
}
336
}
337
}
338
339
impl<T: Clone> From<&[T]> for UnitVec<T> {
340
fn from(value: &[T]) -> Self {
341
if value.len() <= 1 {
342
let mut new = UnitVec::new();
343
if let Some(v) = value.first() {
344
new.push(v.clone())
345
}
346
new
347
} else {
348
value.to_vec().into()
349
}
350
}
351
}
352
353
#[macro_export]
354
macro_rules! unitvec {
355
() => (
356
$crate::idx_vec::UnitVec::new()
357
);
358
($elem:expr; $n:expr) => (
359
let mut new = $crate::idx_vec::UnitVec::new();
360
for _ in 0..$n {
361
new.push($elem)
362
}
363
new
364
);
365
($elem:expr) => (
366
{let mut new = $crate::idx_vec::UnitVec::new();
367
let v = $elem;
368
// SAFETY: first element always fits.
369
unsafe { new.push_unchecked(v) };
370
new}
371
);
372
($($x:expr),+ $(,)?) => (
373
vec![$($x),+].into()
374
);
375
}
376
377
mod tests {
378
379
#[test]
380
#[should_panic]
381
fn test_unitvec_realloc_zero() {
382
super::UnitVec::<usize>::new().realloc(0);
383
}
384
385
#[test]
386
#[should_panic]
387
fn test_unitvec_realloc_one() {
388
super::UnitVec::<usize>::new().realloc(1);
389
}
390
391
#[test]
392
#[should_panic]
393
fn test_untivec_realloc_lt_len() {
394
super::UnitVec::<usize>::from(&[1, 2][..]).realloc(1)
395
}
396
397
#[test]
398
fn test_unitvec_clone() {
399
{
400
let v = unitvec![1usize];
401
assert_eq!(v, v.clone());
402
}
403
404
for n in [
405
26903816120209729usize,
406
42566276440897687,
407
44435161834424652,
408
49390731489933083,
409
51201454727649242,
410
83861672190814841,
411
92169290527847622,
412
92476373900398436,
413
95488551309275459,
414
97499984126814549,
415
] {
416
let v = unitvec![n];
417
assert_eq!(v, v.clone());
418
}
419
}
420
}
421
422