Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-utils/src/idx_vec.rs
8422 views
1
use std::fmt::{Debug, Formatter};
2
use std::mem::ManuallyDrop;
3
use std::ops::{Deref, DerefMut};
4
5
use crate::index::{IdxSize, NonZeroIdxSize};
6
7
pub type IdxVec = UnitVec<IdxSize>;
8
9
union PointerOrValue<T> {
10
ptr: *mut T,
11
value: ManuallyDrop<T>,
12
}
13
14
/// A type logically equivalent to `Vec<T>`, but which does not do a
15
/// memory allocation until at least two elements have been pushed, storing the
16
/// first element inside the UnitVec directly.
17
///
18
/// Uses IdxSize internally to store lengths, will panic if trying to reserve
19
/// for more elements.
20
pub struct UnitVec<T> {
21
len: IdxSize,
22
capacity: NonZeroIdxSize,
23
data: PointerOrValue<T>,
24
}
25
26
unsafe impl<T: Send + Sync> Send for UnitVec<T> {}
27
unsafe impl<T: Send + Sync> Sync for UnitVec<T> {}
28
29
impl<T> UnitVec<T> {
30
#[inline(always)]
31
fn data_ptr_mut(&mut self) -> *mut T {
32
if self.is_inline() {
33
unsafe { &mut *self.data.value }
34
} else {
35
unsafe { self.data.ptr }
36
}
37
}
38
39
#[inline(always)]
40
fn data_ptr(&self) -> *const T {
41
if self.is_inline() {
42
unsafe { &*self.data.value }
43
} else {
44
unsafe { self.data.ptr }
45
}
46
}
47
48
#[inline]
49
pub fn new() -> Self {
50
Self {
51
len: 0,
52
capacity: NonZeroIdxSize::new(1).unwrap(),
53
data: PointerOrValue {
54
ptr: std::ptr::null_mut(),
55
},
56
}
57
}
58
59
pub fn is_inline(&self) -> bool {
60
self.capacity.get() == 1
61
}
62
63
#[inline(always)]
64
pub fn len(&self) -> usize {
65
self.len as usize
66
}
67
68
#[inline(always)]
69
pub fn is_empty(&self) -> bool {
70
self.len == 0
71
}
72
73
#[inline(always)]
74
pub fn capacity(&self) -> usize {
75
self.capacity.get() as usize
76
}
77
78
#[inline(always)]
79
pub fn clear(&mut self) {
80
if std::mem::needs_drop::<T>() {
81
while self.len > 0 {
82
self.pop();
83
}
84
} else {
85
self.len = 0;
86
}
87
}
88
89
#[inline(always)]
90
pub fn push(&mut self, idx: T) {
91
if self.len == self.capacity.get() {
92
self.reserve(1);
93
}
94
95
unsafe { self.push_unchecked(idx) }
96
}
97
98
#[inline(always)]
99
/// # Safety
100
/// Caller must ensure that `UnitVec` has enough capacity.
101
pub unsafe fn push_unchecked(&mut self, idx: T) {
102
unsafe {
103
self.data_ptr_mut().add(self.len as usize).write(idx);
104
self.len += 1;
105
}
106
}
107
108
#[inline]
109
pub fn pop(&mut self) -> Option<T> {
110
if self.len == 0 {
111
None
112
} else {
113
unsafe {
114
self.len -= 1;
115
Some(self.data_ptr().add(self.len as usize).read())
116
}
117
}
118
}
119
120
#[cold]
121
#[inline(never)]
122
pub fn reserve(&mut self, additional: usize) {
123
let new_len = self
124
.len
125
.checked_add(additional.try_into().unwrap())
126
.unwrap();
127
if new_len > self.capacity.get() {
128
let double = self.capacity.get() * 2;
129
self.realloc(double.max(new_len).max(8));
130
}
131
}
132
133
/// # Panics
134
/// Panics if `new_cap <= 1` or `new_cap < self.len`
135
fn realloc(&mut self, mut new_cap: IdxSize) {
136
assert!(new_cap > 1 && new_cap >= self.len);
137
unsafe {
138
let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(new_cap as usize));
139
new_cap = me.capacity().try_into().unwrap();
140
let buffer = me.as_mut_ptr();
141
std::ptr::copy(self.data_ptr(), buffer, self.len as usize);
142
self.dealloc();
143
self.data = PointerOrValue { ptr: buffer };
144
self.capacity = NonZeroIdxSize::new(new_cap).unwrap();
145
}
146
}
147
148
unsafe fn dealloc(&mut self) {
149
unsafe {
150
if !self.is_inline() {
151
drop(Vec::from_raw_parts(
152
self.data.ptr.cast::<ManuallyDrop<T>>(),
153
self.len as usize,
154
self.capacity(),
155
));
156
}
157
}
158
}
159
160
pub fn with_capacity(capacity: usize) -> Self {
161
if capacity <= 1 {
162
Self::new()
163
} else {
164
let mut me = std::mem::ManuallyDrop::new(Vec::with_capacity(capacity));
165
let cap = me.capacity().try_into().unwrap();
166
let ptr = me.as_mut_ptr();
167
Self {
168
len: 0,
169
capacity: NonZeroIdxSize::new(cap).unwrap(),
170
data: PointerOrValue { ptr },
171
}
172
}
173
}
174
175
#[inline]
176
pub fn iter(&self) -> std::slice::Iter<'_, T> {
177
self.as_slice().iter()
178
}
179
180
#[inline]
181
pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, T> {
182
self.as_mut_slice().iter_mut()
183
}
184
185
#[inline]
186
pub fn as_slice(&self) -> &[T] {
187
self.as_ref()
188
}
189
190
#[inline]
191
pub fn as_mut_slice(&mut self) -> &mut [T] {
192
self.as_mut()
193
}
194
}
195
196
impl<T: Copy> UnitVec<T> {
197
pub fn retain(&mut self, mut f: impl FnMut(T) -> bool) {
198
let mut i = 0;
199
for j in 0..self.len() {
200
if f(self[j]) {
201
self[i] = self[j];
202
i += 1;
203
}
204
}
205
206
if i == 0 {
207
*self = Self::new();
208
} else if i == 1 {
209
*self = Self::from_slice(&[self[0]]);
210
} else {
211
self.len = i as IdxSize;
212
}
213
}
214
}
215
216
impl<T: Clone> UnitVec<T> {
217
pub fn from_slice(sl: &[T]) -> Self {
218
if sl.len() <= 1 {
219
let mut new = UnitVec::new();
220
if let Some(v) = sl.first() {
221
new.push(v.clone())
222
}
223
new
224
} else {
225
sl.to_vec().into()
226
}
227
}
228
}
229
230
impl<T> Extend<T> for UnitVec<T> {
231
fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
232
let iter = iter.into_iter();
233
self.reserve(iter.size_hint().0);
234
for v in iter {
235
self.push(v)
236
}
237
}
238
}
239
240
impl<T> Drop for UnitVec<T> {
241
fn drop(&mut self) {
242
self.clear();
243
unsafe { self.dealloc() }
244
}
245
}
246
247
impl<T: Clone> Clone for UnitVec<T> {
248
fn clone(&self) -> Self {
249
Self::from_iter(self.iter().cloned())
250
}
251
}
252
253
impl<T: Debug> Debug for UnitVec<T> {
254
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
255
write!(f, "UnitVec: {:?}", self.as_slice())
256
}
257
}
258
259
impl<T> Default for UnitVec<T> {
260
fn default() -> Self {
261
Self::new()
262
}
263
}
264
265
impl<T> Deref for UnitVec<T> {
266
type Target = [T];
267
268
fn deref(&self) -> &Self::Target {
269
self.as_slice()
270
}
271
}
272
273
impl<T> DerefMut for UnitVec<T> {
274
fn deref_mut(&mut self) -> &mut Self::Target {
275
self.as_mut_slice()
276
}
277
}
278
279
impl<T> AsRef<[T]> for UnitVec<T> {
280
fn as_ref(&self) -> &[T] {
281
unsafe { std::slice::from_raw_parts(self.data_ptr(), self.len as usize) }
282
}
283
}
284
285
impl<T> AsMut<[T]> for UnitVec<T> {
286
fn as_mut(&mut self) -> &mut [T] {
287
unsafe { std::slice::from_raw_parts_mut(self.data_ptr_mut(), self.len as usize) }
288
}
289
}
290
291
impl<T: PartialEq> PartialEq for UnitVec<T> {
292
fn eq(&self, other: &Self) -> bool {
293
self.as_slice() == other.as_slice()
294
}
295
}
296
297
impl<T: Eq> Eq for UnitVec<T> {}
298
299
impl<T> FromIterator<T> for UnitVec<T> {
300
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
301
let mut iter = iter.into_iter();
302
303
let Some(first) = iter.next() else {
304
return Self::new();
305
};
306
307
let Some(second) = iter.next() else {
308
let mut out = Self::new();
309
out.push(first);
310
return out;
311
};
312
313
let mut vec = Vec::with_capacity(iter.size_hint().0 + 2);
314
vec.push(first);
315
vec.push(second);
316
vec.extend(iter);
317
Self::from(vec)
318
}
319
}
320
321
impl<T> IntoIterator for UnitVec<T> {
322
type Item = T;
323
324
type IntoIter = IntoIter<T>;
325
326
fn into_iter(mut self) -> Self::IntoIter {
327
if self.is_inline() {
328
IntoIter::Inline(self.pop().into_iter())
329
} else {
330
IntoIter::External(Vec::from(self).into_iter())
331
}
332
}
333
}
334
335
pub enum IntoIter<T> {
336
Inline(std::option::IntoIter<T>),
337
External(std::vec::IntoIter<T>),
338
}
339
340
impl<T> Iterator for IntoIter<T> {
341
type Item = T;
342
343
fn next(&mut self) -> Option<Self::Item> {
344
match self {
345
IntoIter::Inline(it) => it.next(),
346
IntoIter::External(it) => it.next(),
347
}
348
}
349
350
fn size_hint(&self) -> (usize, Option<usize>) {
351
match self {
352
IntoIter::Inline(it) => it.size_hint(),
353
IntoIter::External(it) => it.size_hint(),
354
}
355
}
356
}
357
358
impl<T, const N: usize> From<[T; N]> for UnitVec<T> {
359
fn from(value: [T; N]) -> Self {
360
UnitVec::from_iter(value)
361
}
362
}
363
364
impl<T> ExactSizeIterator for IntoIter<T> {}
365
366
impl<T> From<Vec<T>> for UnitVec<T> {
367
fn from(mut value: Vec<T>) -> Self {
368
if value.capacity() <= 1 {
369
let mut new = UnitVec::new();
370
if let Some(v) = value.pop() {
371
new.push(v)
372
}
373
new
374
} else {
375
let mut me = std::mem::ManuallyDrop::new(value);
376
UnitVec {
377
data: PointerOrValue {
378
ptr: me.as_mut_ptr(),
379
},
380
capacity: NonZeroIdxSize::new(me.capacity().try_into().unwrap()).unwrap(),
381
len: me.len().try_into().unwrap(),
382
}
383
}
384
}
385
}
386
387
impl<T> From<UnitVec<T>> for Vec<T> {
388
fn from(mut value: UnitVec<T>) -> Self {
389
if value.is_inline() {
390
let mut out = Vec::with_capacity(value.len());
391
if let Some(item) = value.pop() {
392
out.push(item);
393
}
394
out
395
} else {
396
// SAFETY: when not inline, the data points to a buffer allocated by a Vec.
397
let out = unsafe {
398
Vec::from_raw_parts(value.data.ptr, value.len as usize, value.capacity())
399
};
400
// Prevent deallocating the buffer
401
std::mem::forget(value);
402
out
403
}
404
}
405
}
406
407
#[macro_export]
408
macro_rules! unitvec {
409
() => {{
410
$crate::idx_vec::UnitVec::new()
411
}};
412
($elem:expr; $n:expr) => {{
413
let mut new = $crate::idx_vec::UnitVec::new();
414
for _ in 0..$n {
415
new.push($elem)
416
}
417
new
418
}};
419
($elem:expr) => {{
420
let mut new = $crate::idx_vec::UnitVec::new();
421
let v = $elem;
422
// SAFETY: first element always fits.
423
unsafe { new.push_unchecked(v) };
424
new
425
}};
426
($($x:expr),+ $(,)?) => {{
427
vec![$($x),+].into()
428
}};
429
}
430
431
mod tests {
432
433
#[test]
434
#[should_panic]
435
fn test_unitvec_realloc_zero() {
436
super::UnitVec::<usize>::new().realloc(0);
437
}
438
439
#[test]
440
#[should_panic]
441
fn test_unitvec_realloc_one() {
442
super::UnitVec::<usize>::new().realloc(1);
443
}
444
445
#[test]
446
#[should_panic]
447
fn test_untivec_realloc_lt_len() {
448
super::UnitVec::<usize>::from([1, 2]).realloc(1)
449
}
450
451
#[test]
452
fn test_unitvec_clone() {
453
{
454
let v = unitvec![1usize];
455
assert_eq!(v, v.clone());
456
}
457
458
for n in [
459
26903816120209729usize,
460
42566276440897687,
461
44435161834424652,
462
49390731489933083,
463
51201454727649242,
464
83861672190814841,
465
92169290527847622,
466
92476373900398436,
467
95488551309275459,
468
97499984126814549,
469
] {
470
let v = unitvec![n];
471
assert_eq!(v, v.clone());
472
}
473
}
474
475
#[test]
476
fn test_unitvec_repeat_n() {
477
assert_eq!(unitvec![5; 3].as_slice(), &[5, 5, 5])
478
}
479
}
480
481