Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-utils/src/index.rs
6939 views
1
#![allow(unsafe_op_in_unsafe_fn)]
2
use std::fmt::{Debug, Formatter};
3
4
use polars_error::{PolarsResult, polars_ensure};
5
6
use crate::nulls::IsNull;
7
8
#[cfg(not(feature = "bigidx"))]
9
pub type IdxSize = u32;
10
#[cfg(feature = "bigidx")]
11
pub type IdxSize = u64;
12
13
#[cfg(not(feature = "bigidx"))]
14
pub type NonZeroIdxSize = std::num::NonZeroU32;
15
#[cfg(feature = "bigidx")]
16
pub type NonZeroIdxSize = std::num::NonZeroU64;
17
18
#[cfg(not(feature = "bigidx"))]
19
pub type AtomicIdxSize = std::sync::atomic::AtomicU32;
20
#[cfg(feature = "bigidx")]
21
pub type AtomicIdxSize = std::sync::atomic::AtomicU64;
22
23
#[derive(Clone, Copy)]
24
#[repr(transparent)]
25
pub struct NullableIdxSize {
26
pub inner: IdxSize,
27
}
28
29
impl PartialEq<Self> for NullableIdxSize {
30
fn eq(&self, other: &Self) -> bool {
31
self.inner == other.inner
32
}
33
}
34
35
impl Eq for NullableIdxSize {}
36
37
unsafe impl bytemuck::Zeroable for NullableIdxSize {}
38
unsafe impl bytemuck::AnyBitPattern for NullableIdxSize {}
39
unsafe impl bytemuck::NoUninit for NullableIdxSize {}
40
41
impl Debug for NullableIdxSize {
42
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
43
write!(f, "{:?}", self.inner)
44
}
45
}
46
47
impl NullableIdxSize {
48
#[inline(always)]
49
pub fn is_null_idx(&self) -> bool {
50
// The left/right join maintain_order algorithms depend on the special value for sorting
51
self.inner == IdxSize::MAX
52
}
53
54
#[inline(always)]
55
pub const fn null() -> Self {
56
Self {
57
inner: IdxSize::MAX,
58
}
59
}
60
61
#[inline(always)]
62
pub fn idx(&self) -> IdxSize {
63
self.inner
64
}
65
66
#[inline(always)]
67
pub fn to_opt(&self) -> Option<IdxSize> {
68
if self.is_null_idx() {
69
None
70
} else {
71
Some(self.idx())
72
}
73
}
74
}
75
76
impl From<IdxSize> for NullableIdxSize {
77
#[inline(always)]
78
fn from(value: IdxSize) -> Self {
79
Self { inner: value }
80
}
81
}
82
83
pub trait Bounded {
84
fn len(&self) -> usize;
85
86
fn is_empty(&self) -> bool {
87
self.len() == 0
88
}
89
}
90
91
pub trait NullCount {
92
fn null_count(&self) -> usize {
93
0
94
}
95
}
96
97
impl<T: NullCount> NullCount for &T {
98
fn null_count(&self) -> usize {
99
(*self).null_count()
100
}
101
}
102
103
impl<T> Bounded for &[T] {
104
fn len(&self) -> usize {
105
<[T]>::len(self)
106
}
107
}
108
109
impl<T> NullCount for &[T] {
110
fn null_count(&self) -> usize {
111
0
112
}
113
}
114
115
pub trait Indexable {
116
type Item: IsNull;
117
118
fn get(&self, i: usize) -> Self::Item;
119
120
/// # Safety
121
/// Doesn't do any bound checks.
122
unsafe fn get_unchecked(&self, i: usize) -> Self::Item;
123
}
124
125
impl<T: Copy + IsNull> Indexable for &[T] {
126
type Item = T;
127
128
fn get(&self, i: usize) -> Self::Item {
129
self[i]
130
}
131
132
/// # Safety
133
/// Doesn't do any bound checks.
134
unsafe fn get_unchecked(&self, i: usize) -> Self::Item {
135
*<[T]>::get_unchecked(self, i)
136
}
137
}
138
139
pub fn check_bounds(idx: &[IdxSize], len: IdxSize) -> PolarsResult<()> {
140
// We iterate in large uninterrupted chunks to help auto-vectorization.
141
let Some(max_idx) = idx.iter().copied().max() else {
142
return Ok(());
143
};
144
145
polars_ensure!(max_idx < len, OutOfBounds: "indices are out of bounds");
146
Ok(())
147
}
148
149
pub trait ToIdx {
150
fn to_idx(self, len: u64) -> IdxSize;
151
}
152
153
macro_rules! impl_to_idx {
154
($ty:ty) => {
155
impl ToIdx for $ty {
156
#[inline]
157
fn to_idx(self, _len: u64) -> IdxSize {
158
self as IdxSize
159
}
160
}
161
};
162
($ty:ty, $ity:ty) => {
163
impl ToIdx for $ty {
164
#[inline]
165
fn to_idx(self, len: u64) -> IdxSize {
166
let idx = self as $ity;
167
if idx < 0 {
168
(idx + len as $ity) as IdxSize
169
} else {
170
idx as IdxSize
171
}
172
}
173
}
174
};
175
}
176
177
impl_to_idx!(u8);
178
impl_to_idx!(u16);
179
impl_to_idx!(u32);
180
impl_to_idx!(u64);
181
impl_to_idx!(i8, i16);
182
impl_to_idx!(i16, i32);
183
impl_to_idx!(i32, i64);
184
impl_to_idx!(i64, i64);
185
186
// Allows for 2^24 (~16M) chunks
187
// Leaves 2^40 (~1T) rows per chunk
188
const DEFAULT_CHUNK_BITS: u64 = 24;
189
190
#[derive(Clone, Copy)]
191
#[repr(transparent)]
192
pub struct ChunkId<const CHUNK_BITS: u64 = DEFAULT_CHUNK_BITS> {
193
swizzled: u64,
194
}
195
196
impl<const CHUNK_BITS: u64> Debug for ChunkId<CHUNK_BITS> {
197
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
198
if self.is_null() {
199
write!(f, "NULL")
200
} else {
201
let (chunk, row) = self.extract();
202
write!(f, "({chunk}, {row})")
203
}
204
}
205
}
206
207
impl<const CHUNK_BITS: u64> ChunkId<CHUNK_BITS> {
208
#[inline(always)]
209
pub const fn null() -> Self {
210
Self { swizzled: u64::MAX }
211
}
212
213
#[inline(always)]
214
pub fn is_null(&self) -> bool {
215
self.swizzled == u64::MAX
216
}
217
218
#[inline(always)]
219
#[allow(clippy::unnecessary_cast)]
220
pub fn store(chunk: IdxSize, row: IdxSize) -> Self {
221
debug_assert!(chunk < !(u64::MAX << CHUNK_BITS) as IdxSize);
222
let swizzled = ((row as u64) << CHUNK_BITS) | chunk as u64;
223
224
Self { swizzled }
225
}
226
227
#[inline(always)]
228
#[allow(clippy::unnecessary_cast)]
229
pub fn extract(self) -> (IdxSize, IdxSize) {
230
let row = (self.swizzled >> CHUNK_BITS) as IdxSize;
231
let mask = (1u64 << CHUNK_BITS) - 1;
232
let chunk = (self.swizzled & mask) as IdxSize;
233
(chunk, row)
234
}
235
236
#[inline(always)]
237
pub fn inner_mut(&mut self) -> &mut u64 {
238
&mut self.swizzled
239
}
240
241
pub fn from_inner(inner: u64) -> Self {
242
Self { swizzled: inner }
243
}
244
245
pub fn into_inner(self) -> u64 {
246
self.swizzled
247
}
248
}
249
250
#[cfg(test)]
251
mod test {
252
use super::*;
253
254
#[test]
255
fn test_chunk_idx() {
256
let chunk = 213908;
257
let row = 813457;
258
259
let ci: ChunkId = ChunkId::store(chunk, row);
260
let (c, r) = ci.extract();
261
262
assert_eq!(c, chunk);
263
assert_eq!(r, row);
264
}
265
}
266
267