Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/utils/mod.rs
6940 views
1
mod any_value;
2
use arrow::compute::concatenate::concatenate_validities;
3
use arrow::compute::utils::combine_validities_and;
4
pub mod flatten;
5
pub(crate) mod series;
6
mod supertype;
7
use std::borrow::Cow;
8
use std::ops::{Deref, DerefMut};
9
mod schema;
10
11
pub use any_value::*;
12
use arrow::bitmap::Bitmap;
13
pub use arrow::legacy::utils::*;
14
pub use arrow::trusted_len::TrustMyLength;
15
use flatten::*;
16
use num_traits::{One, Zero};
17
use rayon::prelude::*;
18
pub use schema::*;
19
pub use series::*;
20
pub use supertype::*;
21
pub use {arrow, rayon};
22
23
use crate::POOL;
24
use crate::prelude::*;
25
26
/// Transparent newtype wrapper around `T`.
///
/// `#[repr(transparent)]` guarantees the same layout as `T`, so it can be used
/// for impl specializations (e.g. implementing foreign traits for wrapped types)
/// without any runtime cost.
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> Deref for Wrap<T> {
    type Target = T;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
35
36
/// Number of partitions to use for parallel work: the current number of
/// threads in the global rayon `POOL`.
#[inline(always)]
pub fn _set_partition_size() -> usize {
    POOL.current_num_threads()
}
40
41
/// Just a wrapper structure which is useful for certain impl specializations.
///
/// This is for instance use to implement
/// `impl<T> FromIterator<T::Native> for NoNull<ChunkedArray<T>>`
/// as `Option<T::Native>` was already implemented:
/// `impl<T> FromIterator<Option<T::Native>> for ChunkedArray<T>`
pub struct NoNull<T> {
    // Wrapped value; by construction assumed to contain no null entries.
    inner: T,
}
50
51
impl<T> NoNull<T> {
52
pub fn new(inner: T) -> Self {
53
NoNull { inner }
54
}
55
56
pub fn into_inner(self) -> T {
57
self.inner
58
}
59
}
60
61
impl<T> Deref for NoNull<T> {
    type Target = T;

    /// Borrow the wrapped value.
    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}
68
69
impl<T> DerefMut for NoNull<T> {
    /// Mutably borrow the wrapped value.
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.inner
    }
}
74
75
/// Derive an allocation capacity from an iterator's `size_hint`.
///
/// Prefers the exact upper bound when the iterator reports one; otherwise uses
/// the lower bound, falling back to a default guess of 1024 when the iterator
/// reports no information at all (`(0, None)`).
pub(crate) fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    let (lower, upper) = iter.size_hint();
    match upper {
        Some(exact) => exact,
        None if lower == 0 => 1024,
        None => lower,
    }
}
82
83
// prefer this one over split_ca, as this can push the null_count into the thread pool
84
// returns an `(offset, length)` tuple
85
#[doc(hidden)]
86
pub fn _split_offsets(len: usize, n: usize) -> Vec<(usize, usize)> {
87
if n == 1 {
88
vec![(0, len)]
89
} else {
90
let chunk_size = len / n;
91
92
(0..n)
93
.map(|partition| {
94
let offset = partition * chunk_size;
95
let len = if partition == (n - 1) {
96
len - offset
97
} else {
98
chunk_size
99
};
100
(partition * chunk_size, len)
101
})
102
.collect_trusted()
103
}
104
}
105
106
/// Abstraction over chunked, sliceable containers (`DataFrame`, `Series`,
/// `ChunkedArray`) so the `split*` helpers below can be written once.
#[allow(clippy::len_without_is_empty)]
pub trait Container: Clone {
    /// Slice `len` elements starting at `offset` (i64, so negative offsets may
    /// be supported by the underlying implementation).
    fn slice(&self, offset: i64, len: usize) -> Self;

    /// Split into the parts before and after `offset`.
    fn split_at(&self, offset: i64) -> (Self, Self);

    /// Number of elements (rows) in the container.
    fn len(&self) -> usize;

    /// Iterate over the physical chunks, each wrapped as a standalone container.
    fn iter_chunks(&self) -> impl Iterator<Item = Self>;

    /// Whether the chunks are misaligned and must be rechunked before
    /// per-chunk access is valid.
    fn should_rechunk(&self) -> bool;

    /// Number of physical chunks.
    fn n_chunks(&self) -> usize;

    /// Length of each physical chunk, in order.
    fn chunk_lengths(&self) -> impl Iterator<Item = usize>;
}
122
123
impl Container for DataFrame {
    fn slice(&self, offset: i64, len: usize) -> Self {
        DataFrame::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        DataFrame::split_at(self, offset)
    }

    // Container length == number of rows.
    fn len(&self) -> usize {
        self.height()
    }

    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        flatten_df_iter(self)
    }

    // Delegates to the inherent `DataFrame::should_rechunk`.
    fn should_rechunk(&self) -> bool {
        self.should_rechunk()
    }

    // Chunk count of the first column; assumes columns are aligned.
    fn n_chunks(&self) -> usize {
        DataFrame::first_col_n_chunks(self)
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        // @scalar-correctness?
        // NOTE(review): reads chunk lengths from the first column only — assumes
        // all columns share the same chunking.
        self.columns[0].as_materialized_series().chunk_lengths()
    }
}
153
154
impl<T: PolarsDataType> Container for ChunkedArray<T> {
    fn slice(&self, offset: i64, len: usize) -> Self {
        ChunkedArray::slice(self, offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        ChunkedArray::split_at(self, offset)
    }

    fn len(&self) -> usize {
        ChunkedArray::len(self)
    }

    // Each arrow chunk becomes its own single-chunk ChunkedArray with the same name.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        self.downcast_iter()
            .map(|arr| Self::with_chunk(self.name().clone(), arr.clone()))
    }

    // A single ChunkedArray has no cross-column alignment concerns.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        ChunkedArray::chunk_lengths(self)
    }
}
184
185
impl Container for Series {
    fn slice(&self, offset: i64, len: usize) -> Self {
        self.0.slice(offset, len)
    }

    fn split_at(&self, offset: i64) -> (Self, Self) {
        self.0.split_at(offset)
    }

    fn len(&self) -> usize {
        self.0.len()
    }

    // One single-chunk Series per physical chunk.
    fn iter_chunks(&self) -> impl Iterator<Item = Self> {
        (0..self.0.n_chunks()).map(|i| self.select_chunk(i))
    }

    // A lone Series has no cross-column alignment concerns.
    fn should_rechunk(&self) -> bool {
        false
    }

    fn n_chunks(&self) -> usize {
        self.chunks().len()
    }

    fn chunk_lengths(&self) -> impl Iterator<Item = usize> {
        self.0.chunk_lengths()
    }
}
214
215
fn split_impl<C: Container>(container: &C, target: usize, chunk_size: usize) -> Vec<C> {
216
if target == 1 {
217
return vec![container.clone()];
218
}
219
let mut out = Vec::with_capacity(target);
220
let chunk_size = chunk_size as i64;
221
222
// First split
223
let (chunk, mut remainder) = container.split_at(chunk_size);
224
out.push(chunk);
225
226
// Take the rest of the splits of exactly chunk size, but skip the last remainder as we won't split that.
227
for _ in 1..target - 1 {
228
let (a, b) = remainder.split_at(chunk_size);
229
out.push(a);
230
remainder = b
231
}
232
// This can be slightly larger than `chunk_size`, but is smaller than `2 * chunk_size`.
233
out.push(remainder);
234
out
235
}
236
237
/// Splits, but doesn't flatten chunks. E.g. a container can still have multiple chunks.
238
pub fn split<C: Container>(container: &C, target: usize) -> Vec<C> {
239
let total_len = container.len();
240
if total_len == 0 {
241
return vec![container.clone()];
242
}
243
244
let chunk_size = std::cmp::max(total_len / target, 1);
245
246
if container.n_chunks() == target
247
&& container
248
.chunk_lengths()
249
.all(|len| len.abs_diff(chunk_size) < 100)
250
// We cannot get chunks if they are misaligned
251
&& !container.should_rechunk()
252
{
253
return container.iter_chunks().collect();
254
}
255
split_impl(container, target, chunk_size)
256
}
257
258
/// Split a [`Container`] in `target` elements. The `target` count is a best
/// effort and doesn't have to be respected exactly: deviation from the target
/// may occur to create more equally sized chunks.
///
/// Unlike [`split`], multi-chunk containers are cut along (and within)
/// existing chunk boundaries so each returned piece is a single chunk.
pub fn split_and_flatten<C: Container>(container: &C, target: usize) -> Vec<C> {
    let total_len = container.len();
    if total_len == 0 {
        return vec![container.clone()];
    }

    let chunk_size = std::cmp::max(total_len / target, 1);

    // Fast path: existing chunking already matches the requested layout.
    if container.n_chunks() == target
        && container
            .chunk_lengths()
            .all(|len| len.abs_diff(chunk_size) < 100)
        // We cannot get chunks if they are misaligned
        && !container.should_rechunk()
    {
        return container.iter_chunks().collect();
    }

    if container.n_chunks() == 1 {
        split_impl(container, target, chunk_size)
    } else {
        // Walk the existing chunks and cut each into ~chunk_size pieces.
        let mut out = Vec::with_capacity(target);
        let chunks = container.iter_chunks();

        'new_chunk: for mut chunk in chunks {
            loop {
                let h = chunk.len();
                if h < chunk_size {
                    // TODO if the chunk is much smaller than chunk size, we should try to merge it with the next one.
                    out.push(chunk);
                    continue 'new_chunk;
                }

                // If a split leads to the next chunk being smaller than 30% take the whole chunk
                if ((h - chunk_size) as f64 / chunk_size as f64) < 0.3 {
                    out.push(chunk);
                    continue 'new_chunk;
                }

                let (a, b) = chunk.split_at(chunk_size as i64);
                out.push(a);
                chunk = b;
            }
        }
        out
    }
}
307
308
/// Split a [`DataFrame`] in `target` elements. The target doesn't have to be respected if not
309
/// strict. Deviation of the target might be done to create more equal size chunks.
310
///
311
/// # Panics
312
/// if chunks are not aligned
313
pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
314
if strict {
315
split(df, target)
316
} else {
317
split_and_flatten(df, target)
318
}
319
}
320
321
#[doc(hidden)]
322
/// Split a [`DataFrame`] into `n` parts. We take a `&mut` to be able to repartition/align chunks.
323
/// `strict` in that it respects `n` even if the chunks are suboptimal.
324
pub fn split_df(df: &mut DataFrame, target: usize, strict: bool) -> Vec<DataFrame> {
325
if target == 0 || df.is_empty() {
326
return vec![df.clone()];
327
}
328
// make sure that chunks are aligned.
329
df.align_chunks_par();
330
split_df_as_ref(df, target, strict)
331
}
332
333
pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
334
let (raw_offset, slice_len) = slice_offsets(offset, len, vals.len());
335
&vals[raw_offset..raw_offset + slice_len]
336
}
337
338
/// Resolve a (possibly negative) `offset` and `length` against an array of
/// `array_len` elements, returning a clamped `(start_index, length)` pair that
/// is always in bounds.
#[inline]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
    // A negative offset counts from the end of the array.
    let start = if offset < 0 {
        offset.saturating_add_unsigned(array_len as u64)
    } else {
        offset
    };
    let stop = start.saturating_add_unsigned(length as u64);

    let len_i64 = i64::try_from(array_len).expect("array length larger than i64::MAX");
    let start = start.clamp(0, len_i64);
    let stop = stop.clamp(0, len_i64);

    // `stop >= start` holds after clamping, so the subtraction cannot underflow.
    (start as usize, (stop - start) as usize)
}
357
358
/// Dispatch `$macro` with the primitive Rust type (`u8`, `i64`, `f64`, ...)
/// matching a numeric `DataType`; `String` and `Boolean` are routed to their
/// dedicated macros. Panics on any other dtype.
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args)*),
            DataType::Boolean => $macro_bool!($($opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(u8 $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(u16 $(, $opt_args)*),
            DataType::UInt32 => $macro!(u32 $(, $opt_args)*),
            DataType::UInt64 => $macro!(u64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(i8 $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(i16 $(, $opt_args)*),
            DataType::Int32 => $macro!(i32 $(, $opt_args)*),
            DataType::Int64 => $macro!(i64 $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(i128 $(, $opt_args)*),
            DataType::Float32 => $macro!(f32 $(, $opt_args)*),
            DataType::Float64 => $macro!(f64 $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
385
386
/// Dispatch `$macro` with the Polars type marker (`UInt8Type`, `Int64Type`, ...)
/// matching a numeric `DataType`; `String`, `Binary` and `Boolean` are routed
/// to their dedicated macros. Panics on any other dtype.
#[macro_export]
macro_rules! match_dtype_to_logical_apply_macro {
    ($obj:expr, $macro:ident, $macro_string:ident, $macro_binary:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $obj {
            DataType::String => $macro_string!($($opt_args)*),
            DataType::Binary => $macro_binary!($($opt_args)*),
            DataType::Boolean => $macro_bool!($($opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!(UInt8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!(UInt16Type $(, $opt_args)*),
            DataType::UInt32 => $macro!(UInt32Type $(, $opt_args)*),
            DataType::UInt64 => $macro!(UInt64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!(Int8Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!(Int16Type $(, $opt_args)*),
            DataType::Int32 => $macro!(Int32Type $(, $opt_args)*),
            DataType::Int64 => $macro!(Int64Type $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!(Int128Type $(, $opt_args)*),
            DataType::Float32 => $macro!(Float32Type $(, $opt_args)*),
            DataType::Float64 => $macro!(Float64Type $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
414
415
/// Apply a macro on the downcasted ChunkedArray: `$self` is downcast to the
/// concrete `ChunkedArray` for its dtype and passed as the macro's first
/// argument. `String` and `Boolean` go to their dedicated macros; panics on
/// unsupported dtypes.
#[macro_export]
macro_rules! match_arrow_dtype_apply_macro_ca {
    ($self:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            DataType::String => $macro_string!($self.str().unwrap() $(, $opt_args)*),
            DataType::Boolean => $macro_bool!($self.bool().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }};
}
442
443
/// Bind `$T` to the primitive Rust type (`i8`..`f64`) for a physical numeric
/// `DataType` and expand `$body` with it. Panics on non-numeric dtypes.
#[macro_export]
macro_rules! with_match_physical_numeric_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
469
470
/// Bind `$T` to the primitive Rust integer type for an integer `DataType` and
/// expand `$body` with it. Panics on non-integer dtypes.
#[macro_export]
macro_rules! with_match_physical_integer_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { i8 },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { i16 },
        Int32 => __with_ty__! { i32 },
        Int64 => __with_ty__! { i64 },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { i128 },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { u8 },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { u16 },
        UInt32 => __with_ty__! { u32 },
        UInt64 => __with_ty__! { u64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
494
495
/// Bind `$T` to `f32`/`f64` for a float `DataType` and expand `$body` with it.
/// Panics on non-float dtypes.
#[macro_export]
macro_rules! with_match_physical_float_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $dtype {
        Float32 => __with_ty__! { f32 },
        Float64 => __with_ty__! { f64 },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
507
508
/// Bind `$T` to the Polars type marker (`Float32Type`/`Float64Type`) for a
/// float `DataType` and expand `$body` with it. Panics on non-float dtypes.
#[macro_export]
macro_rules! with_match_physical_float_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
520
521
/// Bind `$T` to the Polars type marker (`Int8Type`..`Float64Type`) for a
/// physical numeric `DataType` and expand `$body` with it. Panics otherwise.
#[macro_export]
macro_rules! with_match_physical_numeric_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        Float32 => __with_ty__! { Float32Type },
        Float64 => __with_ty__! { Float64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
547
548
/// Bind `$T` to the Polars type marker (`Int8Type`..`UInt64Type`) for an
/// integer `DataType` and expand `$body` with it. Panics on non-integer dtypes.
#[macro_export]
macro_rules! with_match_physical_integer_polars_type {(
    $key_type:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    use $crate::datatypes::DataType::*;
    use $crate::datatypes::*;
    match $key_type {
        #[cfg(feature = "dtype-i8")]
        Int8 => __with_ty__! { Int8Type },
        #[cfg(feature = "dtype-i16")]
        Int16 => __with_ty__! { Int16Type },
        Int32 => __with_ty__! { Int32Type },
        Int64 => __with_ty__! { Int64Type },
        #[cfg(feature = "dtype-i128")]
        Int128 => __with_ty__! { Int128Type },
        #[cfg(feature = "dtype-u8")]
        UInt8 => __with_ty__! { UInt8Type },
        #[cfg(feature = "dtype-u16")]
        UInt16 => __with_ty__! { UInt16Type },
        UInt32 => __with_ty__! { UInt32Type },
        UInt64 => __with_ty__! { UInt64Type },
        dt => panic!("not implemented for dtype {:?}", dt),
    }
})}
573
574
/// Bind `$T` to the categorical type marker matching a `CategoricalPhysical`
/// backing width and expand `$body` with it. Exhaustive over the enum.
#[macro_export]
macro_rules! with_match_categorical_physical_type {(
    $dtype:expr, | $_:tt $T:ident | $($body:tt)*
) => ({
    macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
    match $dtype {
        CategoricalPhysical::U8 => __with_ty__! { Categorical8Type },
        CategoricalPhysical::U16 => __with_ty__! { Categorical16Type },
        CategoricalPhysical::U32 => __with_ty__! { Categorical32Type },
    }
})}
585
586
/// Apply a macro on the downcasted ChunkedArrays of DataTypes that are
/// physical numerics — logical dtypes are not matched and will panic.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $macro!($self.u8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $macro!($self.u16().unwrap() $(, $opt_args)*),
            DataType::UInt32 => $macro!($self.u32().unwrap() $(, $opt_args)*),
            DataType::UInt64 => $macro!($self.u64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $macro!($self.i8().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $macro!($self.i16().unwrap() $(, $opt_args)*),
            DataType::Int32 => $macro!($self.i32().unwrap() $(, $opt_args)*),
            DataType::Int64 => $macro!($self.i64().unwrap() $(, $opt_args)*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $macro!($self.i128().unwrap() $(, $opt_args)*),
            DataType::Float32 => $macro!($self.f32().unwrap() $(, $opt_args)*),
            DataType::Float64 => $macro!($self.f64().unwrap() $(, $opt_args)*),
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
612
613
/// Apply a macro on mutably downcasted ChunkedArrays of DataTypes that are
/// physical numerics — logical dtypes are not matched and will panic.
/// `$macro` receives the Polars type marker and a `&mut` ChunkedArray.
#[macro_export]
macro_rules! downcast_as_macro_arg_physical_mut {
    ($self:expr, $macro:ident $(, $opt_args:expr)*) => {{
        // clone so that we do not borrow
        match $self.dtype().clone() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => {
                let ca: &mut UInt8Chunked = $self.as_mut();
                $macro!(UInt8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => {
                let ca: &mut UInt16Chunked = $self.as_mut();
                $macro!(UInt16Type, ca $(, $opt_args)*)
            },
            DataType::UInt32 => {
                let ca: &mut UInt32Chunked = $self.as_mut();
                $macro!(UInt32Type, ca $(, $opt_args)*)
            },
            DataType::UInt64 => {
                let ca: &mut UInt64Chunked = $self.as_mut();
                $macro!(UInt64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => {
                let ca: &mut Int8Chunked = $self.as_mut();
                $macro!(Int8Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => {
                let ca: &mut Int16Chunked = $self.as_mut();
                $macro!(Int16Type, ca $(, $opt_args)*)
            },
            DataType::Int32 => {
                let ca: &mut Int32Chunked = $self.as_mut();
                $macro!(Int32Type, ca $(, $opt_args)*)
            },
            DataType::Int64 => {
                let ca: &mut Int64Chunked = $self.as_mut();
                $macro!(Int64Type, ca $(, $opt_args)*)
            },
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => {
                let ca: &mut Int128Chunked = $self.as_mut();
                $macro!(Int128Type, ca $(, $opt_args)*)
            },
            DataType::Float32 => {
                let ca: &mut Float32Chunked = $self.as_mut();
                $macro!(Float32Type, ca $(, $opt_args)*)
            },
            DataType::Float64 => {
                let ca: &mut Float64Chunked = $self.as_mut();
                $macro!(Float64Type, ca $(, $opt_args)*)
            },
            dt => panic!("not implemented for {:?}", dt),
        }
    }};
}
673
674
/// Downcast a Series-like `$self` to its concrete ChunkedArray and call
/// `$method` with `$args` on it. Covers primitives plus Time/Date/Datetime,
/// List and Struct; panics on unsupported dtypes.
#[macro_export]
macro_rules! apply_method_all_arrow_series {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Boolean => $self.bool().unwrap().$method($($args),*),
            DataType::String => $self.str().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            DataType::Time => $self.time().unwrap().$method($($args),*),
            DataType::Date => $self.date().unwrap().$method($($args),*),
            DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
            DataType::List(_) => $self.list().unwrap().$method($($args),*),
            DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
            dt => panic!("dtype {:?} not supported", dt)
        }
    }
}
705
706
/// Downcast `$self` to its concrete integer ChunkedArray and call `$method`
/// with `$args` on it. Panics on non-integer dtypes.
#[macro_export]
macro_rules! apply_method_physical_integer {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            #[cfg(feature = "dtype-u8")]
            DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-u16")]
            DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
            DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
            DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i8")]
            DataType::Int8 => $self.i8().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i16")]
            DataType::Int16 => $self.i16().unwrap().$method($($args),*),
            DataType::Int32 => $self.i32().unwrap().$method($($args),*),
            DataType::Int64 => $self.i64().unwrap().$method($($args),*),
            #[cfg(feature = "dtype-i128")]
            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
            dt => panic!("not implemented for dtype {:?}", dt),
        }
    }
}
728
729
/// Like `apply_method_physical_integer`, but also handles the float dtypes.
// doesn't include Bool and String
#[macro_export]
macro_rules! apply_method_physical_numeric {
    ($self:expr, $method:ident, $($args:expr),*) => {
        match $self.dtype() {
            DataType::Float32 => $self.f32().unwrap().$method($($args),*),
            DataType::Float64 => $self.f64().unwrap().$method($($args),*),
            // Integer dtypes are delegated; panics on anything non-numeric.
            _ => apply_method_physical_integer!($self, $method, $($args),*),
        }
    }
}
740
741
/// Build a `DataFrame` from `name => values` pairs, e.g.
/// `df!["a" => [1, 2, 3], "b" => ["x", "y", "z"]]`.
/// Returns `PolarsResult<DataFrame>` (construction can fail, e.g. on
/// mismatched column lengths).
#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        $crate::prelude::DataFrame::new(vec![
            $($crate::prelude::Column::from(<$crate::prelude::Series as $crate::prelude::NamedFrom::<_, _>>::new($col_name.into(), $slice)),)+
        ])
    }
}
749
750
/// Return the coarser of two time units (Milliseconds > Microseconds >
/// Nanoseconds), used to pick a common unit for binary temporal operations.
pub fn get_time_units(tu_l: &TimeUnit, tu_r: &TimeUnit) -> TimeUnit {
    use crate::datatypes::time_unit::TimeUnit::*;
    match (tu_l, tu_r) {
        // Milliseconds on the right always wins (coarsest unit).
        (_, Milliseconds) => Milliseconds,
        // Nanoseconds loses to Microseconds.
        (Nanoseconds, Microseconds) => Microseconds,
        // Otherwise the left unit is already the coarser (or equal) one.
        _ => *tu_l,
    }
}
758
759
/// Build the `SchemaMismatch` error raised when two frames of different widths
/// are vstacked, listing the columns unique to each side.
#[cold]
#[inline(never)]
fn width_mismatch(df1: &DataFrame, df2: &DataFrame) -> PolarsError {
    let mut df1_extra = Vec::new();
    let mut df2_extra = Vec::new();

    // Bind the schemas to locals so the collected field references stay valid.
    let s1 = df1.schema();
    let s2 = df2.schema();

    // Collect the fields present on only one side into the two vecs.
    s1.field_compare(s2, &mut df1_extra, &mut df2_extra);

    let df1_extra = df1_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    let df2_extra = df2_extra
        .into_iter()
        .map(|(_, (n, _))| n.as_str())
        .collect::<Vec<_>>()
        .join(", ");

    polars_err!(
        SchemaMismatch: r#"unable to vstack, dataframes have different widths ({} != {}).
One dataframe has additional columns: [{df1_extra}].
Other dataframe has additional columns: [{df2_extra}]."#,
        df1.width(),
        df2.width(),
    )
}
789
790
pub fn accumulate_dataframes_vertical_unchecked_optional<I>(dfs: I) -> Option<DataFrame>
791
where
792
I: IntoIterator<Item = DataFrame>,
793
{
794
let mut iter = dfs.into_iter();
795
let additional = iter.size_hint().0;
796
let mut acc_df = iter.next()?;
797
acc_df.reserve_chunks(additional);
798
799
for df in iter {
800
if acc_df.width() != df.width() {
801
panic!("{}", width_mismatch(&acc_df, &df));
802
}
803
804
acc_df.vstack_mut_owned_unchecked(df);
805
}
806
Some(acc_df)
807
}
808
809
/// This takes ownership of the DataFrame so that drop is called earlier.
810
/// Does not check if schema is correct
811
pub fn accumulate_dataframes_vertical_unchecked<I>(dfs: I) -> DataFrame
812
where
813
I: IntoIterator<Item = DataFrame>,
814
{
815
let mut iter = dfs.into_iter();
816
let additional = iter.size_hint().0;
817
let mut acc_df = iter.next().unwrap();
818
acc_df.reserve_chunks(additional);
819
820
for df in iter {
821
if acc_df.width() != df.width() {
822
panic!("{}", width_mismatch(&acc_df, &df));
823
}
824
825
acc_df.vstack_mut_owned_unchecked(df);
826
}
827
acc_df
828
}
829
830
/// This takes ownership of the DataFrame so that drop is called earlier.
831
/// # Panics
832
/// Panics if `dfs` is empty.
833
pub fn accumulate_dataframes_vertical<I>(dfs: I) -> PolarsResult<DataFrame>
834
where
835
I: IntoIterator<Item = DataFrame>,
836
{
837
let mut iter = dfs.into_iter();
838
let additional = iter.size_hint().0;
839
let mut acc_df = iter.next().unwrap();
840
acc_df.reserve_chunks(additional);
841
for df in iter {
842
if acc_df.width() != df.width() {
843
return Err(width_mismatch(&acc_df, &df));
844
}
845
846
acc_df.vstack_mut_owned(df)?;
847
}
848
849
Ok(acc_df)
850
}
851
852
/// Concat the DataFrames to a single DataFrame.
853
pub fn concat_df<'a, I>(dfs: I) -> PolarsResult<DataFrame>
854
where
855
I: IntoIterator<Item = &'a DataFrame>,
856
{
857
let mut iter = dfs.into_iter();
858
let additional = iter.size_hint().0;
859
let mut acc_df = iter.next().unwrap().clone();
860
acc_df.reserve_chunks(additional);
861
for df in iter {
862
acc_df.vstack_mut(df)?;
863
}
864
Ok(acc_df)
865
}
866
867
/// Concat the DataFrames to a single DataFrame.
868
pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
869
where
870
I: IntoIterator<Item = &'a DataFrame>,
871
{
872
let mut iter = dfs.into_iter();
873
let additional = iter.size_hint().0;
874
let mut acc_df = iter.next().unwrap().clone();
875
acc_df.reserve_chunks(additional);
876
for df in iter {
877
acc_df.vstack_mut_unchecked(df);
878
}
879
acc_df
880
}
881
882
pub fn accumulate_dataframes_horizontal(dfs: Vec<DataFrame>) -> PolarsResult<DataFrame> {
883
let mut iter = dfs.into_iter();
884
let mut acc_df = iter.next().unwrap();
885
for df in iter {
886
acc_df.hstack_mut(df.get_columns())?;
887
}
888
Ok(acc_df)
889
}
890
891
/// Ensure the chunks in both ChunkedArrays have the same length.
///
/// Borrows are returned when the chunking already matches; otherwise one side
/// is re-chunked (as `Cow::Owned`) to mirror the other's chunk layout.
///
/// # Panics
/// This will panic if `left.len() != right.len()` and array is chunked.
pub fn align_chunks_binary<'a, T, B>(
    left: &'a ChunkedArray<T>,
    right: &'a ChunkedArray<B>,
) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<B>>)
where
    B: PolarsDataType,
    T: PolarsDataType,
{
    // Deferred length check; only invoked on the arms that re-chunk.
    let assert = || {
        assert_eq!(
            left.len(),
            right.len(),
            "expected arrays of the same length"
        )
    };
    match (left.chunks.len(), right.chunks.len()) {
        // Both single-chunk: nothing to align.
        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
        // All chunks are equal length
        (a, b)
            if a == b
                && left
                    .chunk_lengths()
                    .zip(right.chunk_lengths())
                    .all(|(l, r)| l == r) =>
        {
            (Cow::Borrowed(left), Cow::Borrowed(right))
        },
        // Right is single-chunk: re-chunk it to mirror left's layout.
        (_, 1) => {
            assert();
            (
                Cow::Borrowed(left),
                Cow::Owned(right.match_chunks(left.chunk_lengths())),
            )
        },
        // Left is single-chunk: re-chunk it to mirror right's layout.
        (1, _) => {
            assert();
            (
                Cow::Owned(left.match_chunks(right.chunk_lengths())),
                Cow::Borrowed(right),
            )
        },
        (_, _) => {
            assert();
            // could optimize to choose to rechunk a primitive and not a string or list type
            let left = left.rechunk();
            (
                Cow::Owned(left.match_chunks(right.chunk_lengths())),
                Cow::Borrowed(right),
            )
        },
    }
}
947
948
/// Ensure the chunks in ChunkedArray and Series have the same length.
///
/// When chunk layouts differ, the mismatching side(s) are rechunked to a
/// single chunk rather than mirrored chunk-by-chunk.
///
/// # Panics
/// This will panic if `left.len() != right.len()` and array is chunked.
pub fn align_chunks_binary_ca_series<'a, T>(
    left: &'a ChunkedArray<T>,
    right: &'a Series,
) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, Series>)
where
    T: PolarsDataType,
{
    // Deferred length check, invoked on the multi-chunk arms.
    let assert = || {
        assert_eq!(
            left.len(),
            right.len(),
            "expected arrays of the same length"
        )
    };
    match (left.chunks.len(), right.chunks().len()) {
        // Both single-chunk: nothing to align.
        (1, 1) => (Cow::Borrowed(left), Cow::Borrowed(right)),
        // All chunks are equal length
        (a, b)
            if a == b
                && left
                    .chunk_lengths()
                    .zip(right.chunk_lengths())
                    .all(|(l, r)| l == r) =>
        {
            assert();
            (Cow::Borrowed(left), Cow::Borrowed(right))
        },
        // Collapse the multi-chunk side to a single chunk.
        (_, 1) => (left.rechunk(), Cow::Borrowed(right)),
        (1, _) => (Cow::Borrowed(left), Cow::Owned(right.rechunk())),
        (_, _) => {
            assert();
            (left.rechunk(), Cow::Owned(right.rechunk()))
        },
    }
}
987
988
/// Owned variant of chunk alignment for two `Series`: collapse whichever side
/// has multiple chunks to a single chunk unless the layouts already match.
#[cfg(feature = "performant")]
pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) {
    match (left.chunks().len(), right.chunks().len()) {
        // Both single-chunk: nothing to align.
        (1, 1) => (left, right),
        // All chunks are equal length
        (a, b)
            if a == b
                && left
                    .chunk_lengths()
                    .zip(right.chunk_lengths())
                    .all(|(l, r)| l == r) =>
        {
            (left, right)
        },
        // Collapse the multi-chunk side(s).
        (_, 1) => (left.rechunk(), right),
        (1, _) => (left, right.rechunk()),
        (_, _) => (left.rechunk(), right.rechunk()),
    }
}
1007
1008
pub(crate) fn align_chunks_binary_owned<T, B>(
1009
left: ChunkedArray<T>,
1010
right: ChunkedArray<B>,
1011
) -> (ChunkedArray<T>, ChunkedArray<B>)
1012
where
1013
B: PolarsDataType,
1014
T: PolarsDataType,
1015
{
1016
match (left.chunks.len(), right.chunks.len()) {
1017
(1, 1) => (left, right),
1018
// All chunks are equal length
1019
(a, b)
1020
if a == b
1021
&& left
1022
.chunk_lengths()
1023
.zip(right.chunk_lengths())
1024
.all(|(l, r)| l == r) =>
1025
{
1026
(left, right)
1027
},
1028
(_, 1) => (left.rechunk().into_owned(), right),
1029
(1, _) => (left, right.rechunk().into_owned()),
1030
(_, _) => (left.rechunk().into_owned(), right.rechunk().into_owned()),
1031
}
1032
}
1033
1034
/// Align the chunk layouts of three `ChunkedArray`s so element-wise ternary
/// kernels can walk them chunk-by-chunk. Sides that already match are
/// borrowed; the others are rebuilt with `match_chunks` (or rechunked first).
///
/// NOTE: the match arms below overlap and their ORDER is significant — e.g.
/// `(_, 1, 1)` must be tried before `(1, _, 1)`; do not reorder.
///
/// # Panics
/// This will panic if `a.len() != b.len() || b.len() != c.len()` and array is chunked.
#[allow(clippy::type_complexity)]
pub fn align_chunks_ternary<'a, A, B, C>(
    a: &'a ChunkedArray<A>,
    b: &'a ChunkedArray<B>,
    c: &'a ChunkedArray<C>,
) -> (
    Cow<'a, ChunkedArray<A>>,
    Cow<'a, ChunkedArray<B>>,
    Cow<'a, ChunkedArray<C>>,
)
where
    A: PolarsDataType,
    B: PolarsDataType,
    C: PolarsDataType,
{
    // Fast path: everything is single-chunk already (skips the length assert).
    if a.chunks.len() == 1 && b.chunks.len() == 1 && c.chunks.len() == 1 {
        return (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c));
    }

    assert!(
        a.len() == b.len() && b.len() == c.len(),
        "expected arrays of the same length"
    );

    match (a.chunks.len(), b.chunks.len(), c.chunks.len()) {
        // Only `a` is chunked: shape the single-chunk sides after it.
        (_, 1, 1) => (
            Cow::Borrowed(a),
            Cow::Owned(b.match_chunks(a.chunk_lengths())),
            Cow::Owned(c.match_chunks(a.chunk_lengths())),
        ),
        // Only `c` is chunked: shape `a` and `b` after it.
        (1, 1, _) => (
            Cow::Owned(a.match_chunks(c.chunk_lengths())),
            Cow::Owned(b.match_chunks(c.chunk_lengths())),
            Cow::Borrowed(c),
        ),
        // Only `b` is chunked: shape `a` and `c` after it.
        (1, _, 1) => (
            Cow::Owned(a.match_chunks(b.chunk_lengths())),
            Cow::Borrowed(b),
            Cow::Owned(c.match_chunks(b.chunk_lengths())),
        ),
        // `b` and `c` chunked: collapse `b`, then shape `a` and `b` after `c`.
        (1, _, _) => {
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        // `a` and `c` chunked: collapse `a`, then shape `a` and `b` after `c`.
        (_, 1, _) => {
            let a = a.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
        // `a` and `b` chunked: collapse `b`, then shape `b` and `c` after `a`.
        (_, _, 1) => {
            let b = b.rechunk();
            (
                Cow::Borrowed(a),
                Cow::Owned(b.match_chunks(a.chunk_lengths())),
                Cow::Owned(c.match_chunks(a.chunk_lengths())),
            )
        },
        // All chunked, but every chunk length already agrees: borrow all.
        (len_a, len_b, len_c)
            if len_a == len_b
                && len_b == len_c
                && a.chunk_lengths()
                    .zip(b.chunk_lengths())
                    .zip(c.chunk_lengths())
                    .all(|((a, b), c)| a == b && b == c) =>
        {
            (Cow::Borrowed(a), Cow::Borrowed(b), Cow::Borrowed(c))
        },
        // General case: collapse `a` and `b`, then shape them after `c`.
        _ => {
            // could optimize to choose to rechunk a primitive and not a string or list type
            let a = a.rechunk();
            let b = b.rechunk();
            (
                Cow::Owned(a.match_chunks(c.chunk_lengths())),
                Cow::Owned(b.match_chunks(c.chunk_lengths())),
                Cow::Borrowed(c),
            )
        },
    }
}
1122
1123
pub fn binary_concatenate_validities<'a, T, B>(
1124
left: &'a ChunkedArray<T>,
1125
right: &'a ChunkedArray<B>,
1126
) -> Option<Bitmap>
1127
where
1128
B: PolarsDataType,
1129
T: PolarsDataType,
1130
{
1131
let (left, right) = align_chunks_binary(left, right);
1132
let left_validity = concatenate_validities(left.chunks());
1133
let right_validity = concatenate_validities(right.chunks());
1134
combine_validities_and(left_validity.as_ref(), right_validity.as_ref())
1135
}
1136
1137
/// Convenience for `x.into_iter().map(Into::into).collect()` using an `into_vec()` function.
pub trait IntoVec<T> {
    /// Consume `self` and collect its converted items into a `Vec<T>`.
    fn into_vec(self) -> Vec<T>;
}
1141
1142
impl<I, S> IntoVec<PlSmallStr> for I
1143
where
1144
I: IntoIterator<Item = S>,
1145
S: Into<PlSmallStr>,
1146
{
1147
fn into_vec(self) -> Vec<PlSmallStr> {
1148
self.into_iter().map(|s| s.into()).collect()
1149
}
1150
}
1151
1152
/// This logic is same as the impl on ChunkedArray
1153
/// The difference is that there is less indirection because the caller should preallocate
1154
/// `chunk_lens` once. On the `ChunkedArray` we indirect through an `ArrayRef` which is an indirection
1155
/// and a vtable.
1156
#[inline]
1157
pub(crate) fn index_to_chunked_index<
1158
I: Iterator<Item = Idx>,
1159
Idx: PartialOrd + std::ops::AddAssign + std::ops::SubAssign + Zero + One,
1160
>(
1161
chunk_lens: I,
1162
index: Idx,
1163
) -> (Idx, Idx) {
1164
let mut index_remainder = index;
1165
let mut current_chunk_idx = Zero::zero();
1166
1167
for chunk_len in chunk_lens {
1168
if chunk_len > index_remainder {
1169
break;
1170
} else {
1171
index_remainder -= chunk_len;
1172
current_chunk_idx += One::one();
1173
}
1174
}
1175
(current_chunk_idx, index_remainder)
1176
}
1177
1178
/// Map an index counted from the back of the array to a forward
/// `(chunk index, index within chunk)` pair, given the chunk lengths iterated
/// in reverse order and the total number of chunks.
///
/// NOTE(review): `index_from_back` must be strictly positive (enforced by the
/// `debug_assert` below), which suggests it is 1-based with `1` addressing the
/// last element — confirm against callers.
pub(crate) fn index_to_chunked_index_rev<
    I: Iterator<Item = Idx>,
    Idx: PartialOrd
        + std::ops::AddAssign
        + std::ops::SubAssign
        + std::ops::Sub<Output = Idx>
        + Zero
        + One
        + Copy
        + std::fmt::Debug,
>(
    chunk_lens_rev: I,
    index_from_back: Idx,
    total_chunks: Idx,
) -> (Idx, Idx) {
    debug_assert!(index_from_back > Zero::zero(), "at least -1");
    let mut index_remainder = index_from_back;
    // Chunk counter starts at one because we convert it to a forward index
    // via `total_chunks - current_chunk_idx` below.
    let mut current_chunk_idx = One::one();
    let mut current_chunk_len = Zero::zero();

    // Walk chunks from the back until the remainder falls inside one of them
    // (`>=` because the remainder counts elements, not a 0-based offset).
    for chunk_len in chunk_lens_rev {
        current_chunk_len = chunk_len;
        if chunk_len >= index_remainder {
            break;
        } else {
            index_remainder -= chunk_len;
            current_chunk_idx += One::one();
        }
    }
    // Translate the backwards walk into a forward chunk index and the
    // 0-based offset within that chunk.
    (
        total_chunks - current_chunk_idx,
        current_chunk_len - index_remainder,
    )
}
1212
1213
pub fn first_non_null<'a, I>(iter: I) -> Option<usize>
1214
where
1215
I: Iterator<Item = Option<&'a Bitmap>>,
1216
{
1217
let mut offset = 0;
1218
for validity in iter {
1219
if let Some(mask) = validity {
1220
let len_mask = mask.len();
1221
let n = mask.leading_zeros();
1222
if n < len_mask {
1223
return Some(offset + n);
1224
}
1225
offset += len_mask
1226
} else {
1227
return Some(offset);
1228
}
1229
}
1230
None
1231
}
1232
1233
pub fn last_non_null<'a, I>(iter: I, len: usize) -> Option<usize>
1234
where
1235
I: DoubleEndedIterator<Item = Option<&'a Bitmap>>,
1236
{
1237
if len == 0 {
1238
return None;
1239
}
1240
let mut offset = 0;
1241
for validity in iter.rev() {
1242
if let Some(mask) = validity {
1243
let len_mask = mask.len();
1244
let n = mask.trailing_zeros();
1245
if n < len_mask {
1246
return Some(len - offset - n - 1);
1247
}
1248
offset += len_mask;
1249
} else {
1250
return Some(len - offset - 1);
1251
}
1252
}
1253
None
1254
}
1255
1256
/// ensure that nulls are propagated to both arrays
1257
pub fn coalesce_nulls<'a, T: PolarsDataType>(
1258
a: &'a ChunkedArray<T>,
1259
b: &'a ChunkedArray<T>,
1260
) -> (Cow<'a, ChunkedArray<T>>, Cow<'a, ChunkedArray<T>>) {
1261
if a.null_count() > 0 || b.null_count() > 0 {
1262
let (a, b) = align_chunks_binary(a, b);
1263
let mut b = b.into_owned();
1264
let a = a.coalesce_nulls(b.chunks());
1265
1266
for arr in a.chunks().iter() {
1267
for arr_b in unsafe { b.chunks_mut() } {
1268
*arr_b = arr_b.with_validity(arr.validity().cloned())
1269
}
1270
}
1271
b.compute_len();
1272
(Cow::Owned(a), Cow::Owned(b))
1273
} else {
1274
(Cow::Borrowed(a), Cow::Borrowed(b))
1275
}
1276
}
1277
1278
pub fn coalesce_nulls_columns(a: &Column, b: &Column) -> (Column, Column) {
1279
if a.null_count() > 0 || b.null_count() > 0 {
1280
let mut a = a.as_materialized_series().rechunk();
1281
let mut b = b.as_materialized_series().rechunk();
1282
for (arr_a, arr_b) in unsafe { a.chunks_mut().iter_mut().zip(b.chunks_mut()) } {
1283
let validity = match (arr_a.validity(), arr_b.validity()) {
1284
(None, Some(b)) => Some(b.clone()),
1285
(Some(a), Some(b)) => Some(a & b),
1286
(Some(a), None) => Some(a.clone()),
1287
(None, None) => None,
1288
};
1289
*arr_a = arr_a.with_validity(validity.clone());
1290
*arr_b = arr_b.with_validity(validity);
1291
}
1292
a.compute_len();
1293
b.compute_len();
1294
(a.into(), b.into())
1295
} else {
1296
(a.clone(), b.clone())
1297
}
1298
}
1299
1300
pub fn operation_exceeded_idxsize_msg(operation: &str) -> String {
1301
if size_of::<IdxSize>() == size_of::<u32>() {
1302
format!(
1303
"{} exceeded the maximum supported limit of {} rows. Consider installing 'polars-u64-idx'.",
1304
operation,
1305
IdxSize::MAX,
1306
)
1307
} else {
1308
format!(
1309
"{} exceeded the maximum supported limit of {} rows.",
1310
operation,
1311
IdxSize::MAX,
1312
)
1313
}
1314
}
1315
1316
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_split() {
        // 10 elements over 3 partitions: the remainder goes to the last split.
        let ca: Int32Chunked = (0..10).collect_ca("a".into());

        let out = split(&ca, 3);
        assert_eq!(out[0].len(), 3);
        assert_eq!(out[1].len(), 3);
        assert_eq!(out[2].len(), 4);
    }

    #[test]
    fn test_align_chunks() -> PolarsResult<()> {
        // Case 1: left is a single chunk of 4, right is chunked as [1, 3].
        let a = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut b = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let b2 = Int32Chunked::new(PlSmallStr::EMPTY, &[2, 3, 4]);

        b.append(&b2)?;
        let (a, b) = align_chunks_binary(&a, &b);
        // After alignment both sides must expose identical chunk layouts.
        assert_eq!(
            a.chunk_lengths().collect::<Vec<_>>(),
            b.chunk_lengths().collect::<Vec<_>>()
        );

        // Case 2: right is chunked as [1, 1, 1, 1].
        let a = Int32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3, 4]);
        let mut b = Int32Chunked::new(PlSmallStr::EMPTY, &[1]);
        let b1 = b.clone();
        b.append(&b1)?;
        b.append(&b1)?;
        b.append(&b1)?;
        let (a, b) = align_chunks_binary(&a, &b);
        assert_eq!(
            a.chunk_lengths().collect::<Vec<_>>(),
            b.chunk_lengths().collect::<Vec<_>>()
        );

        Ok(())
    }
}
1358
1359