Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-expr/src/reduce/any_all.rs
6940 views
1
use arrow::array::BooleanArray;
2
use arrow::bitmap::binary_assign_mut;
3
4
use super::*;
5
6
pub fn new_any_reduction(ignore_nulls: bool) -> Box<dyn GroupedReduction> {
7
if ignore_nulls {
8
Box::new(AnyIgnoreNullGroupedReduction::default())
9
} else {
10
Box::new(AnyKleeneNullGroupedReduction::default())
11
}
12
}
13
14
pub fn new_all_reduction(ignore_nulls: bool) -> Box<dyn GroupedReduction> {
15
if ignore_nulls {
16
Box::new(AllIgnoreNullGroupedReduction::default())
17
} else {
18
Box::new(AllKleeneNullGroupedReduction::default())
19
}
20
}
21
22
#[derive(Default)]
23
struct AnyIgnoreNullGroupedReduction {
24
values: MutableBitmap,
25
evicted_values: BitmapBuilder,
26
}
27
28
impl GroupedReduction for AnyIgnoreNullGroupedReduction {
29
fn new_empty(&self) -> Box<dyn GroupedReduction> {
30
Box::new(Self::default())
31
}
32
33
fn reserve(&mut self, additional: usize) {
34
self.values.reserve(additional);
35
}
36
37
fn resize(&mut self, num_groups: IdxSize) {
38
self.values.resize(num_groups as usize, false);
39
}
40
41
fn update_group(
42
&mut self,
43
values: &Column,
44
group_idx: IdxSize,
45
_seq_id: u64,
46
) -> PolarsResult<()> {
47
assert!(values.dtype() == &DataType::Boolean);
48
let values = values.as_materialized_series_maintain_scalar();
49
let ca: &BooleanChunked = values.as_ref().as_ref();
50
if ca.any() {
51
self.values.set(group_idx as usize, true);
52
}
53
Ok(())
54
}
55
56
unsafe fn update_groups_while_evicting(
57
&mut self,
58
values: &Column,
59
subset: &[IdxSize],
60
group_idxs: &[EvictIdx],
61
_seq_id: u64,
62
) -> PolarsResult<()> {
63
assert!(values.dtype() == &DataType::Boolean);
64
assert!(subset.len() == group_idxs.len());
65
let values = values.as_materialized_series(); // @scalar-opt
66
let ca: &BooleanChunked = values.as_ref().as_ref();
67
let arr = ca.downcast_as_array();
68
unsafe {
69
// SAFETY: indices are in-bounds guaranteed by trait.
70
for (i, g) in subset.iter().zip(group_idxs) {
71
let ov = arr.get_unchecked(*i as usize);
72
if g.should_evict() {
73
self.evicted_values.push(self.values.get_unchecked(g.idx()));
74
self.values.set_unchecked(g.idx(), ov.unwrap_or(false));
75
} else {
76
self.values.or_pos_unchecked(g.idx(), ov.unwrap_or(false));
77
}
78
}
79
}
80
Ok(())
81
}
82
83
unsafe fn combine_subset(
84
&mut self,
85
other: &dyn GroupedReduction,
86
subset: &[IdxSize],
87
group_idxs: &[IdxSize],
88
) -> PolarsResult<()> {
89
let other = other.as_any().downcast_ref::<Self>().unwrap();
90
assert!(subset.len() == group_idxs.len());
91
unsafe {
92
// SAFETY: indices are in-bounds guaranteed by trait.
93
for (i, g) in subset.iter().zip(group_idxs) {
94
self.values
95
.or_pos_unchecked(*g as usize, other.values.get_unchecked(*i as usize));
96
}
97
}
98
Ok(())
99
}
100
101
fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
102
Box::new(Self {
103
values: core::mem::take(&mut self.evicted_values).into_mut(),
104
evicted_values: BitmapBuilder::new(),
105
})
106
}
107
108
fn finalize(&mut self) -> PolarsResult<Series> {
109
let v = core::mem::take(&mut self.values);
110
let arr = BooleanArray::from(v.freeze());
111
Ok(Series::from_array(PlSmallStr::EMPTY, arr))
112
}
113
114
fn as_any(&self) -> &dyn Any {
115
self
116
}
117
}
118
119
#[derive(Default)]
120
struct AllIgnoreNullGroupedReduction {
121
values: MutableBitmap,
122
evicted_values: BitmapBuilder,
123
}
124
125
impl GroupedReduction for AllIgnoreNullGroupedReduction {
126
fn new_empty(&self) -> Box<dyn GroupedReduction> {
127
Box::new(Self::default())
128
}
129
130
fn reserve(&mut self, additional: usize) {
131
self.values.reserve(additional);
132
}
133
134
fn resize(&mut self, num_groups: IdxSize) {
135
self.values.resize(num_groups as usize, true);
136
}
137
138
fn update_group(
139
&mut self,
140
values: &Column,
141
group_idx: IdxSize,
142
_seq_id: u64,
143
) -> PolarsResult<()> {
144
assert!(values.dtype() == &DataType::Boolean);
145
let values = values.as_materialized_series_maintain_scalar();
146
let ca: &BooleanChunked = values.as_ref().as_ref();
147
if !ca.all() {
148
self.values.set(group_idx as usize, false);
149
}
150
Ok(())
151
}
152
153
unsafe fn update_groups_while_evicting(
154
&mut self,
155
values: &Column,
156
subset: &[IdxSize],
157
group_idxs: &[EvictIdx],
158
_seq_id: u64,
159
) -> PolarsResult<()> {
160
assert!(values.dtype() == &DataType::Boolean);
161
assert!(subset.len() == group_idxs.len());
162
let values = values.as_materialized_series(); // @scalar-opt
163
let ca: &BooleanChunked = values.as_ref().as_ref();
164
let arr = ca.downcast_as_array();
165
unsafe {
166
// SAFETY: indices are in-bounds guaranteed by trait.
167
for (i, g) in subset.iter().zip(group_idxs) {
168
let ov = arr.get_unchecked(*i as usize);
169
if g.should_evict() {
170
self.evicted_values.push(self.values.get_unchecked(g.idx()));
171
self.values.set_unchecked(g.idx(), ov.unwrap_or(true));
172
} else {
173
self.values.and_pos_unchecked(g.idx(), ov.unwrap_or(true));
174
}
175
}
176
}
177
Ok(())
178
}
179
180
unsafe fn combine_subset(
181
&mut self,
182
other: &dyn GroupedReduction,
183
subset: &[IdxSize],
184
group_idxs: &[IdxSize],
185
) -> PolarsResult<()> {
186
let other = other.as_any().downcast_ref::<Self>().unwrap();
187
assert!(subset.len() == group_idxs.len());
188
unsafe {
189
// SAFETY: indices are in-bounds guaranteed by trait.
190
for (i, g) in subset.iter().zip(group_idxs) {
191
self.values
192
.and_pos_unchecked(*g as usize, other.values.get_unchecked(*i as usize));
193
}
194
}
195
Ok(())
196
}
197
198
fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
199
Box::new(Self {
200
values: core::mem::take(&mut self.evicted_values).into_mut(),
201
evicted_values: BitmapBuilder::new(),
202
})
203
}
204
205
fn finalize(&mut self) -> PolarsResult<Series> {
206
let v = core::mem::take(&mut self.values);
207
let arr = BooleanArray::from(v.freeze());
208
Ok(Series::from_array(PlSmallStr::EMPTY, arr))
209
}
210
211
fn as_any(&self) -> &dyn Any {
212
self
213
}
214
}
215
216
#[derive(Default)]
217
struct AnyKleeneNullGroupedReduction {
218
seen_true: MutableBitmap,
219
seen_null: MutableBitmap,
220
evicted_values: BitmapBuilder,
221
evicted_mask: BitmapBuilder,
222
}
223
224
impl GroupedReduction for AnyKleeneNullGroupedReduction {
225
fn new_empty(&self) -> Box<dyn GroupedReduction> {
226
Box::new(Self::default())
227
}
228
229
fn reserve(&mut self, additional: usize) {
230
self.seen_true.reserve(additional);
231
self.seen_null.reserve(additional)
232
}
233
234
fn resize(&mut self, num_groups: IdxSize) {
235
self.seen_true.resize(num_groups as usize, false);
236
self.seen_null.resize(num_groups as usize, false);
237
}
238
239
fn update_group(
240
&mut self,
241
values: &Column,
242
group_idx: IdxSize,
243
_seq_id: u64,
244
) -> PolarsResult<()> {
245
assert!(values.dtype() == &DataType::Boolean);
246
let values = values.as_materialized_series_maintain_scalar();
247
let ca: &BooleanChunked = values.as_ref().as_ref();
248
if ca.any() {
249
self.seen_true.set(group_idx as usize, true);
250
}
251
if ca.len() != ca.null_count() {
252
self.seen_null.set(group_idx as usize, true);
253
}
254
Ok(())
255
}
256
257
unsafe fn update_groups_while_evicting(
258
&mut self,
259
values: &Column,
260
subset: &[IdxSize],
261
group_idxs: &[EvictIdx],
262
_seq_id: u64,
263
) -> PolarsResult<()> {
264
assert!(values.dtype() == &DataType::Boolean);
265
assert!(subset.len() == group_idxs.len());
266
let values = values.as_materialized_series(); // @scalar-opt
267
let ca: &BooleanChunked = values.as_ref().as_ref();
268
let arr = ca.downcast_as_array();
269
unsafe {
270
// SAFETY: indices are in-bounds guaranteed by trait.
271
for (i, g) in subset.iter().zip(group_idxs) {
272
let ov = arr.get_unchecked(*i as usize);
273
if g.should_evict() {
274
self.evicted_values
275
.push(self.seen_true.get_unchecked(g.idx()));
276
self.evicted_mask
277
.push(self.seen_null.get_unchecked(g.idx()));
278
self.seen_true.set_unchecked(g.idx(), ov.unwrap_or(false));
279
self.seen_null.set_unchecked(g.idx(), ov.is_none());
280
} else {
281
self.seen_true
282
.or_pos_unchecked(g.idx(), ov.unwrap_or(false));
283
self.seen_null.or_pos_unchecked(g.idx(), ov.is_none());
284
}
285
}
286
}
287
Ok(())
288
}
289
290
unsafe fn combine_subset(
291
&mut self,
292
other: &dyn GroupedReduction,
293
subset: &[IdxSize],
294
group_idxs: &[IdxSize],
295
) -> PolarsResult<()> {
296
let other = other.as_any().downcast_ref::<Self>().unwrap();
297
assert!(subset.len() == group_idxs.len());
298
unsafe {
299
// SAFETY: indices are in-bounds guaranteed by trait.
300
for (i, g) in subset.iter().zip(group_idxs) {
301
self.seen_true
302
.or_pos_unchecked(*g as usize, other.seen_true.get_unchecked(*i as usize));
303
self.seen_null
304
.or_pos_unchecked(*g as usize, other.seen_null.get_unchecked(*i as usize));
305
}
306
}
307
Ok(())
308
}
309
310
fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
311
Box::new(Self {
312
seen_true: core::mem::take(&mut self.evicted_values).into_mut(),
313
seen_null: core::mem::take(&mut self.evicted_mask).into_mut(),
314
evicted_values: BitmapBuilder::new(),
315
evicted_mask: BitmapBuilder::new(),
316
})
317
}
318
319
fn finalize(&mut self) -> PolarsResult<Series> {
320
let seen_true = core::mem::take(&mut self.seen_true);
321
let mut mask = core::mem::take(&mut self.seen_null);
322
binary_assign_mut(&mut mask, &seen_true, |mi: u64, ti: u64| mi & !ti);
323
let arr = BooleanArray::from(seen_true.freeze())
324
.with_validity(Some(mask.freeze()))
325
.boxed();
326
Ok(unsafe {
327
Series::from_chunks_and_dtype_unchecked(
328
PlSmallStr::EMPTY,
329
vec![arr],
330
&DataType::Boolean,
331
)
332
})
333
}
334
335
fn as_any(&self) -> &dyn Any {
336
self
337
}
338
}
339
340
#[derive(Default)]
341
struct AllKleeneNullGroupedReduction {
342
seen_false: MutableBitmap,
343
seen_null: MutableBitmap,
344
evicted_values: BitmapBuilder,
345
evicted_mask: BitmapBuilder,
346
}
347
348
impl GroupedReduction for AllKleeneNullGroupedReduction {
349
fn new_empty(&self) -> Box<dyn GroupedReduction> {
350
Box::new(Self::default())
351
}
352
353
fn reserve(&mut self, additional: usize) {
354
self.seen_false.reserve(additional);
355
self.seen_null.reserve(additional)
356
}
357
358
fn resize(&mut self, num_groups: IdxSize) {
359
self.seen_false.resize(num_groups as usize, false);
360
self.seen_null.resize(num_groups as usize, false);
361
}
362
363
fn update_group(
364
&mut self,
365
values: &Column,
366
group_idx: IdxSize,
367
_seq_id: u64,
368
) -> PolarsResult<()> {
369
assert!(values.dtype() == &DataType::Boolean);
370
let values = values.as_materialized_series_maintain_scalar();
371
let ca: &BooleanChunked = values.as_ref().as_ref();
372
if !ca.all() {
373
self.seen_false.set(group_idx as usize, true);
374
}
375
if ca.len() != ca.null_count() {
376
self.seen_null.set(group_idx as usize, true);
377
}
378
Ok(())
379
}
380
381
unsafe fn update_groups_while_evicting(
382
&mut self,
383
values: &Column,
384
subset: &[IdxSize],
385
group_idxs: &[EvictIdx],
386
_seq_id: u64,
387
) -> PolarsResult<()> {
388
assert!(values.dtype() == &DataType::Boolean);
389
assert!(subset.len() == group_idxs.len());
390
let values = values.as_materialized_series(); // @scalar-opt
391
let ca: &BooleanChunked = values.as_ref().as_ref();
392
let arr = ca.downcast_as_array();
393
unsafe {
394
// SAFETY: indices are in-bounds guaranteed by trait.
395
for (i, g) in subset.iter().zip(group_idxs) {
396
let ov = arr.get_unchecked(*i as usize);
397
if g.should_evict() {
398
self.evicted_values
399
.push(self.seen_false.get_unchecked(g.idx()));
400
self.evicted_mask
401
.push(self.seen_null.get_unchecked(g.idx()));
402
self.seen_false.set_unchecked(g.idx(), !ov.unwrap_or(true));
403
self.seen_null.set_unchecked(g.idx(), ov.is_none());
404
} else {
405
self.seen_false
406
.or_pos_unchecked(g.idx(), !ov.unwrap_or(true));
407
self.seen_null.or_pos_unchecked(g.idx(), ov.is_none());
408
}
409
}
410
}
411
Ok(())
412
}
413
414
unsafe fn combine_subset(
415
&mut self,
416
other: &dyn GroupedReduction,
417
subset: &[IdxSize],
418
group_idxs: &[IdxSize],
419
) -> PolarsResult<()> {
420
let other = other.as_any().downcast_ref::<Self>().unwrap();
421
assert!(subset.len() == group_idxs.len());
422
unsafe {
423
// SAFETY: indices are in-bounds guaranteed by trait.
424
for (i, g) in subset.iter().zip(group_idxs) {
425
self.seen_false
426
.or_pos_unchecked(*g as usize, other.seen_false.get_unchecked(*i as usize));
427
self.seen_null
428
.or_pos_unchecked(*g as usize, other.seen_null.get_unchecked(*i as usize));
429
}
430
}
431
Ok(())
432
}
433
434
fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
435
Box::new(Self {
436
seen_false: core::mem::take(&mut self.evicted_values).into_mut(),
437
seen_null: core::mem::take(&mut self.evicted_mask).into_mut(),
438
evicted_values: BitmapBuilder::new(),
439
evicted_mask: BitmapBuilder::new(),
440
})
441
}
442
443
fn finalize(&mut self) -> PolarsResult<Series> {
444
let seen_false = core::mem::take(&mut self.seen_false);
445
let mut mask = core::mem::take(&mut self.seen_null);
446
binary_assign_mut(&mut mask, &seen_false, |mi: u64, fi: u64| mi & !fi);
447
let arr = BooleanArray::from((!seen_false).freeze())
448
.with_validity(Some(mask.freeze()))
449
.boxed();
450
Ok(unsafe {
451
Series::from_chunks_and_dtype_unchecked(
452
PlSmallStr::EMPTY,
453
vec![arr],
454
&DataType::Boolean,
455
)
456
})
457
}
458
459
fn as_any(&self) -> &dyn Any {
460
self
461
}
462
}
463
464