Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-expr/src/reduce/any_all.rs
8424 views
1
use arrow::array::BooleanArray;
2
use arrow::bitmap::binary_assign_mut;
3
4
use super::*;
5
6
pub fn new_any_reduction(ignore_nulls: bool) -> Box<dyn GroupedReduction> {
7
if ignore_nulls {
8
Box::new(AnyIgnoreNullGroupedReduction::default())
9
} else {
10
Box::new(AnyKleeneNullGroupedReduction::default())
11
}
12
}
13
14
pub fn new_all_reduction(ignore_nulls: bool) -> Box<dyn GroupedReduction> {
15
if ignore_nulls {
16
Box::new(AllIgnoreNullGroupedReduction::default())
17
} else {
18
Box::new(AllKleeneNullGroupedReduction::default())
19
}
20
}
21
22
#[derive(Default)]
23
struct AnyIgnoreNullGroupedReduction {
24
values: MutableBitmap,
25
evicted_values: BitmapBuilder,
26
}
27
28
impl GroupedReduction for AnyIgnoreNullGroupedReduction {
29
fn new_empty(&self) -> Box<dyn GroupedReduction> {
30
Box::new(Self::default())
31
}
32
33
fn reserve(&mut self, additional: usize) {
34
self.values.reserve(additional);
35
}
36
37
fn resize(&mut self, num_groups: IdxSize) {
38
self.values.resize(num_groups as usize, false);
39
}
40
41
fn update_group(
42
&mut self,
43
values: &[&Column],
44
group_idx: IdxSize,
45
_seq_id: u64,
46
) -> PolarsResult<()> {
47
let &[values] = values else { unreachable!() };
48
assert!(values.dtype() == &DataType::Boolean);
49
let values = values.as_materialized_series_maintain_scalar();
50
let ca: &BooleanChunked = values.as_ref().as_ref();
51
if ca.any() {
52
self.values.set(group_idx as usize, true);
53
}
54
Ok(())
55
}
56
57
unsafe fn update_groups_while_evicting(
58
&mut self,
59
values: &[&Column],
60
subset: &[IdxSize],
61
group_idxs: &[EvictIdx],
62
_seq_id: u64,
63
) -> PolarsResult<()> {
64
let &[values] = values else { unreachable!() };
65
assert!(values.dtype() == &DataType::Boolean);
66
assert!(subset.len() == group_idxs.len());
67
let values = values.as_materialized_series(); // @scalar-opt
68
let ca: &BooleanChunked = values.as_ref().as_ref();
69
let arr = ca.downcast_as_array();
70
unsafe {
71
// SAFETY: indices are in-bounds guaranteed by trait.
72
for (i, g) in subset.iter().zip(group_idxs) {
73
let ov = arr.get_unchecked(*i as usize);
74
if g.should_evict() {
75
self.evicted_values.push(self.values.get_unchecked(g.idx()));
76
self.values.set_unchecked(g.idx(), ov.unwrap_or(false));
77
} else {
78
self.values.or_pos_unchecked(g.idx(), ov.unwrap_or(false));
79
}
80
}
81
}
82
Ok(())
83
}
84
85
unsafe fn combine_subset(
86
&mut self,
87
other: &dyn GroupedReduction,
88
subset: &[IdxSize],
89
group_idxs: &[IdxSize],
90
) -> PolarsResult<()> {
91
let other = other.as_any().downcast_ref::<Self>().unwrap();
92
assert!(subset.len() == group_idxs.len());
93
unsafe {
94
// SAFETY: indices are in-bounds guaranteed by trait.
95
for (i, g) in subset.iter().zip(group_idxs) {
96
self.values
97
.or_pos_unchecked(*g as usize, other.values.get_unchecked(*i as usize));
98
}
99
}
100
Ok(())
101
}
102
103
fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
104
Box::new(Self {
105
values: core::mem::take(&mut self.evicted_values).into_mut(),
106
evicted_values: BitmapBuilder::new(),
107
})
108
}
109
110
fn finalize(&mut self) -> PolarsResult<Series> {
111
let v = core::mem::take(&mut self.values);
112
let arr = BooleanArray::from(v.freeze());
113
Ok(Series::from_array(PlSmallStr::EMPTY, arr))
114
}
115
116
fn as_any(&self) -> &dyn Any {
117
self
118
}
119
}
120
121
#[derive(Default)]
122
struct AllIgnoreNullGroupedReduction {
123
values: MutableBitmap,
124
evicted_values: BitmapBuilder,
125
}
126
127
impl GroupedReduction for AllIgnoreNullGroupedReduction {
128
fn new_empty(&self) -> Box<dyn GroupedReduction> {
129
Box::new(Self::default())
130
}
131
132
fn reserve(&mut self, additional: usize) {
133
self.values.reserve(additional);
134
}
135
136
fn resize(&mut self, num_groups: IdxSize) {
137
self.values.resize(num_groups as usize, true);
138
}
139
140
fn update_group(
141
&mut self,
142
values: &[&Column],
143
group_idx: IdxSize,
144
_seq_id: u64,
145
) -> PolarsResult<()> {
146
let &[values] = values else { unreachable!() };
147
assert!(values.dtype() == &DataType::Boolean);
148
let values = values.as_materialized_series_maintain_scalar();
149
let ca: &BooleanChunked = values.as_ref().as_ref();
150
if !ca.all() {
151
self.values.set(group_idx as usize, false);
152
}
153
Ok(())
154
}
155
156
unsafe fn update_groups_while_evicting(
157
&mut self,
158
values: &[&Column],
159
subset: &[IdxSize],
160
group_idxs: &[EvictIdx],
161
_seq_id: u64,
162
) -> PolarsResult<()> {
163
let &[values] = values else { unreachable!() };
164
assert!(values.dtype() == &DataType::Boolean);
165
assert!(subset.len() == group_idxs.len());
166
let values = values.as_materialized_series(); // @scalar-opt
167
let ca: &BooleanChunked = values.as_ref().as_ref();
168
let arr = ca.downcast_as_array();
169
unsafe {
170
// SAFETY: indices are in-bounds guaranteed by trait.
171
for (i, g) in subset.iter().zip(group_idxs) {
172
let ov = arr.get_unchecked(*i as usize);
173
if g.should_evict() {
174
self.evicted_values.push(self.values.get_unchecked(g.idx()));
175
self.values.set_unchecked(g.idx(), ov.unwrap_or(true));
176
} else {
177
self.values.and_pos_unchecked(g.idx(), ov.unwrap_or(true));
178
}
179
}
180
}
181
Ok(())
182
}
183
184
unsafe fn combine_subset(
185
&mut self,
186
other: &dyn GroupedReduction,
187
subset: &[IdxSize],
188
group_idxs: &[IdxSize],
189
) -> PolarsResult<()> {
190
let other = other.as_any().downcast_ref::<Self>().unwrap();
191
assert!(subset.len() == group_idxs.len());
192
unsafe {
193
// SAFETY: indices are in-bounds guaranteed by trait.
194
for (i, g) in subset.iter().zip(group_idxs) {
195
self.values
196
.and_pos_unchecked(*g as usize, other.values.get_unchecked(*i as usize));
197
}
198
}
199
Ok(())
200
}
201
202
fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
203
Box::new(Self {
204
values: core::mem::take(&mut self.evicted_values).into_mut(),
205
evicted_values: BitmapBuilder::new(),
206
})
207
}
208
209
fn finalize(&mut self) -> PolarsResult<Series> {
210
let v = core::mem::take(&mut self.values);
211
let arr = BooleanArray::from(v.freeze());
212
Ok(Series::from_array(PlSmallStr::EMPTY, arr))
213
}
214
215
fn as_any(&self) -> &dyn Any {
216
self
217
}
218
}
219
220
#[derive(Default)]
221
struct AnyKleeneNullGroupedReduction {
222
seen_true: MutableBitmap,
223
seen_null: MutableBitmap,
224
evicted_values: BitmapBuilder,
225
evicted_mask: BitmapBuilder,
226
}
227
228
impl GroupedReduction for AnyKleeneNullGroupedReduction {
229
fn new_empty(&self) -> Box<dyn GroupedReduction> {
230
Box::new(Self::default())
231
}
232
233
fn reserve(&mut self, additional: usize) {
234
self.seen_true.reserve(additional);
235
self.seen_null.reserve(additional)
236
}
237
238
fn resize(&mut self, num_groups: IdxSize) {
239
self.seen_true.resize(num_groups as usize, false);
240
self.seen_null.resize(num_groups as usize, false);
241
}
242
243
fn update_group(
244
&mut self,
245
values: &[&Column],
246
group_idx: IdxSize,
247
_seq_id: u64,
248
) -> PolarsResult<()> {
249
let &[values] = values else { unreachable!() };
250
assert!(values.dtype() == &DataType::Boolean);
251
let values = values.as_materialized_series_maintain_scalar();
252
let ca: &BooleanChunked = values.as_ref().as_ref();
253
if ca.any() {
254
self.seen_true.set(group_idx as usize, true);
255
}
256
if ca.has_nulls() {
257
self.seen_null.set(group_idx as usize, true);
258
}
259
Ok(())
260
}
261
262
unsafe fn update_groups_while_evicting(
263
&mut self,
264
values: &[&Column],
265
subset: &[IdxSize],
266
group_idxs: &[EvictIdx],
267
_seq_id: u64,
268
) -> PolarsResult<()> {
269
let &[values] = values else { unreachable!() };
270
assert!(values.dtype() == &DataType::Boolean);
271
assert!(subset.len() == group_idxs.len());
272
let values = values.as_materialized_series(); // @scalar-opt
273
let ca: &BooleanChunked = values.as_ref().as_ref();
274
let arr = ca.downcast_as_array();
275
unsafe {
276
// SAFETY: indices are in-bounds guaranteed by trait.
277
for (i, g) in subset.iter().zip(group_idxs) {
278
let ov = arr.get_unchecked(*i as usize);
279
if g.should_evict() {
280
self.evicted_values
281
.push(self.seen_true.get_unchecked(g.idx()));
282
self.evicted_mask
283
.push(self.seen_null.get_unchecked(g.idx()));
284
self.seen_true.set_unchecked(g.idx(), ov.unwrap_or(false));
285
self.seen_null.set_unchecked(g.idx(), ov.is_none());
286
} else {
287
self.seen_true
288
.or_pos_unchecked(g.idx(), ov.unwrap_or(false));
289
self.seen_null.or_pos_unchecked(g.idx(), ov.is_none());
290
}
291
}
292
}
293
Ok(())
294
}
295
296
unsafe fn combine_subset(
297
&mut self,
298
other: &dyn GroupedReduction,
299
subset: &[IdxSize],
300
group_idxs: &[IdxSize],
301
) -> PolarsResult<()> {
302
let other = other.as_any().downcast_ref::<Self>().unwrap();
303
assert!(subset.len() == group_idxs.len());
304
unsafe {
305
// SAFETY: indices are in-bounds guaranteed by trait.
306
for (i, g) in subset.iter().zip(group_idxs) {
307
self.seen_true
308
.or_pos_unchecked(*g as usize, other.seen_true.get_unchecked(*i as usize));
309
self.seen_null
310
.or_pos_unchecked(*g as usize, other.seen_null.get_unchecked(*i as usize));
311
}
312
}
313
Ok(())
314
}
315
316
fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
317
Box::new(Self {
318
seen_true: core::mem::take(&mut self.evicted_values).into_mut(),
319
seen_null: core::mem::take(&mut self.evicted_mask).into_mut(),
320
evicted_values: BitmapBuilder::new(),
321
evicted_mask: BitmapBuilder::new(),
322
})
323
}
324
325
fn finalize(&mut self) -> PolarsResult<Series> {
326
let seen_true = core::mem::take(&mut self.seen_true);
327
let mut mask = core::mem::take(&mut self.seen_null);
328
binary_assign_mut(&mut mask, &seen_true, |mi: u64, ti: u64| ti | !mi);
329
let arr = BooleanArray::from(seen_true.freeze())
330
.with_validity(Some(mask.freeze()))
331
.boxed();
332
Ok(unsafe {
333
Series::from_chunks_and_dtype_unchecked(
334
PlSmallStr::EMPTY,
335
vec![arr],
336
&DataType::Boolean,
337
)
338
})
339
}
340
341
fn as_any(&self) -> &dyn Any {
342
self
343
}
344
}
345
346
#[derive(Default)]
347
struct AllKleeneNullGroupedReduction {
348
seen_false: MutableBitmap,
349
seen_null: MutableBitmap,
350
evicted_values: BitmapBuilder,
351
evicted_mask: BitmapBuilder,
352
}
353
354
impl GroupedReduction for AllKleeneNullGroupedReduction {
355
fn new_empty(&self) -> Box<dyn GroupedReduction> {
356
Box::new(Self::default())
357
}
358
359
fn reserve(&mut self, additional: usize) {
360
self.seen_false.reserve(additional);
361
self.seen_null.reserve(additional)
362
}
363
364
fn resize(&mut self, num_groups: IdxSize) {
365
self.seen_false.resize(num_groups as usize, false);
366
self.seen_null.resize(num_groups as usize, false);
367
}
368
369
fn update_group(
370
&mut self,
371
values: &[&Column],
372
group_idx: IdxSize,
373
_seq_id: u64,
374
) -> PolarsResult<()> {
375
let &[values] = values else { unreachable!() };
376
assert!(values.dtype() == &DataType::Boolean);
377
let values = values.as_materialized_series_maintain_scalar();
378
let ca: &BooleanChunked = values.as_ref().as_ref();
379
if !ca.all() {
380
self.seen_false.set(group_idx as usize, true);
381
}
382
if ca.has_nulls() {
383
self.seen_null.set(group_idx as usize, true);
384
}
385
Ok(())
386
}
387
388
unsafe fn update_groups_while_evicting(
389
&mut self,
390
values: &[&Column],
391
subset: &[IdxSize],
392
group_idxs: &[EvictIdx],
393
_seq_id: u64,
394
) -> PolarsResult<()> {
395
let &[values] = values else { unreachable!() };
396
assert!(values.dtype() == &DataType::Boolean);
397
assert!(subset.len() == group_idxs.len());
398
let values = values.as_materialized_series(); // @scalar-opt
399
let ca: &BooleanChunked = values.as_ref().as_ref();
400
let arr = ca.downcast_as_array();
401
unsafe {
402
// SAFETY: indices are in-bounds guaranteed by trait.
403
for (i, g) in subset.iter().zip(group_idxs) {
404
let ov = arr.get_unchecked(*i as usize);
405
if g.should_evict() {
406
self.evicted_values
407
.push(self.seen_false.get_unchecked(g.idx()));
408
self.evicted_mask
409
.push(self.seen_null.get_unchecked(g.idx()));
410
self.seen_false.set_unchecked(g.idx(), !ov.unwrap_or(true));
411
self.seen_null.set_unchecked(g.idx(), ov.is_none());
412
} else {
413
self.seen_false
414
.or_pos_unchecked(g.idx(), !ov.unwrap_or(true));
415
self.seen_null.or_pos_unchecked(g.idx(), ov.is_none());
416
}
417
}
418
}
419
Ok(())
420
}
421
422
unsafe fn combine_subset(
423
&mut self,
424
other: &dyn GroupedReduction,
425
subset: &[IdxSize],
426
group_idxs: &[IdxSize],
427
) -> PolarsResult<()> {
428
let other = other.as_any().downcast_ref::<Self>().unwrap();
429
assert!(subset.len() == group_idxs.len());
430
unsafe {
431
// SAFETY: indices are in-bounds guaranteed by trait.
432
for (i, g) in subset.iter().zip(group_idxs) {
433
self.seen_false
434
.or_pos_unchecked(*g as usize, other.seen_false.get_unchecked(*i as usize));
435
self.seen_null
436
.or_pos_unchecked(*g as usize, other.seen_null.get_unchecked(*i as usize));
437
}
438
}
439
Ok(())
440
}
441
442
fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
443
Box::new(Self {
444
seen_false: core::mem::take(&mut self.evicted_values).into_mut(),
445
seen_null: core::mem::take(&mut self.evicted_mask).into_mut(),
446
evicted_values: BitmapBuilder::new(),
447
evicted_mask: BitmapBuilder::new(),
448
})
449
}
450
451
fn finalize(&mut self) -> PolarsResult<Series> {
452
let seen_false = core::mem::take(&mut self.seen_false);
453
let mut mask = core::mem::take(&mut self.seen_null);
454
binary_assign_mut(&mut mask, &seen_false, |mi: u64, fi: u64| fi | !mi);
455
let arr = BooleanArray::from((!seen_false).freeze())
456
.with_validity(Some(mask.freeze()))
457
.boxed();
458
Ok(unsafe {
459
Series::from_chunks_and_dtype_unchecked(
460
PlSmallStr::EMPTY,
461
vec![arr],
462
&DataType::Boolean,
463
)
464
})
465
}
466
467
fn as_any(&self) -> &dyn Any {
468
self
469
}
470
}
471
472