Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/comparisons/array.rs
6939 views
1
use arrow::array::{
2
Array, BinaryArray, BinaryViewArray, BooleanArray, DictionaryArray, FixedSizeBinaryArray,
3
FixedSizeListArray, ListArray, NullArray, PrimitiveArray, StructArray, Utf8Array,
4
Utf8ViewArray,
5
};
6
use arrow::bitmap::Bitmap;
7
use arrow::bitmap::utils::count_zeros;
8
use arrow::datatypes::ArrowDataType;
9
use arrow::legacy::utils::CustomIterTools;
10
use arrow::types::{days_ms, f16, i256, months_days_ns};
11
12
use super::TotalEqKernel;
13
use crate::comparisons::dyn_array::{array_tot_eq_missing_kernel, array_tot_ne_missing_kernel};
14
15
/// Condenses a bitmap of n * width elements into one with n elements.
16
///
17
/// For each block of width bits a zero count is done. The block of bits is then
18
/// replaced with a single bit: the result of true_zero_count(zero_count).
19
fn agg_array_bitmap<F>(bm: Bitmap, width: usize, true_zero_count: F) -> Bitmap
20
where
21
F: Fn(usize) -> bool,
22
{
23
if bm.len() == 1 {
24
bm
25
} else {
26
assert!(width > 0 && bm.len().is_multiple_of(width));
27
28
let (slice, offset, _len) = bm.as_slice();
29
(0..bm.len() / width)
30
.map(|i| true_zero_count(count_zeros(slice, offset + i * width, width)))
31
.collect()
32
}
33
}
34
35
impl TotalEqKernel for FixedSizeListArray {
36
type Scalar = Box<dyn Array>;
37
38
fn tot_eq_kernel(&self, other: &Self) -> Bitmap {
39
// Nested comparison always done with eq_missing, propagating doesn't
40
// make any sense.
41
42
assert_eq!(self.len(), other.len());
43
let ArrowDataType::FixedSizeList(self_type, self_width) = self.dtype().to_logical_type()
44
else {
45
panic!("array comparison called with non-array type");
46
};
47
let ArrowDataType::FixedSizeList(other_type, other_width) = other.dtype().to_logical_type()
48
else {
49
panic!("array comparison called with non-array type");
50
};
51
assert_eq!(self_type.dtype(), other_type.dtype());
52
53
if self_width != other_width {
54
return Bitmap::new_with_value(false, self.len());
55
}
56
57
if *self_width == 0 {
58
return Bitmap::new_with_value(true, self.len());
59
}
60
61
// @TODO: It is probably worth it to dispatch to a special kernel for when there are
62
// several nested arrays because that can be rather slow with this code.
63
let inner = array_tot_eq_missing_kernel(self.values().as_ref(), other.values().as_ref());
64
65
agg_array_bitmap(inner, self.size(), |zeroes| zeroes == 0)
66
}
67
68
fn tot_ne_kernel(&self, other: &Self) -> Bitmap {
69
assert_eq!(self.len(), other.len());
70
let ArrowDataType::FixedSizeList(self_type, self_width) = self.dtype().to_logical_type()
71
else {
72
panic!("array comparison called with non-array type");
73
};
74
let ArrowDataType::FixedSizeList(other_type, other_width) = other.dtype().to_logical_type()
75
else {
76
panic!("array comparison called with non-array type");
77
};
78
assert_eq!(self_type.dtype(), other_type.dtype());
79
80
if self_width != other_width {
81
return Bitmap::new_with_value(true, self.len());
82
}
83
84
if *self_width == 0 {
85
return Bitmap::new_with_value(false, self.len());
86
}
87
88
// @TODO: It is probably worth it to dispatch to a special kernel for when there are
89
// several nested arrays because that can be rather slow with this code.
90
let inner = array_tot_ne_missing_kernel(self.values().as_ref(), other.values().as_ref());
91
92
agg_array_bitmap(inner, self.size(), |zeroes| zeroes < self.size())
93
}
94
95
fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
96
let ArrowDataType::FixedSizeList(self_type, width) = self.dtype().to_logical_type() else {
97
panic!("array comparison called with non-array type");
98
};
99
assert_eq!(self_type.dtype(), other.dtype().to_logical_type());
100
101
let width = *width;
102
103
if width != other.len() {
104
return Bitmap::new_with_value(false, self.len());
105
}
106
107
if width == 0 {
108
return Bitmap::new_with_value(true, self.len());
109
}
110
111
// @TODO: It is probably worth it to dispatch to a special kernel for when there are
112
// several nested arrays because that can be rather slow with this code.
113
array_fsl_tot_eq_missing_kernel(self.values().as_ref(), other.as_ref(), self.len(), width)
114
}
115
116
fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
117
let ArrowDataType::FixedSizeList(self_type, width) = self.dtype().to_logical_type() else {
118
panic!("array comparison called with non-array type");
119
};
120
assert_eq!(self_type.dtype(), other.dtype().to_logical_type());
121
122
let width = *width;
123
124
if width != other.len() {
125
return Bitmap::new_with_value(true, self.len());
126
}
127
128
if width == 0 {
129
return Bitmap::new_with_value(false, self.len());
130
}
131
132
// @TODO: It is probably worth it to dispatch to a special kernel for when there are
133
// several nested arrays because that can be rather slow with this code.
134
array_fsl_tot_ne_missing_kernel(self.values().as_ref(), other.as_ref(), self.len(), width)
135
}
136
}
137
138
macro_rules! compare {
139
($lhs:expr, $rhs:expr, $length:expr, $width:expr, $op:path, $true_op:expr) => {{
140
let lhs = $lhs;
141
let rhs = $rhs;
142
143
macro_rules! call_binary {
144
($T:ty) => {{
145
let values: &$T = $lhs.as_any().downcast_ref().unwrap();
146
let scalar: &$T = $rhs.as_any().downcast_ref().unwrap();
147
148
(0..$length)
149
.map(move |i| {
150
// @TODO: I feel like there is a better way to do this.
151
let mut values: $T = values.clone();
152
<$T>::slice(&mut values, i * $width, $width);
153
154
$true_op($op(&values, scalar))
155
})
156
.collect_trusted()
157
}};
158
}
159
160
assert_eq!(lhs.dtype(), rhs.dtype());
161
162
use arrow::datatypes::{IntegerType as I, PhysicalType as PH, PrimitiveType as PR};
163
match lhs.dtype().to_physical_type() {
164
PH::Boolean => call_binary!(BooleanArray),
165
PH::BinaryView => call_binary!(BinaryViewArray),
166
PH::Utf8View => call_binary!(Utf8ViewArray),
167
PH::Primitive(PR::Int8) => call_binary!(PrimitiveArray<i8>),
168
PH::Primitive(PR::Int16) => call_binary!(PrimitiveArray<i16>),
169
PH::Primitive(PR::Int32) => call_binary!(PrimitiveArray<i32>),
170
PH::Primitive(PR::Int64) => call_binary!(PrimitiveArray<i64>),
171
PH::Primitive(PR::Int128) => call_binary!(PrimitiveArray<i128>),
172
PH::Primitive(PR::UInt8) => call_binary!(PrimitiveArray<u8>),
173
PH::Primitive(PR::UInt16) => call_binary!(PrimitiveArray<u16>),
174
PH::Primitive(PR::UInt32) => call_binary!(PrimitiveArray<u32>),
175
PH::Primitive(PR::UInt64) => call_binary!(PrimitiveArray<u64>),
176
PH::Primitive(PR::UInt128) => call_binary!(PrimitiveArray<u128>),
177
PH::Primitive(PR::Float16) => call_binary!(PrimitiveArray<f16>),
178
PH::Primitive(PR::Float32) => call_binary!(PrimitiveArray<f32>),
179
PH::Primitive(PR::Float64) => call_binary!(PrimitiveArray<f64>),
180
PH::Primitive(PR::Int256) => call_binary!(PrimitiveArray<i256>),
181
PH::Primitive(PR::DaysMs) => call_binary!(PrimitiveArray<days_ms>),
182
PH::Primitive(PR::MonthDayNano) => {
183
call_binary!(PrimitiveArray<months_days_ns>)
184
},
185
186
#[cfg(feature = "dtype-array")]
187
PH::FixedSizeList => call_binary!(arrow::array::FixedSizeListArray),
188
#[cfg(not(feature = "dtype-array"))]
189
PH::FixedSizeList => todo!(
190
"Comparison of FixedSizeListArray is not supported without dtype-array feature"
191
),
192
193
PH::Null => call_binary!(NullArray),
194
PH::FixedSizeBinary => call_binary!(FixedSizeBinaryArray),
195
PH::Binary => call_binary!(BinaryArray<i32>),
196
PH::LargeBinary => call_binary!(BinaryArray<i64>),
197
PH::Utf8 => call_binary!(Utf8Array<i32>),
198
PH::LargeUtf8 => call_binary!(Utf8Array<i64>),
199
PH::List => call_binary!(ListArray<i32>),
200
PH::LargeList => call_binary!(ListArray<i64>),
201
PH::Struct => call_binary!(StructArray),
202
PH::Union => todo!("Comparison of UnionArrays is not yet supported"),
203
PH::Map => todo!("Comparison of MapArrays is not yet supported"),
204
PH::Dictionary(I::Int8) => call_binary!(DictionaryArray<i8>),
205
PH::Dictionary(I::Int16) => call_binary!(DictionaryArray<i16>),
206
PH::Dictionary(I::Int32) => call_binary!(DictionaryArray<i32>),
207
PH::Dictionary(I::Int64) => call_binary!(DictionaryArray<i64>),
208
PH::Dictionary(I::Int128) => call_binary!(DictionaryArray<i128>),
209
PH::Dictionary(I::UInt8) => call_binary!(DictionaryArray<u8>),
210
PH::Dictionary(I::UInt16) => call_binary!(DictionaryArray<u16>),
211
PH::Dictionary(I::UInt32) => call_binary!(DictionaryArray<u32>),
212
PH::Dictionary(I::UInt64) => call_binary!(DictionaryArray<u64>),
213
}
214
}};
215
}
216
217
fn array_fsl_tot_eq_missing_kernel(
218
values: &dyn Array,
219
scalar: &dyn Array,
220
length: usize,
221
width: usize,
222
) -> Bitmap {
223
// @NOTE: Zero-Width Array are handled before
224
debug_assert_eq!(values.len(), length * width);
225
debug_assert_eq!(scalar.len(), width);
226
227
compare!(
228
values,
229
scalar,
230
length,
231
width,
232
TotalEqKernel::tot_eq_missing_kernel,
233
|bm: Bitmap| bm.unset_bits() == 0
234
)
235
}
236
237
fn array_fsl_tot_ne_missing_kernel(
238
values: &dyn Array,
239
scalar: &dyn Array,
240
length: usize,
241
width: usize,
242
) -> Bitmap {
243
// @NOTE: Zero-Width Array are handled before
244
debug_assert_eq!(values.len(), length * width);
245
debug_assert_eq!(scalar.len(), width);
246
247
compare!(
248
values,
249
scalar,
250
length,
251
width,
252
TotalEqKernel::tot_ne_missing_kernel,
253
|bm: Bitmap| bm.set_bits() > 0
254
)
255
}
256
257