Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/series/comparison.rs
6940 views
1
//! Comparison operations on Series.
2
3
use polars_error::feature_gated;
4
5
use crate::prelude::*;
6
use crate::series::arithmetic::coerce_lhs_rhs;
7
use crate::series::nulls::replace_non_null;
8
9
/// Expands to the body of an equality-style comparison between two `Series`.
///
/// Arguments:
/// * `$self`, `$rhs` - the left and right operands.
/// * `$method` - name of the comparison method to dispatch to on the typed arrays
///   (invoked in this file with `equal`, `equal_missing`, `not_equal`, `not_equal_missing`).
///
/// The expansion uses `?` and `return`, so it must be the body of a function whose
/// return type is `PolarsResult<BooleanChunked>`.
///
/// Steps performed:
/// 1. `validate_types` rejects dtype combinations that can never be compared.
/// 2. The lengths must be equal unless one side has length 1 (broadcast).
/// 3. With the `dtype-categorical` feature, categorical/enum operands (paired with
///    each other or with a string operand) are handled directly and returned early.
/// 4. Otherwise both sides are coerced with `coerce_lhs_rhs`, converted to their
///    physical representation and dispatched on the dtype of the left operand.
/// 5. The resulting mask is renamed after the left operand.
macro_rules! impl_eq_compare {
    ($self:expr, $rhs:expr, $method:ident) => {{
        use DataType::*;
        let (lhs, rhs) = ($self, $rhs);
        validate_types(lhs.dtype(), rhs.dtype())?;

        polars_ensure!(
            lhs.len() == rhs.len() ||

            // Broadcast
            lhs.len() == 1 ||
            rhs.len() == 1,
            ShapeMismatch: "could not compare between two series of different length ({} != {})",
            lhs.len(),
            rhs.len()
        );

        #[cfg(feature = "dtype-categorical")]
        match (lhs.dtype(), rhs.dtype()) {
            (Categorical(lcats, _), Categorical(rcats, _)) => {
                ensure_same_categories(lcats, rcats)?;
                return with_match_categorical_physical_type!(lcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Enum(lfcats, _), Enum(rfcats, _)) => {
                ensure_same_frozen_categories(lfcats, rfcats)?;
                return with_match_categorical_physical_type!(lfcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Categorical(_, _) | Enum(_, _), String) => {
                return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {
                    Ok(lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap()))
                })
            },
            // The operands are swapped here: the categorical side is always the receiver.
            (String, Categorical(_, _) | Enum(_, _)) => {
                return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {
                    Ok(rhs.cat::<$C>().unwrap().$method(lhs.str().unwrap()))
                })
            },
            _ => (),
        };

        // The closure reports the names/dtypes of the original (un-coerced) operands.
        let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs)
            .map_err(|_| polars_err!(
                SchemaMismatch: "could not evaluate comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
                lhs.name(), lhs.dtype(), rhs.name(), rhs.dtype()
            ))?;
        let lhs = lhs.to_physical_repr();
        let rhs = rhs.to_physical_repr();
        let mut out = match lhs.dtype() {
            Null => lhs.null().unwrap().$method(rhs.null().unwrap()),
            Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),
            String => lhs.str().unwrap().$method(rhs.str().unwrap()),
            Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),
            BinaryOffset => lhs.binary_offset().unwrap().$method(rhs.binary_offset().unwrap()),
            UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),
            UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),
            UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),
            UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),
            Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),
            Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),
            Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),
            Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),
            Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),
            Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),
            Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),
            List(_) => lhs.list().unwrap().$method(rhs.list().unwrap()),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => lhs.array().unwrap().$method(rhs.array().unwrap()),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => lhs.struct_().unwrap().$method(rhs.struct_().unwrap()),

            // Any dtype without an arm above (including `Array`/`Struct` when their
            // feature is disabled) cannot be compared.
            dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),
        };
        out.rename(lhs.name().clone());
        PolarsResult::Ok(out)
    }};
}
89
90
/// Bails out of the enclosing function with an `InvalidOperation` error stating that
/// the ordering comparison `$symbol` is not available for the two given series.
///
/// * `$left`, `$right` - the operands; their `name()` and `dtype()` are reported.
/// * `$symbol` - literal operator text (e.g. `"<"`) inserted into the message.
macro_rules! bail_invalid_ineq {
    ($left:expr, $right:expr, $symbol:literal) => {
        polars_bail!(
            InvalidOperation: "cannot perform '{}' comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
            $symbol,
            $left.name(),
            $left.dtype(),
            $right.name(),
            $right.dtype(),
        )
    };
}
100
101
/// Expands to the body of an ordering comparison between two `Series`.
///
/// Arguments:
/// * `$self`, `$rhs` - the left and right operands.
/// * `$method` - name of the comparison method to dispatch to (`gt`, `gt_eq`, `lt`, `lt_eq`).
/// * `$op` - literal operator text (e.g. `">"`) used in error messages.
/// * `$rev_method` - the mirrored method (`lt` for `gt`, ...), used when the operands
///   have to be swapped.
///
/// The expansion uses `?` and `return`, so it must be the body of a function whose
/// return type is `PolarsResult<BooleanChunked>`.
///
/// Steps performed:
/// 1. `validate_types` rejects dtype combinations that can never be compared.
/// 2. The lengths must be equal unless one side has length 1 (broadcast).
/// 3. With the `dtype-categorical` feature, categorical/enum operands (paired with
///    each other or with a string operand) are handled directly and returned early.
/// 4. Otherwise both sides are coerced with `coerce_lhs_rhs`, converted to their
///    physical representation and dispatched on the dtype of the left operand.
///    `List`, `Array` and `Struct` have no ordering comparison and produce an error.
/// 5. The resulting mask is renamed after the left operand.
macro_rules! impl_ineq_compare {
    ($self:expr, $rhs:expr, $method:ident, $op:literal, $rev_method:ident) => {{
        use DataType::*;
        let (lhs, rhs) = ($self, $rhs);
        validate_types(lhs.dtype(), rhs.dtype())?;

        polars_ensure!(
            lhs.len() == rhs.len() ||

            // Broadcast
            lhs.len() == 1 ||
            rhs.len() == 1,
            ShapeMismatch:
            "could not perform '{}' comparison between series '{}' of length: {} and series '{}' of length: {}, because they have different lengths",
            $op,
            lhs.name(), lhs.len(),
            rhs.name(), rhs.len()
        );

        #[cfg(feature = "dtype-categorical")]
        match (lhs.dtype(), rhs.dtype()) {
            (Categorical(lcats, _), Categorical(rcats, _)) => {
                ensure_same_categories(lcats, rcats)?;
                return with_match_categorical_physical_type!(lcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Enum(lfcats, _), Enum(rfcats, _)) => {
                ensure_same_frozen_categories(lfcats, rfcats)?;
                return with_match_categorical_physical_type!(lfcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Categorical(_, _) | Enum(_, _), String) => {
                return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap())
                })
            },
            (String, Categorical(_, _) | Enum(_, _)) => {
                return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {
                    // We use the reverse method as string <-> enum comparisons are only implemented one-way.
                    rhs.cat::<$C>().unwrap().$rev_method(lhs.str().unwrap())
                })
            },
            _ => (),
        };

        // The closure reports the names/dtypes of the original (un-coerced) operands.
        let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs).map_err(|_|
            polars_err!(
                SchemaMismatch: "could not evaluate '{}' comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
                $op,
                lhs.name(), lhs.dtype(),
                rhs.name(), rhs.dtype()
            )
        )?;
        let lhs = lhs.to_physical_repr();
        let rhs = rhs.to_physical_repr();
        let mut out = match lhs.dtype() {
            Null => lhs.null().unwrap().$method(rhs.null().unwrap()),
            Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),
            String => lhs.str().unwrap().$method(rhs.str().unwrap()),
            Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),
            BinaryOffset => lhs.binary_offset().unwrap().$method(rhs.binary_offset().unwrap()),
            UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),
            UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),
            UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),
            UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),
            Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),
            Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),
            Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),
            Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),
            Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),
            Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),
            Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),
            // Nested dtypes are explicitly rejected for ordering comparisons.
            List(_) => bail_invalid_ineq!(lhs, rhs, $op),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => bail_invalid_ineq!(lhs, rhs, $op),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => bail_invalid_ineq!(lhs, rhs, $op),

            // Any remaining dtype without an arm above cannot be compared.
            dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),
        };
        out.rename(lhs.name().clone());
        PolarsResult::Ok(out)
    }};
}
187
188
fn validate_types(left: &DataType, right: &DataType) -> PolarsResult<()> {
189
use DataType::*;
190
191
match (left, right) {
192
(String, dt) | (dt, String) if dt.is_primitive_numeric() => {
193
polars_bail!(ComputeError: "cannot compare string with numeric type ({})", dt)
194
},
195
#[cfg(feature = "dtype-categorical")]
196
(Categorical(_, _) | Enum(_, _), dt) | (dt, Categorical(_, _) | Enum(_, _))
197
if !(dt.is_categorical() | dt.is_string() | dt.is_enum()) =>
198
{
199
polars_bail!(ComputeError: "cannot compare categorical with {}", dt);
200
},
201
_ => (),
202
};
203
Ok(())
204
}
205
206
impl ChunkCompareEq<&Series> for Series {
207
type Item = PolarsResult<BooleanChunked>;
208
209
/// Create a boolean mask by checking for equality.
210
fn equal(&self, rhs: &Series) -> Self::Item {
211
impl_eq_compare!(self, rhs, equal)
212
}
213
214
/// Create a boolean mask by checking for equality.
215
fn equal_missing(&self, rhs: &Series) -> Self::Item {
216
impl_eq_compare!(self, rhs, equal_missing)
217
}
218
219
/// Create a boolean mask by checking for inequality.
220
fn not_equal(&self, rhs: &Series) -> Self::Item {
221
impl_eq_compare!(self, rhs, not_equal)
222
}
223
224
/// Create a boolean mask by checking for inequality.
225
fn not_equal_missing(&self, rhs: &Series) -> Self::Item {
226
impl_eq_compare!(self, rhs, not_equal_missing)
227
}
228
}
229
230
impl ChunkCompareIneq<&Series> for Series {
231
type Item = PolarsResult<BooleanChunked>;
232
233
/// Create a boolean mask by checking if self > rhs.
234
fn gt(&self, rhs: &Series) -> Self::Item {
235
impl_ineq_compare!(self, rhs, gt, ">", lt)
236
}
237
238
/// Create a boolean mask by checking if self >= rhs.
239
fn gt_eq(&self, rhs: &Series) -> Self::Item {
240
impl_ineq_compare!(self, rhs, gt_eq, ">=", lt_eq)
241
}
242
243
/// Create a boolean mask by checking if self < rhs.
244
fn lt(&self, rhs: &Series) -> Self::Item {
245
impl_ineq_compare!(self, rhs, lt, "<", gt)
246
}
247
248
/// Create a boolean mask by checking if self <= rhs.
249
fn lt_eq(&self, rhs: &Series) -> Self::Item {
250
impl_ineq_compare!(self, rhs, lt_eq, "<=", gt_eq)
251
}
252
}
253
254
impl<Rhs> ChunkCompareEq<Rhs> for Series
255
where
256
Rhs: NumericNative,
257
{
258
type Item = PolarsResult<BooleanChunked>;
259
260
fn equal(&self, rhs: Rhs) -> Self::Item {
261
validate_types(self.dtype(), &DataType::Int8)?;
262
let s = self.to_physical_repr();
263
Ok(apply_method_physical_numeric!(&s, equal, rhs))
264
}
265
266
fn equal_missing(&self, rhs: Rhs) -> Self::Item {
267
validate_types(self.dtype(), &DataType::Int8)?;
268
let s = self.to_physical_repr();
269
Ok(apply_method_physical_numeric!(&s, equal_missing, rhs))
270
}
271
272
fn not_equal(&self, rhs: Rhs) -> Self::Item {
273
validate_types(self.dtype(), &DataType::Int8)?;
274
let s = self.to_physical_repr();
275
Ok(apply_method_physical_numeric!(&s, not_equal, rhs))
276
}
277
278
fn not_equal_missing(&self, rhs: Rhs) -> Self::Item {
279
validate_types(self.dtype(), &DataType::Int8)?;
280
let s = self.to_physical_repr();
281
Ok(apply_method_physical_numeric!(&s, not_equal_missing, rhs))
282
}
283
}
284
285
impl<Rhs> ChunkCompareIneq<Rhs> for Series
286
where
287
Rhs: NumericNative,
288
{
289
type Item = PolarsResult<BooleanChunked>;
290
291
fn gt(&self, rhs: Rhs) -> Self::Item {
292
validate_types(self.dtype(), &DataType::Int8)?;
293
let s = self.to_physical_repr();
294
Ok(apply_method_physical_numeric!(&s, gt, rhs))
295
}
296
297
fn gt_eq(&self, rhs: Rhs) -> Self::Item {
298
validate_types(self.dtype(), &DataType::Int8)?;
299
let s = self.to_physical_repr();
300
Ok(apply_method_physical_numeric!(&s, gt_eq, rhs))
301
}
302
303
fn lt(&self, rhs: Rhs) -> Self::Item {
304
validate_types(self.dtype(), &DataType::Int8)?;
305
let s = self.to_physical_repr();
306
Ok(apply_method_physical_numeric!(&s, lt, rhs))
307
}
308
309
fn lt_eq(&self, rhs: Rhs) -> Self::Item {
310
validate_types(self.dtype(), &DataType::Int8)?;
311
let s = self.to_physical_repr();
312
Ok(apply_method_physical_numeric!(&s, lt_eq, rhs))
313
}
314
}
315
316
impl ChunkCompareEq<&str> for Series {
317
type Item = PolarsResult<BooleanChunked>;
318
319
fn equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {
320
validate_types(self.dtype(), &DataType::String)?;
321
match self.dtype() {
322
DataType::String => Ok(self.str().unwrap().equal(rhs)),
323
#[cfg(feature = "dtype-categorical")]
324
DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
325
with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
326
self.cat::<$C>().unwrap().equal(rhs)
327
}),
328
),
329
_ => Ok(BooleanChunked::full(self.name().clone(), false, self.len())),
330
}
331
}
332
333
fn equal_missing(&self, rhs: &str) -> Self::Item {
334
validate_types(self.dtype(), &DataType::String)?;
335
match self.dtype() {
336
DataType::String => Ok(self.str().unwrap().equal_missing(rhs)),
337
#[cfg(feature = "dtype-categorical")]
338
DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
339
with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
340
self.cat::<$C>().unwrap().equal_missing(rhs)
341
}),
342
),
343
_ => Ok(replace_non_null(
344
self.name().clone(),
345
self.0.chunks(),
346
false,
347
)),
348
}
349
}
350
351
fn not_equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {
352
validate_types(self.dtype(), &DataType::String)?;
353
match self.dtype() {
354
DataType::String => Ok(self.str().unwrap().not_equal(rhs)),
355
#[cfg(feature = "dtype-categorical")]
356
DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
357
with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
358
self.cat::<$C>().unwrap().not_equal(rhs)
359
}),
360
),
361
_ => Ok(BooleanChunked::full(self.name().clone(), true, self.len())),
362
}
363
}
364
365
fn not_equal_missing(&self, rhs: &str) -> Self::Item {
366
validate_types(self.dtype(), &DataType::String)?;
367
match self.dtype() {
368
DataType::String => Ok(self.str().unwrap().not_equal_missing(rhs)),
369
#[cfg(feature = "dtype-categorical")]
370
DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
371
with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
372
self.cat::<$C>().unwrap().not_equal_missing(rhs)
373
}),
374
),
375
_ => Ok(replace_non_null(self.name().clone(), self.0.chunks(), true)),
376
}
377
}
378
}
379
380
impl ChunkCompareIneq<&str> for Series {
381
type Item = PolarsResult<BooleanChunked>;
382
383
fn gt(&self, rhs: &str) -> Self::Item {
384
validate_types(self.dtype(), &DataType::String)?;
385
match self.dtype() {
386
DataType::String => Ok(self.str().unwrap().gt(rhs)),
387
#[cfg(feature = "dtype-categorical")]
388
DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
389
with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
390
self.cat::<$C>().unwrap().gt(rhs)
391
}),
392
),
393
_ => polars_bail!(
394
ComputeError: "cannot compare str value to series of type {}", self.dtype(),
395
),
396
}
397
}
398
399
fn gt_eq(&self, rhs: &str) -> Self::Item {
400
validate_types(self.dtype(), &DataType::String)?;
401
match self.dtype() {
402
DataType::String => Ok(self.str().unwrap().gt_eq(rhs)),
403
#[cfg(feature = "dtype-categorical")]
404
DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
405
with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
406
self.cat::<$C>().unwrap().gt_eq(rhs)
407
}),
408
),
409
_ => polars_bail!(
410
ComputeError: "cannot compare str value to series of type {}", self.dtype(),
411
),
412
}
413
}
414
415
fn lt(&self, rhs: &str) -> Self::Item {
416
validate_types(self.dtype(), &DataType::String)?;
417
match self.dtype() {
418
DataType::String => Ok(self.str().unwrap().lt(rhs)),
419
#[cfg(feature = "dtype-categorical")]
420
DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
421
with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
422
self.cat::<$C>().unwrap().lt(rhs)
423
}),
424
),
425
_ => polars_bail!(
426
ComputeError: "cannot compare str value to series of type {}", self.dtype(),
427
),
428
}
429
}
430
431
fn lt_eq(&self, rhs: &str) -> Self::Item {
432
validate_types(self.dtype(), &DataType::String)?;
433
match self.dtype() {
434
DataType::String => Ok(self.str().unwrap().lt_eq(rhs)),
435
#[cfg(feature = "dtype-categorical")]
436
DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
437
with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
438
self.cat::<$C>().unwrap().lt_eq(rhs)
439
}),
440
),
441
_ => polars_bail!(
442
ComputeError: "cannot compare str value to series of type {}", self.dtype(),
443
),
444
}
445
}
446
}
447
448