Path: blob/main/crates/polars-core/src/series/comparison.rs
8431 views
//! Comparison operations on Series.12use polars_error::feature_gated;34use crate::prelude::*;5use crate::series::arithmetic::coerce_lhs_rhs;6use crate::series::nulls::replace_non_null;78macro_rules! impl_eq_compare {9($self:expr, $rhs:expr, $method:ident) => {{10use DataType::*;11let (lhs, rhs) = ($self, $rhs);12validate_types(lhs.dtype(), rhs.dtype())?;1314polars_ensure!(15lhs.len() == rhs.len() ||1617// Broadcast18lhs.len() == 1 ||19rhs.len() == 1,20ShapeMismatch: "could not compare between two series of different length ({} != {})",21lhs.len(),22rhs.len()23);2425match (lhs.dtype(), rhs.dtype()) {26#[cfg(feature = "dtype-categorical")]27(Categorical(lcats, _), Categorical(rcats, _)) => {28ensure_same_categories(lcats, rcats)?;29return with_match_categorical_physical_type!(lcats.physical(), |$C| {30lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())31})32},33#[cfg(feature = "dtype-categorical")]34(Enum(lfcats, _), Enum(rfcats, _)) => {35ensure_same_frozen_categories(lfcats, rfcats)?;36return with_match_categorical_physical_type!(lfcats.physical(), |$C| {37lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())38})39},40#[cfg(feature = "dtype-categorical")]41(Categorical(_, _) | Enum(_, _), String) => {42return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {43Ok(lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap()))44})45},46#[cfg(feature = "dtype-categorical")]47(String, Categorical(_, _) | Enum(_, _)) => {48return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {49Ok(rhs.cat::<$C>().unwrap().$method(lhs.str().unwrap()))50})51},5253#[cfg(feature = "dtype-extension")]54(le @ Extension(_, _), re @ Extension(_, _)) if le == re => {55let lhs = lhs.ext().unwrap();56let rhs = rhs.ext().unwrap();57return lhs.storage().$method(rhs.storage());58},5960#[cfg(feature = "dtype-extension")]61(Extension(_, storage), rdt) if **storage == *rdt => {62let lhs = lhs.ext().unwrap();63return lhs.storage().$method(rhs);64},6566#[cfg(feature = "dtype-extension")]67(ldt, Extension(_, storage)) if *ldt == **storage => {68let rhs = rhs.ext().unwrap();69return lhs.$method(rhs.storage());70},71_ => (),72};7374let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs)75.map_err(|_| polars_err!(76SchemaMismatch: "could not evaluate comparison between series '{}' of dtype: {:?} and series '{}' of dtype: {:?}",77lhs.name(), lhs.dtype(), rhs.name(), rhs.dtype()78))?;79let lhs = lhs.to_physical_repr();80let rhs = rhs.to_physical_repr();81let mut out = match lhs.dtype() {82Null => lhs.null().unwrap().$method(rhs.null().unwrap()),83Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),84String => lhs.str().unwrap().$method(rhs.str().unwrap()),85Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),86BinaryOffset => lhs.binary_offset().unwrap().$method(rhs.binary_offset().unwrap()),87UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),88UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),89UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),90UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),91UInt128 => feature_gated!("dtype-u128", lhs.u128().unwrap().$method(rhs.u128().unwrap())),92Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),93Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),94Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),95Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),96Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),97Float16 => feature_gated!("dtype-f16", lhs.f16().unwrap().$method(rhs.f16().unwrap())),98Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),99Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),100List(_) => lhs.list().unwrap().$method(rhs.list().unwrap()),101#[cfg(feature = "dtype-array")]102Array(_, _) => lhs.array().unwrap().$method(rhs.array().unwrap()),103#[cfg(feature = "dtype-struct")]104Struct(_) => lhs.struct_().unwrap().$method(rhs.struct_().unwrap()),105106dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),107};108out.rename(lhs.name().clone());109PolarsResult::Ok(out)110}};111}112113macro_rules! bail_invalid_ineq {114($lhs:expr, $rhs:expr, $op:literal) => {115polars_bail!(116InvalidOperation: "cannot perform '{}' comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",117$op,118$lhs.name(), $lhs.dtype(),119$rhs.name(), $rhs.dtype(),120)121};122}123124macro_rules! impl_ineq_compare {125($self:expr, $rhs:expr, $method:ident, $op:literal, $rev_method:ident) => {{126use DataType::*;127let (lhs, rhs) = ($self, $rhs);128validate_types(lhs.dtype(), rhs.dtype())?;129130polars_ensure!(131lhs.len() == rhs.len() ||132133// Broadcast134lhs.len() == 1 ||135rhs.len() == 1,136ShapeMismatch:137"could not perform '{}' comparison between series '{}' of length: {} and series '{}' of length: {}, because they have different lengths",138$op,139lhs.name(), lhs.len(),140rhs.name(), rhs.len()141);142143match (lhs.dtype(), rhs.dtype()) {144#[cfg(feature = "dtype-categorical")]145(Categorical(lcats, _), Categorical(rcats, _)) => {146ensure_same_categories(lcats, rcats)?;147return with_match_categorical_physical_type!(lcats.physical(), |$C| {148lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())149})150},151#[cfg(feature = "dtype-categorical")]152(Enum(lfcats, _), Enum(rfcats, _)) => {153ensure_same_frozen_categories(lfcats, rfcats)?;154return with_match_categorical_physical_type!(lfcats.physical(), |$C| {155lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())156})157},158#[cfg(feature = "dtype-categorical")]159(Categorical(_, _) | Enum(_, _), String) => {160return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {161lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap())162})163},164#[cfg(feature = "dtype-categorical")]165(String, Categorical(_, _) | Enum(_, _)) => {166return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {167// We use the reverse method as string <-> enum comparisons are only implemented one-way.168rhs.cat::<$C>().unwrap().$rev_method(lhs.str().unwrap())169})170},171#[cfg(feature = "dtype-extension")]172(le @ Extension(_, _), re @ Extension(_, _)) if le == re => {173let lhs = lhs.ext().unwrap();174let rhs = rhs.ext().unwrap();175return lhs.storage().$method(rhs.storage());176},177178#[cfg(feature = "dtype-extension")]179(Extension(_, storage), rdt) if **storage == *rdt => {180let lhs = lhs.ext().unwrap();181return lhs.storage().$method(rhs);182},183184#[cfg(feature = "dtype-extension")]185(ldt, Extension(_, storage)) if *ldt == **storage => {186let rhs = rhs.ext().unwrap();187return lhs.$method(rhs.storage());188},189_ => (),190};191192let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs).map_err(|_|193polars_err!(194SchemaMismatch: "could not evaluate '{}' comparison between series '{}' of dtype: {:?} and series '{}' of dtype: {:?}",195$op,196lhs.name(), lhs.dtype(),197rhs.name(), rhs.dtype()198)199)?;200let lhs = lhs.to_physical_repr();201let rhs = rhs.to_physical_repr();202let mut out = match lhs.dtype() {203Null => lhs.null().unwrap().$method(rhs.null().unwrap()),204Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),205String => lhs.str().unwrap().$method(rhs.str().unwrap()),206Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),207BinaryOffset => lhs.binary_offset().unwrap().$method(rhs.binary_offset().unwrap()),208UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),209UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),210UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),211UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),212UInt128 => feature_gated!("dtype-u128", lhs.u128().unwrap().$method(rhs.u128().unwrap())),213Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),214Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),215Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),216Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),217Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),218Float16 => feature_gated!("dtype-f16", lhs.f16().unwrap().$method(rhs.f16().unwrap())),219Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),220Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),221List(_) => bail_invalid_ineq!(lhs, rhs, $op),222#[cfg(feature = "dtype-array")]223Array(_, _) => bail_invalid_ineq!(lhs, rhs, $op),224#[cfg(feature = "dtype-struct")]225Struct(_) => bail_invalid_ineq!(lhs, rhs, $op),226227dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),228};229out.rename(lhs.name().clone());230PolarsResult::Ok(out)231}};232}233234fn validate_types(left: &DataType, right: &DataType) -> PolarsResult<()> {235use DataType::*;236237match (left, right) {238(String, dt) | (dt, String) if dt.is_primitive_numeric() => {239polars_bail!(ComputeError: "cannot compare string with numeric type ({})", dt)240},241#[cfg(feature = "dtype-categorical")]242(Categorical(_, _) | Enum(_, _), dt) | (dt, Categorical(_, _) | Enum(_, _))243if !(dt.is_categorical() | dt.is_string() | dt.is_enum()) =>244{245polars_bail!(ComputeError: "cannot compare categorical with {}", dt)246},247#[cfg(feature = "dtype-duration")]248(Date, Duration(_)) | (Duration(_), Date) => {249polars_bail!(ComputeError: "cannot compare date with duration")250},251_ => (),252};253Ok(())254}255256impl ChunkCompareEq<&Series> for Series {257type Item = PolarsResult<BooleanChunked>;258259/// Create a boolean mask by checking for equality.260fn equal(&self, rhs: &Series) -> Self::Item {261impl_eq_compare!(self, rhs, equal)262}263264/// Create a boolean mask by checking for equality.265fn equal_missing(&self, rhs: &Series) -> Self::Item {266impl_eq_compare!(self, rhs, equal_missing)267}268269/// Create a boolean mask by checking for inequality.270fn not_equal(&self, rhs: &Series) -> Self::Item {271impl_eq_compare!(self, rhs, not_equal)272}273274/// Create a boolean mask by checking for inequality.275fn not_equal_missing(&self, rhs: &Series) -> Self::Item {276impl_eq_compare!(self, rhs, not_equal_missing)277}278}279280impl ChunkCompareIneq<&Series> for Series {281type Item = PolarsResult<BooleanChunked>;282283/// Create a boolean mask by checking if self > rhs.284fn gt(&self, rhs: &Series) -> Self::Item {285impl_ineq_compare!(self, rhs, gt, ">", lt)286}287288/// Create a boolean mask by checking if self >= rhs.289fn gt_eq(&self, rhs: &Series) -> Self::Item {290impl_ineq_compare!(self, rhs, gt_eq, ">=", lt_eq)291}292293/// Create a boolean mask by checking if self < rhs.294fn lt(&self, rhs: &Series) -> Self::Item {295impl_ineq_compare!(self, rhs, lt, "<", gt)296}297298/// Create a boolean mask by checking if self <= rhs.299fn lt_eq(&self, rhs: &Series) -> Self::Item {300impl_ineq_compare!(self, rhs, lt_eq, "<=", gt_eq)301}302}303304impl<Rhs> ChunkCompareEq<Rhs> for Series305where306Rhs: NumericNative,307{308type Item = PolarsResult<BooleanChunked>;309310fn equal(&self, rhs: Rhs) -> Self::Item {311validate_types(self.dtype(), &DataType::Int8)?;312let s = self.to_physical_repr();313Ok(apply_method_physical_numeric!(&s, equal, rhs))314}315316fn equal_missing(&self, rhs: Rhs) -> Self::Item {317validate_types(self.dtype(), &DataType::Int8)?;318let s = self.to_physical_repr();319Ok(apply_method_physical_numeric!(&s, equal_missing, rhs))320}321322fn not_equal(&self, rhs: Rhs) -> Self::Item {323validate_types(self.dtype(), &DataType::Int8)?;324let s = self.to_physical_repr();325Ok(apply_method_physical_numeric!(&s, not_equal, rhs))326}327328fn not_equal_missing(&self, rhs: Rhs) -> Self::Item {329validate_types(self.dtype(), &DataType::Int8)?;330let s = self.to_physical_repr();331Ok(apply_method_physical_numeric!(&s, not_equal_missing, rhs))332}333}334335impl<Rhs> ChunkCompareIneq<Rhs> for Series336where337Rhs: NumericNative,338{339type Item = PolarsResult<BooleanChunked>;340341fn gt(&self, rhs: Rhs) -> Self::Item {342validate_types(self.dtype(), &DataType::Int8)?;343let s = self.to_physical_repr();344Ok(apply_method_physical_numeric!(&s, gt, rhs))345}346347fn gt_eq(&self, rhs: Rhs) -> Self::Item {348validate_types(self.dtype(), &DataType::Int8)?;349let s = self.to_physical_repr();350Ok(apply_method_physical_numeric!(&s, gt_eq, rhs))351}352353fn lt(&self, rhs: Rhs) -> Self::Item {354validate_types(self.dtype(), &DataType::Int8)?;355let s = self.to_physical_repr();356Ok(apply_method_physical_numeric!(&s, lt, rhs))357}358359fn lt_eq(&self, rhs: Rhs) -> Self::Item {360validate_types(self.dtype(), &DataType::Int8)?;361let s = self.to_physical_repr();362Ok(apply_method_physical_numeric!(&s, lt_eq, rhs))363}364}365366impl ChunkCompareEq<&str> for Series {367type Item = PolarsResult<BooleanChunked>;368369fn equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {370validate_types(self.dtype(), &DataType::String)?;371match self.dtype() {372DataType::String => Ok(self.str().unwrap().equal(rhs)),373#[cfg(feature = "dtype-categorical")]374DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(375with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {376self.cat::<$C>().unwrap().equal(rhs)377}),378),379#[cfg(feature = "dtype-extension")]380DataType::Extension(_, _) => self.ext().unwrap().storage().equal(rhs),381_ => Ok(BooleanChunked::full(self.name().clone(), false, self.len())),382}383}384385fn equal_missing(&self, rhs: &str) -> Self::Item {386validate_types(self.dtype(), &DataType::String)?;387match self.dtype() {388DataType::String => Ok(self.str().unwrap().equal_missing(rhs)),389#[cfg(feature = "dtype-categorical")]390DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(391with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {392self.cat::<$C>().unwrap().equal_missing(rhs)393}),394),395#[cfg(feature = "dtype-extension")]396DataType::Extension(_, _) => self.ext().unwrap().storage().equal_missing(rhs),397_ => Ok(replace_non_null(398self.name().clone(),399self.0.chunks(),400false,401)),402}403}404405fn not_equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {406validate_types(self.dtype(), &DataType::String)?;407match self.dtype() {408DataType::String => Ok(self.str().unwrap().not_equal(rhs)),409#[cfg(feature = "dtype-categorical")]410DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(411with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {412self.cat::<$C>().unwrap().not_equal(rhs)413}),414),415#[cfg(feature = "dtype-extension")]416DataType::Extension(_, _) => self.ext().unwrap().storage().not_equal(rhs),417_ => Ok(BooleanChunked::full(self.name().clone(), true, self.len())),418}419}420421fn not_equal_missing(&self, rhs: &str) -> Self::Item {422validate_types(self.dtype(), &DataType::String)?;423match self.dtype() {424DataType::String => Ok(self.str().unwrap().not_equal_missing(rhs)),425#[cfg(feature = "dtype-categorical")]426DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(427with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {428self.cat::<$C>().unwrap().not_equal_missing(rhs)429}),430),431#[cfg(feature = "dtype-extension")]432DataType::Extension(_, _) => self.ext().unwrap().storage().not_equal_missing(rhs),433_ => Ok(replace_non_null(self.name().clone(), self.0.chunks(), true)),434}435}436}437438impl ChunkCompareIneq<&str> for Series {439type Item = PolarsResult<BooleanChunked>;440441fn gt(&self, rhs: &str) -> Self::Item {442validate_types(self.dtype(), &DataType::String)?;443match self.dtype() {444DataType::String => Ok(self.str().unwrap().gt(rhs)),445#[cfg(feature = "dtype-categorical")]446DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(447with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {448self.cat::<$C>().unwrap().gt(rhs)449}),450),451#[cfg(feature = "dtype-extension")]452DataType::Extension(_, _) => self.ext().unwrap().storage().gt(rhs),453_ => polars_bail!(454ComputeError: "cannot compare str value to series of type {}", self.dtype(),455),456}457}458459fn gt_eq(&self, rhs: &str) -> Self::Item {460validate_types(self.dtype(), &DataType::String)?;461match self.dtype() {462DataType::String => Ok(self.str().unwrap().gt_eq(rhs)),463#[cfg(feature = "dtype-categorical")]464DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(465with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {466self.cat::<$C>().unwrap().gt_eq(rhs)467}),468),469#[cfg(feature = "dtype-extension")]470DataType::Extension(_, _) => self.ext().unwrap().storage().gt_eq(rhs),471_ => polars_bail!(472ComputeError: "cannot compare str value to series of type {}", self.dtype(),473),474}475}476477fn lt(&self, rhs: &str) -> Self::Item {478validate_types(self.dtype(), &DataType::String)?;479match self.dtype() {480DataType::String => Ok(self.str().unwrap().lt(rhs)),481#[cfg(feature = "dtype-categorical")]482DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(483with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {484self.cat::<$C>().unwrap().lt(rhs)485}),486),487#[cfg(feature = "dtype-extension")]488DataType::Extension(_, _) => self.ext().unwrap().storage().lt(rhs),489_ => polars_bail!(490ComputeError: "cannot compare str value to series of type {}", self.dtype(),491),492}493}494495fn lt_eq(&self, rhs: &str) -> Self::Item {496validate_types(self.dtype(), &DataType::String)?;497match self.dtype() {498DataType::String => Ok(self.str().unwrap().lt_eq(rhs)),499#[cfg(feature = "dtype-categorical")]500DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(501with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {502self.cat::<$C>().unwrap().lt_eq(rhs)503}),504),505#[cfg(feature = "dtype-extension")]506DataType::Extension(_, _) => self.ext().unwrap().storage().lt_eq(rhs),507_ => polars_bail!(508ComputeError: "cannot compare str value to series of type {}", self.dtype(),509),510}511}512}513514515