Path: blob/main/crates/polars-compute/src/comparisons/view.rs
6939 views
use arrow::array::{BinaryViewArray, Utf8ViewArray};1use arrow::bitmap::Bitmap;23use super::TotalEqKernel;4use crate::comparisons::TotalOrdKernel;56// If s fits in 12 bytes, returns the view encoding it would have in a7// BinaryViewArray.8fn small_view_encoding(s: &[u8]) -> Option<u128> {9if s.len() > 12 {10return None;11}1213let mut tmp = [0u8; 16];14tmp[0] = s.len() as u8;15tmp[4..4 + s.len()].copy_from_slice(s);16Some(u128::from_le_bytes(tmp))17}1819// Loads (up to) the first 4 bytes of s as little-endian, padded with zeros.20fn load_prefix(s: &[u8]) -> u32 {21let start = &s[..s.len().min(4)];22let mut tmp = [0u8; 4];23tmp[..start.len()].copy_from_slice(start);24u32::from_le_bytes(tmp)25}2627fn broadcast_inequality(28arr: &BinaryViewArray,29scalar: &[u8],30cmp_prefix: impl Fn(u32, u32) -> bool,31cmp_str: impl Fn(&[u8], &[u8]) -> bool,32) -> Bitmap {33let views = arr.views().as_slice();34let prefix = load_prefix(scalar);35let be_prefix = prefix.to_be();36Bitmap::from_trusted_len_iter((0..arr.len()).map(|i| unsafe {37let v_prefix = (views.get_unchecked(i).as_u128() >> 32) as u32;38if v_prefix != prefix {39cmp_prefix(v_prefix.to_be(), be_prefix)40} else {41cmp_str(arr.value_unchecked(i), scalar)42}43}))44}4546impl TotalEqKernel for BinaryViewArray {47type Scalar = [u8];4849fn tot_eq_kernel(&self, other: &Self) -> Bitmap {50debug_assert!(self.len() == other.len());5152let slf_views = self.views().as_slice();53let other_views = other.views().as_slice();5455Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {56let av = slf_views.get_unchecked(i).as_u128();57let bv = other_views.get_unchecked(i).as_u128();5859// First 64 bits contain length and prefix.60let a_len_prefix = av as u64;61let b_len_prefix = bv as u64;62if a_len_prefix != b_len_prefix {63return false;64}6566let alen = av as u32;67if alen <= 12 {68// String is fully inlined, compare top 64 bits. Bottom bits were69// tested equal before, which also ensures the lengths are equal.70(av >> 64) as u64 == (bv >> 64) as u6471} else {72self.value_unchecked(i) == other.value_unchecked(i)73}74}))75}7677fn tot_ne_kernel(&self, other: &Self) -> Bitmap {78debug_assert!(self.len() == other.len());7980let slf_views = self.views().as_slice();81let other_views = other.views().as_slice();8283Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {84let av = slf_views.get_unchecked(i).as_u128();85let bv = other_views.get_unchecked(i).as_u128();8687// First 64 bits contain length and prefix.88let a_len_prefix = av as u64;89let b_len_prefix = bv as u64;90if a_len_prefix != b_len_prefix {91return true;92}9394let alen = av as u32;95if alen <= 12 {96// String is fully inlined, compare top 64 bits. Bottom bits were97// tested equal before, which also ensures the lengths are equal.98(av >> 64) as u64 != (bv >> 64) as u6499} else {100self.value_unchecked(i) != other.value_unchecked(i)101}102}))103}104105fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {106if let Some(val) = small_view_encoding(other) {107Bitmap::from_trusted_len_iter(self.views().iter().map(|v| v.as_u128() == val))108} else {109let slf_views = self.views().as_slice();110let prefix = u32::from_le_bytes(other[..4].try_into().unwrap());111let prefix_len = ((prefix as u64) << 32) | other.len() as u64;112Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {113let v_prefix_len = slf_views.get_unchecked(i).as_u128() as u64;114if v_prefix_len != prefix_len {115false116} else {117self.value_unchecked(i) == other118}119}))120}121}122123fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {124if let Some(val) = small_view_encoding(other) {125Bitmap::from_trusted_len_iter(self.views().iter().map(|v| v.as_u128() != val))126} else {127let slf_views = self.views().as_slice();128let prefix = u32::from_le_bytes(other[..4].try_into().unwrap());129let prefix_len = ((prefix as u64) << 32) | other.len() as u64;130Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {131let v_prefix_len = slf_views.get_unchecked(i).as_u128() as u64;132if v_prefix_len != prefix_len {133true134} else {135self.value_unchecked(i) != other136}137}))138}139}140}141142impl TotalOrdKernel for BinaryViewArray {143type Scalar = [u8];144145fn tot_lt_kernel(&self, other: &Self) -> Bitmap {146debug_assert!(self.len() == other.len());147148let slf_views = self.views().as_slice();149let other_views = other.views().as_slice();150151Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {152let av = slf_views.get_unchecked(i).as_u128();153let bv = other_views.get_unchecked(i).as_u128();154155// First 64 bits contain length and prefix.156// Only check prefix.157let a_prefix = (av >> 32) as u32;158let b_prefix = (bv >> 32) as u32;159if a_prefix != b_prefix {160a_prefix.to_be() < b_prefix.to_be()161} else {162self.value_unchecked(i) < other.value_unchecked(i)163}164}))165}166167fn tot_le_kernel(&self, other: &Self) -> Bitmap {168debug_assert!(self.len() == other.len());169170let slf_views = self.views().as_slice();171let other_views = other.views().as_slice();172173Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {174let av = slf_views.get_unchecked(i).as_u128();175let bv = other_views.get_unchecked(i).as_u128();176177// First 64 bits contain length and prefix.178// Only check prefix.179let a_prefix = (av >> 32) as u32;180let b_prefix = (bv >> 32) as u32;181if a_prefix != b_prefix {182a_prefix.to_be() < b_prefix.to_be()183} else {184self.value_unchecked(i) <= other.value_unchecked(i)185}186}))187}188189fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {190broadcast_inequality(self, other, |a, b| a < b, |a, b| a < b)191}192193fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {194broadcast_inequality(self, other, |a, b| a <= b, |a, b| a <= b)195}196197fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {198broadcast_inequality(self, other, |a, b| a > b, |a, b| a > b)199}200201fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {202broadcast_inequality(self, other, |a, b| a >= b, |a, b| a >= b)203}204}205206impl TotalEqKernel for Utf8ViewArray {207type Scalar = str;208209fn tot_eq_kernel(&self, other: &Self) -> Bitmap {210self.to_binview().tot_eq_kernel(&other.to_binview())211}212213fn tot_ne_kernel(&self, other: &Self) -> Bitmap {214self.to_binview().tot_ne_kernel(&other.to_binview())215}216217fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {218self.to_binview().tot_eq_kernel_broadcast(other.as_bytes())219}220221fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {222self.to_binview().tot_ne_kernel_broadcast(other.as_bytes())223}224}225226impl TotalOrdKernel for Utf8ViewArray {227type Scalar = str;228229fn tot_lt_kernel(&self, other: &Self) -> Bitmap {230self.to_binview().tot_lt_kernel(&other.to_binview())231}232233fn tot_le_kernel(&self, other: &Self) -> Bitmap {234self.to_binview().tot_le_kernel(&other.to_binview())235}236237fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {238self.to_binview().tot_lt_kernel_broadcast(other.as_bytes())239}240241fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {242self.to_binview().tot_le_kernel_broadcast(other.as_bytes())243}244245fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {246self.to_binview().tot_gt_kernel_broadcast(other.as_bytes())247}248249fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {250self.to_binview().tot_ge_kernel_broadcast(other.as_bytes())251}252}253254255