Path: blob/main/crates/polars-ops/src/chunked_array/binary/namespace.rs
6939 views
#[cfg(feature = "binary_encoding")]1use std::borrow::Cow;23#[cfg(feature = "binary_encoding")]4use arrow::array::Array;5#[cfg(feature = "binary_encoding")]6use base64::Engine as _;7#[cfg(feature = "binary_encoding")]8use base64::engine::general_purpose;9use memchr::memmem::find;10use polars_compute::cast::{binview_to_fixed_size_list_dyn, binview_to_primitive_dyn};11use polars_compute::size::binary_size_bytes;12use polars_core::prelude::arity::{broadcast_binary_elementwise_values, unary_elementwise_values};1314use super::*;1516pub trait BinaryNameSpaceImpl: AsBinary {17/// Check if binary contains given literal18fn contains(&self, lit: &[u8]) -> BooleanChunked {19let ca = self.as_binary();20let f = |s: &[u8]| find(s, lit).is_some();21unary_elementwise_values(ca, f)22}2324fn contains_chunked(&self, lit: &BinaryChunked) -> PolarsResult<BooleanChunked> {25let ca = self.as_binary();26Ok(match lit.len() {271 => match lit.get(0) {28Some(lit) => ca.contains(lit),29None => BooleanChunked::full_null(ca.name().clone(), ca.len()),30},31_ => {32polars_ensure!(33ca.len() == lit.len() || ca.len() == 1,34length_mismatch = "bin.contains",35ca.len(),36lit.len()37);38broadcast_binary_elementwise_values(ca, lit, |src, lit| find(src, lit).is_some())39},40})41}4243/// Check if strings ends with a substring44fn ends_with(&self, sub: &[u8]) -> BooleanChunked {45let ca = self.as_binary();46let f = |s: &[u8]| s.ends_with(sub);47ca.apply_nonnull_values_generic(DataType::Boolean, f)48}4950/// Check if strings starts with a substring51fn starts_with(&self, sub: &[u8]) -> BooleanChunked {52let ca = self.as_binary();53let f = |s: &[u8]| s.starts_with(sub);54ca.apply_nonnull_values_generic(DataType::Boolean, f)55}5657fn starts_with_chunked(&self, prefix: &BinaryChunked) -> PolarsResult<BooleanChunked> {58let ca = self.as_binary();59Ok(match prefix.len() {601 => match prefix.get(0) {61Some(s) => self.starts_with(s),62None => BooleanChunked::full_null(ca.name().clone(), ca.len()),63},64_ => {65polars_ensure!(66ca.len() == prefix.len() || ca.len() == 1,67length_mismatch = "bin.starts_with",68ca.len(),69prefix.len()70);71broadcast_binary_elementwise_values(ca, prefix, |s, sub| s.starts_with(sub))72},73})74}7576fn ends_with_chunked(&self, suffix: &BinaryChunked) -> PolarsResult<BooleanChunked> {77let ca = self.as_binary();78Ok(match suffix.len() {791 => match suffix.get(0) {80Some(s) => self.ends_with(s),81None => BooleanChunked::full_null(ca.name().clone(), ca.len()),82},83_ => {84polars_ensure!(85ca.len() == suffix.len() || ca.len() == 1,86length_mismatch = "bin.ends_with",87ca.len(),88suffix.len()89);90broadcast_binary_elementwise_values(ca, suffix, |s, sub| s.ends_with(sub))91},92})93}9495/// Get the size of the binary values in bytes.96fn size_bytes(&self) -> UInt32Chunked {97let ca = self.as_binary();98ca.apply_kernel_cast(&binary_size_bytes)99}100101#[cfg(feature = "binary_encoding")]102fn hex_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {103let ca = self.as_binary();104if strict {105ca.try_apply_nonnull_values_generic(|s| {106hex::decode(s).map_err(|_| {107polars_err!(108ComputeError:109"invalid `hex` encoding found; try setting `strict=false` to ignore"110)111})112})113} else {114Ok(ca.apply(|opt_s| opt_s.and_then(|s| hex::decode(s).ok().map(Cow::Owned))))115}116}117118#[cfg(feature = "binary_encoding")]119fn hex_encode(&self) -> Series {120let ca = self.as_binary();121unsafe {122ca.apply_values(|s| hex::encode(s).into_bytes().into())123.cast_unchecked(&DataType::String)124.unwrap()125}126}127128#[cfg(feature = "binary_encoding")]129fn base64_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {130let ca = self.as_binary();131if strict {132ca.try_apply_nonnull_values_generic(|s| {133general_purpose::STANDARD.decode(s).map_err(|_e| {134polars_err!(135ComputeError:136"invalid `base64` encoding found; try setting `strict=false` to ignore"137)138})139})140} else {141Ok(ca.apply(|opt_s| {142opt_s.and_then(|s| general_purpose::STANDARD.decode(s).ok().map(Cow::Owned))143}))144}145}146147#[cfg(feature = "binary_encoding")]148fn base64_encode(&self) -> Series {149let ca = self.as_binary();150unsafe {151ca.apply_values(|s| general_purpose::STANDARD.encode(s).into_bytes().into())152.cast_unchecked(&DataType::String)153.unwrap()154}155}156157#[cfg(feature = "binary_encoding")]158fn reinterpret(&self, dtype: &DataType, is_little_endian: bool) -> PolarsResult<Series> {159unsafe {160Ok(Series::from_chunks_and_dtype_unchecked(161self.as_binary().name().clone(),162self._reinterpret_inner(dtype, is_little_endian)?,163dtype,164))165}166}167168#[cfg(feature = "binary_encoding")]169fn _reinterpret_inner(170&self,171dtype: &DataType,172is_little_endian: bool,173) -> PolarsResult<Vec<Box<dyn Array>>> {174use polars_core::with_match_physical_numeric_polars_type;175176let ca = self.as_binary();177178match dtype {179dtype if dtype.is_primitive_numeric() || dtype.is_temporal() => {180let dtype = dtype.to_physical();181let arrow_data_type = dtype182.to_arrow(CompatLevel::newest())183.underlying_physical_type();184with_match_physical_numeric_polars_type!(dtype, |$T| {185unsafe {186ca.chunks().iter().map(|chunk| {187binview_to_primitive_dyn::<<$T as PolarsNumericType>::Native>(188&**chunk,189&arrow_data_type,190is_little_endian,191)192}).collect()193}194})195},196#[cfg(feature = "dtype-array")]197DataType::Array(inner_dtype, array_width)198if inner_dtype.is_primitive_numeric() || inner_dtype.is_temporal() =>199{200let inner_dtype = inner_dtype.to_physical();201let result: Vec<ArrayRef> = with_match_physical_numeric_polars_type!(inner_dtype, |$T| {202unsafe {203ca.chunks().iter().map(|chunk| {204binview_to_fixed_size_list_dyn::<<$T as PolarsNumericType>::Native>(205&**chunk,206*array_width,207is_little_endian208)209}).collect::<Result<Vec<ArrayRef>, _>>()210}211})?;212Ok(result)213},214_ => Err(215polars_err!(InvalidOperation: "unsupported data type {:?} in reinterpret. Only numeric or temporal types, or Arrays of those, are allowed.", dtype),216),217}218}219}220221impl BinaryNameSpaceImpl for BinaryChunked {}222223224