// Path: blob/main/crates/polars-ops/src/chunked_array/binary/namespace.rs
// 8375 views
#[cfg(feature = "binary_encoding")]1use std::borrow::Cow;23#[cfg(feature = "binary_encoding")]4use arrow::array::Array;5#[cfg(feature = "binary_encoding")]6use base64::Engine as _;7#[cfg(feature = "binary_encoding")]8use base64::engine::general_purpose;9use memchr::memmem::find;10use polars_compute::cast::{binview_to_fixed_size_list_dyn, binview_to_primitive_dyn};11use polars_compute::size::binary_size_bytes;12use polars_core::prelude::arity::{broadcast_binary_elementwise_values, unary_elementwise_values};1314use super::*;1516pub trait BinaryNameSpaceImpl: AsBinary {17/// Slice the binary values.18///19/// Determines a slice starting from `offset` and with length `length` of each of the elements.20/// `offset` can be negative, in which case the start counts from the end of the bytes.21fn bin_slice(&self, offset: &Column, length: &Column) -> PolarsResult<BinaryChunked> {22let ca = self.as_binary();23let offset = offset.cast(&DataType::Int64)?;24let length = length.strict_cast(&DataType::UInt64)?;2526Ok(super::slice::slice(ca, offset.i64()?, length.u64()?))27}28/// Slice the first `n` bytes of the binary value.29///30/// Determines a slice starting at the beginning of the binary data up to offset `n` of each31/// element. `n` can be negative, in which case the slice ends `n` bytes from the end.32fn bin_head(&self, n: &Column) -> PolarsResult<BinaryChunked> {33let ca = self.as_binary();34let n = n.strict_cast(&DataType::Int64)?;3536super::slice::head(ca, n.i64()?)37}3839/// Slice the last `n` bytes of the binary value.40///41/// Determines a slice starting at offset `n` of each element. 
`n` can be42/// negative, in which case the slice begins `n` bytes from the start.43fn bin_tail(&self, n: &Column) -> PolarsResult<BinaryChunked> {44let ca = self.as_binary();45let n = n.strict_cast(&DataType::Int64)?;4647super::slice::tail(ca, n.i64()?)48}4950/// Check if binary contains given literal51fn contains(&self, lit: &[u8]) -> BooleanChunked {52let ca = self.as_binary();53let f = |s: &[u8]| find(s, lit).is_some();54unary_elementwise_values(ca, f)55}5657fn contains_chunked(&self, lit: &BinaryChunked) -> PolarsResult<BooleanChunked> {58let ca = self.as_binary();59Ok(match lit.len() {601 => match lit.get(0) {61Some(lit) => ca.contains(lit),62None => BooleanChunked::full_null(ca.name().clone(), ca.len()),63},64_ => {65polars_ensure!(66ca.len() == lit.len() || ca.len() == 1,67length_mismatch = "bin.contains",68ca.len(),69lit.len()70);71broadcast_binary_elementwise_values(ca, lit, |src, lit| find(src, lit).is_some())72},73})74}7576/// Check if strings ends with a substring77fn ends_with(&self, sub: &[u8]) -> BooleanChunked {78let ca = self.as_binary();79let f = |s: &[u8]| s.ends_with(sub);80ca.apply_nonnull_values_generic(DataType::Boolean, f)81}8283/// Check if strings starts with a substring84fn starts_with(&self, sub: &[u8]) -> BooleanChunked {85let ca = self.as_binary();86let f = |s: &[u8]| s.starts_with(sub);87ca.apply_nonnull_values_generic(DataType::Boolean, f)88}8990fn starts_with_chunked(&self, prefix: &BinaryChunked) -> PolarsResult<BooleanChunked> {91let ca = self.as_binary();92Ok(match prefix.len() {931 => match prefix.get(0) {94Some(s) => self.starts_with(s),95None => BooleanChunked::full_null(ca.name().clone(), ca.len()),96},97_ => {98polars_ensure!(99ca.len() == prefix.len() || ca.len() == 1,100length_mismatch = "bin.starts_with",101ca.len(),102prefix.len()103);104broadcast_binary_elementwise_values(ca, prefix, |s, sub| s.starts_with(sub))105},106})107}108109fn ends_with_chunked(&self, suffix: &BinaryChunked) -> PolarsResult<BooleanChunked> 
{110let ca = self.as_binary();111Ok(match suffix.len() {1121 => match suffix.get(0) {113Some(s) => self.ends_with(s),114None => BooleanChunked::full_null(ca.name().clone(), ca.len()),115},116_ => {117polars_ensure!(118ca.len() == suffix.len() || ca.len() == 1,119length_mismatch = "bin.ends_with",120ca.len(),121suffix.len()122);123broadcast_binary_elementwise_values(ca, suffix, |s, sub| s.ends_with(sub))124},125})126}127128/// Get the size of the binary values in bytes.129fn size_bytes(&self) -> UInt32Chunked {130let ca = self.as_binary();131ca.apply_kernel_cast(&binary_size_bytes)132}133134#[cfg(feature = "binary_encoding")]135fn hex_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {136let ca = self.as_binary();137if strict {138ca.try_apply_nonnull_values_generic(|s| {139hex::decode(s).map_err(|_| {140polars_err!(141ComputeError:142"invalid `hex` encoding found; try setting `strict=false` to ignore"143)144})145})146} else {147Ok(ca.apply(|opt_s| opt_s.and_then(|s| hex::decode(s).ok().map(Cow::Owned))))148}149}150151#[cfg(feature = "binary_encoding")]152fn hex_encode(&self) -> Series {153let ca = self.as_binary();154unsafe {155ca.apply_values(|s| hex::encode(s).into_bytes().into())156.cast_unchecked(&DataType::String)157.unwrap()158}159}160161#[cfg(feature = "binary_encoding")]162fn base64_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {163let ca = self.as_binary();164if strict {165ca.try_apply_nonnull_values_generic(|s| {166general_purpose::STANDARD.decode(s).map_err(|_e| {167polars_err!(168ComputeError:169"invalid `base64` encoding found; try setting `strict=false` to ignore"170)171})172})173} else {174Ok(ca.apply(|opt_s| {175opt_s.and_then(|s| general_purpose::STANDARD.decode(s).ok().map(Cow::Owned))176}))177}178}179180#[cfg(feature = "binary_encoding")]181fn base64_encode(&self) -> Series {182let ca = self.as_binary();183unsafe {184ca.apply_values(|s| 
general_purpose::STANDARD.encode(s).into_bytes().into())185.cast_unchecked(&DataType::String)186.unwrap()187}188}189190#[cfg(feature = "binary_encoding")]191fn reinterpret(&self, dtype: &DataType, is_little_endian: bool) -> PolarsResult<Series> {192unsafe {193Ok(Series::from_chunks_and_dtype_unchecked(194self.as_binary().name().clone(),195self._reinterpret_inner(dtype, is_little_endian)?,196dtype,197))198}199}200201#[cfg(feature = "binary_encoding")]202fn _reinterpret_inner(203&self,204dtype: &DataType,205is_little_endian: bool,206) -> PolarsResult<Vec<Box<dyn Array>>> {207use polars_core::with_match_physical_numeric_polars_type;208209let ca = self.as_binary();210211match dtype {212dtype if dtype.is_primitive_numeric() || dtype.is_temporal() => {213let dtype = dtype.to_physical();214let arrow_data_type = dtype215.to_arrow(CompatLevel::newest())216.underlying_physical_type();217with_match_physical_numeric_polars_type!(dtype, |$T| {218unsafe {219ca.chunks().iter().map(|chunk| {220binview_to_primitive_dyn::<<$T as PolarsNumericType>::Native>(221&**chunk,222&arrow_data_type,223is_little_endian,224)225}).collect()226}227})228},229#[cfg(feature = "dtype-array")]230DataType::Array(inner_dtype, array_width)231if inner_dtype.is_primitive_numeric() || inner_dtype.is_temporal() =>232{233let inner_dtype = inner_dtype.to_physical();234let result: Vec<ArrayRef> = with_match_physical_numeric_polars_type!(inner_dtype, |$T| {235unsafe {236ca.chunks().iter().map(|chunk| {237binview_to_fixed_size_list_dyn::<<$T as PolarsNumericType>::Native>(238&**chunk,239*array_width,240is_little_endian241)242}).collect::<Result<Vec<ArrayRef>, _>>()243}244})?;245Ok(result)246},247_ => Err(248polars_err!(InvalidOperation: "unsupported data type {:?} in reinterpret. Only numeric or temporal types, or Arrays of those, are allowed.", dtype),249),250}251}252}253254impl BinaryNameSpaceImpl for BinaryChunked {}255256257