Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/chunked_array/binary/namespace.rs
6939 views
1
#[cfg(feature = "binary_encoding")]
2
use std::borrow::Cow;
3
4
#[cfg(feature = "binary_encoding")]
5
use arrow::array::Array;
6
#[cfg(feature = "binary_encoding")]
7
use base64::Engine as _;
8
#[cfg(feature = "binary_encoding")]
9
use base64::engine::general_purpose;
10
use memchr::memmem::find;
11
use polars_compute::cast::{binview_to_fixed_size_list_dyn, binview_to_primitive_dyn};
12
use polars_compute::size::binary_size_bytes;
13
use polars_core::prelude::arity::{broadcast_binary_elementwise_values, unary_elementwise_values};
14
15
use super::*;
16
17
pub trait BinaryNameSpaceImpl: AsBinary {
18
/// Check if binary contains given literal
19
fn contains(&self, lit: &[u8]) -> BooleanChunked {
20
let ca = self.as_binary();
21
let f = |s: &[u8]| find(s, lit).is_some();
22
unary_elementwise_values(ca, f)
23
}
24
25
fn contains_chunked(&self, lit: &BinaryChunked) -> PolarsResult<BooleanChunked> {
26
let ca = self.as_binary();
27
Ok(match lit.len() {
28
1 => match lit.get(0) {
29
Some(lit) => ca.contains(lit),
30
None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
31
},
32
_ => {
33
polars_ensure!(
34
ca.len() == lit.len() || ca.len() == 1,
35
length_mismatch = "bin.contains",
36
ca.len(),
37
lit.len()
38
);
39
broadcast_binary_elementwise_values(ca, lit, |src, lit| find(src, lit).is_some())
40
},
41
})
42
}
43
44
/// Check if strings ends with a substring
45
fn ends_with(&self, sub: &[u8]) -> BooleanChunked {
46
let ca = self.as_binary();
47
let f = |s: &[u8]| s.ends_with(sub);
48
ca.apply_nonnull_values_generic(DataType::Boolean, f)
49
}
50
51
/// Check if strings starts with a substring
52
fn starts_with(&self, sub: &[u8]) -> BooleanChunked {
53
let ca = self.as_binary();
54
let f = |s: &[u8]| s.starts_with(sub);
55
ca.apply_nonnull_values_generic(DataType::Boolean, f)
56
}
57
58
fn starts_with_chunked(&self, prefix: &BinaryChunked) -> PolarsResult<BooleanChunked> {
59
let ca = self.as_binary();
60
Ok(match prefix.len() {
61
1 => match prefix.get(0) {
62
Some(s) => self.starts_with(s),
63
None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
64
},
65
_ => {
66
polars_ensure!(
67
ca.len() == prefix.len() || ca.len() == 1,
68
length_mismatch = "bin.starts_with",
69
ca.len(),
70
prefix.len()
71
);
72
broadcast_binary_elementwise_values(ca, prefix, |s, sub| s.starts_with(sub))
73
},
74
})
75
}
76
77
fn ends_with_chunked(&self, suffix: &BinaryChunked) -> PolarsResult<BooleanChunked> {
78
let ca = self.as_binary();
79
Ok(match suffix.len() {
80
1 => match suffix.get(0) {
81
Some(s) => self.ends_with(s),
82
None => BooleanChunked::full_null(ca.name().clone(), ca.len()),
83
},
84
_ => {
85
polars_ensure!(
86
ca.len() == suffix.len() || ca.len() == 1,
87
length_mismatch = "bin.ends_with",
88
ca.len(),
89
suffix.len()
90
);
91
broadcast_binary_elementwise_values(ca, suffix, |s, sub| s.ends_with(sub))
92
},
93
})
94
}
95
96
/// Get the size of the binary values in bytes.
97
fn size_bytes(&self) -> UInt32Chunked {
98
let ca = self.as_binary();
99
ca.apply_kernel_cast(&binary_size_bytes)
100
}
101
102
#[cfg(feature = "binary_encoding")]
103
fn hex_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
104
let ca = self.as_binary();
105
if strict {
106
ca.try_apply_nonnull_values_generic(|s| {
107
hex::decode(s).map_err(|_| {
108
polars_err!(
109
ComputeError:
110
"invalid `hex` encoding found; try setting `strict=false` to ignore"
111
)
112
})
113
})
114
} else {
115
Ok(ca.apply(|opt_s| opt_s.and_then(|s| hex::decode(s).ok().map(Cow::Owned))))
116
}
117
}
118
119
#[cfg(feature = "binary_encoding")]
120
fn hex_encode(&self) -> Series {
121
let ca = self.as_binary();
122
unsafe {
123
ca.apply_values(|s| hex::encode(s).into_bytes().into())
124
.cast_unchecked(&DataType::String)
125
.unwrap()
126
}
127
}
128
129
#[cfg(feature = "binary_encoding")]
130
fn base64_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
131
let ca = self.as_binary();
132
if strict {
133
ca.try_apply_nonnull_values_generic(|s| {
134
general_purpose::STANDARD.decode(s).map_err(|_e| {
135
polars_err!(
136
ComputeError:
137
"invalid `base64` encoding found; try setting `strict=false` to ignore"
138
)
139
})
140
})
141
} else {
142
Ok(ca.apply(|opt_s| {
143
opt_s.and_then(|s| general_purpose::STANDARD.decode(s).ok().map(Cow::Owned))
144
}))
145
}
146
}
147
148
#[cfg(feature = "binary_encoding")]
149
fn base64_encode(&self) -> Series {
150
let ca = self.as_binary();
151
unsafe {
152
ca.apply_values(|s| general_purpose::STANDARD.encode(s).into_bytes().into())
153
.cast_unchecked(&DataType::String)
154
.unwrap()
155
}
156
}
157
158
#[cfg(feature = "binary_encoding")]
159
fn reinterpret(&self, dtype: &DataType, is_little_endian: bool) -> PolarsResult<Series> {
160
unsafe {
161
Ok(Series::from_chunks_and_dtype_unchecked(
162
self.as_binary().name().clone(),
163
self._reinterpret_inner(dtype, is_little_endian)?,
164
dtype,
165
))
166
}
167
}
168
169
#[cfg(feature = "binary_encoding")]
170
fn _reinterpret_inner(
171
&self,
172
dtype: &DataType,
173
is_little_endian: bool,
174
) -> PolarsResult<Vec<Box<dyn Array>>> {
175
use polars_core::with_match_physical_numeric_polars_type;
176
177
let ca = self.as_binary();
178
179
match dtype {
180
dtype if dtype.is_primitive_numeric() || dtype.is_temporal() => {
181
let dtype = dtype.to_physical();
182
let arrow_data_type = dtype
183
.to_arrow(CompatLevel::newest())
184
.underlying_physical_type();
185
with_match_physical_numeric_polars_type!(dtype, |$T| {
186
unsafe {
187
ca.chunks().iter().map(|chunk| {
188
binview_to_primitive_dyn::<<$T as PolarsNumericType>::Native>(
189
&**chunk,
190
&arrow_data_type,
191
is_little_endian,
192
)
193
}).collect()
194
}
195
})
196
},
197
#[cfg(feature = "dtype-array")]
198
DataType::Array(inner_dtype, array_width)
199
if inner_dtype.is_primitive_numeric() || inner_dtype.is_temporal() =>
200
{
201
let inner_dtype = inner_dtype.to_physical();
202
let result: Vec<ArrayRef> = with_match_physical_numeric_polars_type!(inner_dtype, |$T| {
203
unsafe {
204
ca.chunks().iter().map(|chunk| {
205
binview_to_fixed_size_list_dyn::<<$T as PolarsNumericType>::Native>(
206
&**chunk,
207
*array_width,
208
is_little_endian
209
)
210
}).collect::<Result<Vec<ArrayRef>, _>>()
211
}
212
})?;
213
Ok(result)
214
},
215
_ => Err(
216
polars_err!(InvalidOperation: "unsupported data type {:?} in reinterpret. Only numeric or temporal types, or Arrays of those, are allowed.", dtype),
217
),
218
}
219
}
220
}
221
222
// `BinaryChunked` uses the trait's default method bodies as-is; nothing is overridden.
impl BinaryNameSpaceImpl for BinaryChunked {}
223
224