Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/chunked_array/strings/namespace.rs
8396 views
1
use arrow::array::ValueSize;
2
use arrow::legacy::kernels::string::*;
3
#[cfg(feature = "string_encoding")]
4
use base64::Engine as _;
5
#[cfg(feature = "string_encoding")]
6
use base64::engine::general_purpose;
7
#[cfg(feature = "string_to_integer")]
8
use num_traits::Num;
9
use polars_core::prelude::arity::*;
10
use polars_utils::regex_cache::{compile_regex, with_regex_cache};
11
12
use super::*;
13
#[cfg(feature = "binary_encoding")]
14
use crate::chunked_array::binary::BinaryNameSpaceImpl;
15
#[cfg(feature = "string_normalize")]
16
use crate::prelude::strings::normalize::UnicodeForm;
17
18
/// Identity function that pins down the signature of a regex-match closure.
///
/// Routing a closure through this helper makes the compiler infer it as
/// higher-ranked over both argument lifetimes, which the match closure needs.
#[inline(always)]
fn infer_re_match<F>(f: F) -> F
where
    F: FnMut(Option<&str>, Option<&str>) -> Option<bool>,
{
    f
}
26
27
/// Helper behind `StringNameSpaceImpl::to_integer`: parses every string of `ca` as an
/// integer of type `T`, taking the radix from `base`.
///
/// `ca` and `base` are combined element-wise with broadcasting. A null string or a null
/// base gives a null result. For a non-null pair, a base outside `2..=36` is an error.
///
/// When `strict` is `false`, strings that fail to parse become null. When `strict` is
/// `true`, any such failure produces a `ComputeError` that reports the number of failures,
/// up to ten distinct offending values, and the parser's message for the first of them.
#[cfg(feature = "string_to_integer")]
fn parse_integer<T>(
    ca: &ChunkedArray<StringType>,
    base: &UInt32Chunked,
    strict: bool,
) -> PolarsResult<Series>
where
    T: PolarsIntegerType,
    T::Native: Num,
    ChunkedArray<T>: IntoSeries,
    <<T as polars_core::datatypes::PolarsNumericType>::Native as num_traits::Num>::FromStrRadixErr:
        std::fmt::Display,
{
    let parse_one =
        |opt_s: Option<&str>, opt_base: Option<u32>| -> PolarsResult<Option<T::Native>> {
            match (opt_s, opt_base) {
                (Some(s), Some(base)) => {
                    if !(2..=36).contains(&base) {
                        polars_bail!(ComputeError: "`to_integer` called with invalid base '{base}'");
                    }
                    Ok(T::Native::from_str_radix(s, base).ok())
                },
                _ => Ok(None),
            }
        };
    let out: ChunkedArray<T> = broadcast_try_binary_elementwise(ca, base, parse_one)?;

    // Nothing to report in non-strict mode, or when parsing introduced no extra nulls.
    if !strict || ca.null_count() == out.null_count() {
        return Ok(out.into_series());
    }

    // A failure is a non-null input with a non-null base that still produced a null.
    let failure_mask = ca.is_not_null() & out.is_null() & base.is_not_null();
    let n_failures = failure_mask.num_trues();
    if n_failures == 0 {
        return Ok(out.into_series());
    }

    let some_failures = if ca.len() == 1 {
        ca.clone()
    } else {
        let all_failures = ca.filter(&failure_mask)?;
        // `.unique()` does not necessarily preserve the original order.
        let unique_failures_args = all_failures.arg_unique()?;
        all_failures.take(&unique_failures_args.slice(0, 10))?
    };

    // Re-run the parser on one failing (value, radix) pair to recover its error text.
    let describe = |s: &str, radix: u32| -> Option<String> {
        T::Native::from_str_radix(s, radix)
            .err()
            .map(|e| format!("{e}"))
    };
    let maybe_msg = if base.len() == 1 {
        // The failure mask requires a non-null base and at least one failure exists,
        // so the single base value is not null here.
        let radix = base.get(0).unwrap();
        some_failures.get(0).and_then(|s| describe(s, radix))
    } else {
        let base_failures = base.filter(&failure_mask)?;
        some_failures
            .get(0)
            .zip(base_failures.get(0))
            .and_then(|(s, radix)| describe(s, radix))
    };
    let some_error_msg =
        maybe_msg.unwrap_or_else(|| unreachable!("failed to extract ParseIntError"));

    polars_bail!(
        ComputeError:
        "strict integer parsing failed for {} value(s): {}; error message for the \
        first shown value: '{}' (consider non-strict parsing)",
        n_failures,
        some_failures.into_series().fmt_list(),
        some_error_msg
    );
}
104
105
pub trait StringNameSpaceImpl: AsString {
106
#[cfg(not(feature = "binary_encoding"))]
107
fn hex_decode(&self) -> PolarsResult<StringChunked> {
108
panic!("activate 'binary_encoding' feature")
109
}
110
111
/// Decode hex-encoded strings by viewing them as binary and delegating to the binary
/// namespace's `hex_decode`; `strict` is forwarded unchanged.
#[cfg(feature = "binary_encoding")]
fn hex_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
    self.as_string().as_binary().hex_decode(strict)
}
116
117
/// Encode every string value with `hex::encode`.
#[must_use]
#[cfg(feature = "string_encoding")]
fn hex_encode(&self) -> StringChunked {
    self.as_string().apply_values(|s| hex::encode(s).into())
}
123
124
#[cfg(not(feature = "binary_encoding"))]
125
fn base64_decode(&self) -> PolarsResult<StringChunked> {
126
panic!("activate 'binary_encoding' feature")
127
}
128
129
/// Decode base64-encoded strings by viewing them as binary and delegating to the binary
/// namespace's `base64_decode`; `strict` is forwarded unchanged.
#[cfg(feature = "binary_encoding")]
fn base64_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
    self.as_string().as_binary().base64_decode(strict)
}
134
135
/// Encode every string value with the `base64` crate's `STANDARD` engine.
#[must_use]
#[cfg(feature = "string_encoding")]
fn base64_encode(&self) -> StringChunked {
    self.as_string()
        .apply_values(|s| general_purpose::STANDARD.encode(s).into())
}
141
142
/// Parse each string as an integer written in the radix given by `base`.
///
/// `base` must have the same length as `self`, unless one of the two has length 1.
/// `dtype` selects the integer output type and defaults to `Int64`; a dtype that is not
/// one of the listed integer types is rejected with an `InvalidOperation` error.
/// `strict` is forwarded to the parsing helper.
#[cfg(feature = "string_to_integer")]
fn to_integer(
    &self,
    base: &UInt32Chunked,
    dtype: Option<DataType>,
    strict: bool,
) -> PolarsResult<Series> {
    let ca = self.as_string();

    polars_ensure!(
        ca.len() == base.len() || ca.len() == 1 || base.len() == 1,
        length_mismatch = "str.to_integer",
        ca.len(),
        base.len()
    );

    let target = dtype.unwrap_or(DataType::Int64);
    match target {
        DataType::Int8 => parse_integer::<Int8Type>(ca, base, strict),
        DataType::Int16 => parse_integer::<Int16Type>(ca, base, strict),
        DataType::Int32 => parse_integer::<Int32Type>(ca, base, strict),
        DataType::Int64 => parse_integer::<Int64Type>(ca, base, strict),
        DataType::Int128 => parse_integer::<Int128Type>(ca, base, strict),
        DataType::UInt8 => parse_integer::<UInt8Type>(ca, base, strict),
        DataType::UInt16 => parse_integer::<UInt16Type>(ca, base, strict),
        DataType::UInt32 => parse_integer::<UInt32Type>(ca, base, strict),
        DataType::UInt64 => parse_integer::<UInt64Type>(ca, base, strict),
        DataType::UInt128 => parse_integer::<UInt128Type>(ca, base, strict),
        unsupported => polars_bail!(InvalidOperation: "Invalid dtype {:?}", unsupported),
    }
}
173
174
fn contains_chunked(
175
&self,
176
pat: &StringChunked,
177
literal: bool,
178
strict: bool,
179
) -> PolarsResult<BooleanChunked> {
180
let ca = self.as_string();
181
match (ca.len(), pat.len()) {
182
(_, 1) => match pat.get(0) {
183
Some(pat) => {
184
if literal {
185
ca.contains_literal(pat)
186
} else {
187
ca.contains(pat, strict)
188
}
189
},
190
None => Ok(BooleanChunked::full_null(ca.name().clone(), ca.len())),
191
},
192
(1, _) if ca.null_count() == 1 => Ok(BooleanChunked::full_null(
193
ca.name().clone(),
194
ca.len().max(pat.len()),
195
)),
196
_ => {
197
if literal {
198
Ok(broadcast_binary_elementwise_values(ca, pat, |src, pat| {
199
src.contains(pat)
200
}))
201
} else if strict {
202
with_regex_cache(|reg_cache| {
203
broadcast_try_binary_elementwise(ca, pat, |opt_src, opt_pat| {
204
match (opt_src, opt_pat) {
205
(Some(src), Some(pat)) => {
206
let reg = reg_cache.compile(pat)?;
207
Ok(Some(reg.is_match(src)))
208
},
209
_ => Ok(None),
210
}
211
})
212
})
213
} else {
214
with_regex_cache(|reg_cache| {
215
Ok(broadcast_binary_elementwise(
216
ca,
217
pat,
218
infer_re_match(|src, pat| {
219
let reg = reg_cache.compile(pat?).ok()?;
220
Some(reg.is_match(src?))
221
}),
222
))
223
})
224
}
225
},
226
}
227
}
228
229
fn find_chunked(
230
&self,
231
pat: &StringChunked,
232
literal: bool,
233
strict: bool,
234
) -> PolarsResult<UInt32Chunked> {
235
let ca = self.as_string();
236
if pat.len() == 1 {
237
return if let Some(pat) = pat.get(0) {
238
if literal {
239
ca.find_literal(pat)
240
} else {
241
ca.find(pat, strict)
242
}
243
} else {
244
Ok(UInt32Chunked::full_null(ca.name().clone(), ca.len()))
245
};
246
} else if ca.len() == 1 && ca.null_count() == 1 {
247
return Ok(UInt32Chunked::full_null(
248
ca.name().clone(),
249
ca.len().max(pat.len()),
250
));
251
}
252
if literal {
253
Ok(broadcast_binary_elementwise(
254
ca,
255
pat,
256
|src: Option<&str>, pat: Option<&str>| src?.find(pat?).map(|idx| idx as u32),
257
))
258
} else {
259
with_regex_cache(|reg_cache| {
260
let matcher = |src: Option<&str>, pat: Option<&str>| -> PolarsResult<Option<u32>> {
261
if let (Some(src), Some(pat)) = (src, pat) {
262
let re = reg_cache.compile(pat)?;
263
return Ok(re.find(src).map(|m| m.start() as u32));
264
}
265
Ok(None)
266
};
267
broadcast_try_binary_elementwise(ca, pat, matcher)
268
})
269
}
270
}
271
272
/// Get the length of the string values as number of chars.
273
fn str_len_chars(&self) -> UInt32Chunked {
274
let ca = self.as_string();
275
ca.apply_kernel_cast(&string_len_chars)
276
}
277
278
/// Get the length of the string values as number of bytes.
279
fn str_len_bytes(&self) -> UInt32Chunked {
280
let ca = self.as_string();
281
ca.apply_kernel_cast(&utf8view_len_bytes)
282
}
283
284
/// Left-pad each string with `fill_char` until it reaches `length`.
///
/// Strings that are already at least `length` long are returned as-is.
#[cfg(feature = "string_pad")]
fn pad_start(&self, length: &UInt64Chunked, fill_char: char) -> StringChunked {
    pad::pad_start(self.as_string(), length, fill_char)
}
294
295
/// Right-pad each string with `fill_char` until it reaches `length`.
///
/// Strings that are already at least `length` long are returned as-is.
#[cfg(feature = "string_pad")]
fn pad_end(&self, length: &UInt64Chunked, fill_char: char) -> StringChunked {
    pad::pad_end(self.as_string(), length, fill_char)
}
305
306
/// Left-pad each string with zeros until it reaches `length`.
///
/// When the string starts with a `-` sign, the zeros are inserted after the sign
/// instead of before it. Strings that are already at least `length` long are
/// returned as-is.
#[cfg(feature = "string_pad")]
fn zfill(&self, length: &UInt64Chunked) -> StringChunked {
    pad::zfill(self.as_string(), length)
}
317
318
/// Check if strings contain a regex pattern.
319
fn contains(&self, pat: &str, strict: bool) -> PolarsResult<BooleanChunked> {
320
let ca = self.as_string();
321
let res_reg = polars_utils::regex_cache::compile_regex(pat);
322
let opt_reg = if strict { Some(res_reg?) } else { res_reg.ok() };
323
let out: BooleanChunked = if let Some(reg) = opt_reg {
324
unary_elementwise_values(ca, |s| reg.is_match(s))
325
} else {
326
BooleanChunked::full_null(ca.name().clone(), ca.len())
327
};
328
Ok(out)
329
}
330
331
/// Check if strings contain a given literal
332
fn contains_literal(&self, lit: &str) -> PolarsResult<BooleanChunked> {
333
// note: benchmarking shows that the regex engine is actually
334
// faster at finding literal matches than str::contains.
335
// ref: https://github.com/pola-rs/polars/pull/6811
336
self.contains(regex::escape(lit).as_str(), true)
337
}
338
339
/// Return the index position of a literal substring in the target string.
340
fn find_literal(&self, lit: &str) -> PolarsResult<UInt32Chunked> {
341
self.find(regex::escape(lit).as_str(), true)
342
}
343
344
/// Return the index position of a regular expression substring in the target string.
345
fn find(&self, pat: &str, strict: bool) -> PolarsResult<UInt32Chunked> {
346
let ca = self.as_string();
347
match polars_utils::regex_cache::compile_regex(pat) {
348
Ok(rx) => Ok(unary_elementwise(ca, |opt_s| {
349
opt_s.and_then(|s| rx.find(s)).map(|m| m.start() as u32)
350
})),
351
Err(_) if !strict => Ok(UInt32Chunked::full_null(ca.name().clone(), ca.len())),
352
Err(e) => Err(PolarsError::ComputeError(
353
format!("Invalid regular expression: {e}").into(),
354
)),
355
}
356
}
357
358
/// Replace the leftmost regex-matched (sub)string with another string
359
fn replace<'a>(&'a self, pat: &str, val: &str) -> PolarsResult<StringChunked> {
360
let reg = polars_utils::regex_cache::compile_regex(pat)?;
361
let f = |s: &'a str| reg.replace(s, val);
362
let ca = self.as_string();
363
Ok(ca.apply_values(f))
364
}
365
366
/// Replace the leftmost literal (sub)string with another string
367
fn replace_literal<'a>(
368
&'a self,
369
pat: &str,
370
val: &str,
371
n: usize,
372
) -> PolarsResult<StringChunked> {
373
let ca = self.as_string();
374
if ca.is_empty() {
375
return Ok(ca.clone());
376
}
377
378
// amortize allocation
379
let mut buf = String::new();
380
381
let f = move |s: &'a str| {
382
buf.clear();
383
let mut changed = false;
384
385
// See: str.replacen
386
let mut last_end = 0;
387
for (start, part) in s.match_indices(pat).take(n) {
388
changed = true;
389
buf.push_str(unsafe { s.get_unchecked(last_end..start) });
390
buf.push_str(val);
391
last_end = start + part.len();
392
}
393
buf.push_str(unsafe { s.get_unchecked(last_end..s.len()) });
394
395
if changed {
396
// extend lifetime
397
// lifetime is bound to 'a
398
let slice = buf.as_str();
399
unsafe { std::mem::transmute::<&str, &'a str>(slice) }
400
} else {
401
s
402
}
403
};
404
Ok(ca.apply_mut(f))
405
}
406
407
/// Replace all regex-matched (sub)strings with another string
408
fn replace_all(&self, pat: &str, val: &str) -> PolarsResult<StringChunked> {
409
let ca = self.as_string();
410
let reg = polars_utils::regex_cache::compile_regex(pat)?;
411
Ok(ca.apply_values(|s| reg.replace_all(s, val)))
412
}
413
414
/// Replace all matching literal (sub)strings with another string
415
fn replace_literal_all<'a>(&'a self, pat: &str, val: &str) -> PolarsResult<StringChunked> {
416
let ca = self.as_string();
417
if ca.is_empty() {
418
return Ok(ca.clone());
419
}
420
421
// Amortize allocation.
422
let mut buf = String::new();
423
424
let f = move |s: &'a str| {
425
buf.clear();
426
let mut changed = false;
427
428
// See: str.replace.
429
let mut last_end = 0;
430
for (start, part) in s.match_indices(pat) {
431
changed = true;
432
buf.push_str(unsafe { s.get_unchecked(last_end..start) });
433
buf.push_str(val);
434
last_end = start + part.len();
435
}
436
buf.push_str(unsafe { s.get_unchecked(last_end..s.len()) });
437
438
if changed {
439
// Extend lifetime, lifetime is bound to 'a.
440
let slice = buf.as_str();
441
unsafe { std::mem::transmute::<&str, &'a str>(slice) }
442
} else {
443
s
444
}
445
};
446
447
Ok(ca.apply_mut(f))
448
}
449
450
/// Extract the nth capture group from pattern.
451
fn extract(&self, pat: &StringChunked, group_index: usize) -> PolarsResult<StringChunked> {
452
let ca = self.as_string();
453
super::extract::extract_group(ca, pat, group_index)
454
}
455
456
/// Extract each successive non-overlapping regex match in an individual string as an array.
457
fn extract_all(&self, pat: &str) -> PolarsResult<ListChunked> {
458
let ca = self.as_string();
459
let reg = polars_utils::regex_cache::compile_regex(pat)?;
460
461
let mut builder =
462
ListStringChunkedBuilder::new(ca.name().clone(), ca.len(), ca.get_values_size());
463
for arr in ca.downcast_iter() {
464
for opt_s in arr {
465
match opt_s {
466
None => builder.append_null(),
467
Some(s) => builder.append_values_iter(reg.find_iter(s).map(|m| m.as_str())),
468
}
469
}
470
}
471
Ok(builder.finish())
472
}
473
474
fn strip_chars(&self, pat: &Column) -> PolarsResult<StringChunked> {
475
let ca = self.as_string();
476
if pat.dtype() == &DataType::Null {
477
Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim())))
478
} else {
479
Ok(strip_chars(ca, pat.str()?))
480
}
481
}
482
483
fn strip_chars_start(&self, pat: &Column) -> PolarsResult<StringChunked> {
484
let ca = self.as_string();
485
if pat.dtype() == &DataType::Null {
486
Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim_start())))
487
} else {
488
Ok(strip_chars_start(ca, pat.str()?))
489
}
490
}
491
492
fn strip_chars_end(&self, pat: &Column) -> PolarsResult<StringChunked> {
493
let ca = self.as_string();
494
if pat.dtype() == &DataType::Null {
495
Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim_end())))
496
} else {
497
Ok(strip_chars_end(ca, pat.str()?))
498
}
499
}
500
501
/// Apply the `strip_prefix` kernel to each string with the per-row `prefix`.
fn strip_prefix(&self, prefix: &StringChunked) -> StringChunked {
    strip_prefix(self.as_string(), prefix)
}
505
506
/// Apply the `strip_suffix` kernel to each string with the per-row `suffix`.
fn strip_suffix(&self, suffix: &StringChunked) -> StringChunked {
    strip_suffix(self.as_string(), suffix)
}
510
511
/// Split each string on `by` with `str::split` into a struct; `n + 1` is passed to
/// `split_to_struct` as the field count.
#[cfg(feature = "dtype-struct")]
fn split_exact(&self, by: &StringChunked, n: usize) -> PolarsResult<StructChunked> {
    split_to_struct(self.as_string(), by, n + 1, str::split, false)
}
517
518
/// Split each string on `by` with `str::split_inclusive` into a struct; `n + 1` is
/// passed to `split_to_struct` as the field count.
#[cfg(feature = "dtype-struct")]
fn split_exact_inclusive(&self, by: &StringChunked, n: usize) -> PolarsResult<StructChunked> {
    split_to_struct(self.as_string(), by, n + 1, str::split_inclusive, false)
}
524
525
/// Split each string on `by` with `str::splitn(n, ..)` into a struct; `n` is passed to
/// `split_to_struct` as the field count.
#[cfg(feature = "dtype-struct")]
fn splitn(&self, by: &StringChunked, n: usize) -> PolarsResult<StructChunked> {
    let split_at_most_n = |s, by| s.splitn(n, by);
    split_to_struct(self.as_string(), by, n, split_at_most_n, true)
}
531
532
fn split(&self, by: &StringChunked) -> PolarsResult<ListChunked> {
533
let ca = self.as_string();
534
split_helper(ca, by, str::split)
535
}
536
537
fn split_inclusive(&self, by: &StringChunked) -> PolarsResult<ListChunked> {
538
let ca = self.as_string();
539
split_helper(ca, by, str::split_inclusive)
540
}
541
542
/// Extract each successive non-overlapping regex match in an individual string as an array.
543
fn extract_all_many(&self, pat: &StringChunked) -> PolarsResult<ListChunked> {
544
let ca = self.as_string();
545
polars_ensure!(
546
ca.len() == pat.len(),
547
ComputeError: "pattern's length: {} does not match that of the argument series: {}",
548
pat.len(), ca.len(),
549
);
550
551
let mut builder =
552
ListStringChunkedBuilder::new(ca.name().clone(), ca.len(), ca.get_values_size());
553
with_regex_cache(|re_cache| {
554
binary_elementwise_for_each(ca, pat, |opt_s, opt_pat| match (opt_s, opt_pat) {
555
(_, None) | (None, _) => builder.append_null(),
556
(Some(s), Some(pat)) => {
557
let re = re_cache.compile(pat).unwrap();
558
builder.append_values_iter(re.find_iter(s).map(|m| m.as_str()));
559
},
560
});
561
});
562
Ok(builder.finish())
563
}
564
565
/// Extract all capture groups of the regex `pat` and return them as a struct series
/// of the given `dtype`.
#[cfg(feature = "extract_groups")]
fn extract_groups(&self, pat: &str, dtype: &DataType) -> PolarsResult<Series> {
    super::extract::extract_groups(self.as_string(), pat, dtype)
}
571
572
/// Count all successive non-overlapping regex matches.
573
fn count_matches(&self, pat: &str, literal: bool) -> PolarsResult<UInt32Chunked> {
574
let ca = self.as_string();
575
if literal {
576
Ok(unary_elementwise(ca, |opt_s| {
577
opt_s.map(|s| s.matches(pat).count() as u32)
578
}))
579
} else {
580
let re = compile_regex(pat)?;
581
Ok(unary_elementwise(ca, |opt_s| {
582
opt_s.map(|s| re.find_iter(s).count() as u32)
583
}))
584
}
585
}
586
587
/// Count all successive non-overlapping regex matches.
588
fn count_matches_many(
589
&self,
590
pat: &StringChunked,
591
literal: bool,
592
) -> PolarsResult<UInt32Chunked> {
593
let ca = self.as_string();
594
polars_ensure!(
595
ca.len() == pat.len(),
596
ComputeError: "pattern's length: {} does not match that of the argument series: {}",
597
pat.len(), ca.len(),
598
);
599
600
let out: UInt32Chunked = if literal {
601
broadcast_binary_elementwise(ca, pat, |s: Option<&str>, p: Option<&str>| {
602
Some(s?.matches(p?).count() as u32)
603
})
604
} else {
605
with_regex_cache(|re_cache| {
606
let op = move |opt_s: Option<&str>,
607
opt_pat: Option<&str>|
608
-> PolarsResult<Option<u32>> {
609
match (opt_s, opt_pat) {
610
(Some(s), Some(pat)) => {
611
let reg = re_cache.compile(pat)?;
612
Ok(Some(reg.find_iter(s).count() as u32))
613
},
614
_ => Ok(None),
615
}
616
};
617
broadcast_try_binary_elementwise(ca, pat, op)
618
})?
619
};
620
621
Ok(out.with_name(ca.name().clone()))
622
}
623
624
/// Modify the strings to their lowercase equivalent.
625
#[must_use]
626
fn to_lowercase(&self) -> StringChunked {
627
let ca = self.as_string();
628
case::to_lowercase(ca)
629
}
630
631
/// Modify the strings to their uppercase equivalent.
632
#[must_use]
633
fn to_uppercase(&self) -> StringChunked {
634
let ca = self.as_string();
635
case::to_uppercase(ca)
636
}
637
638
/// Convert every string to its titlecase equivalent.
#[must_use]
#[cfg(feature = "nightly")]
fn to_titlecase(&self) -> StringChunked {
    case::to_titlecase(self.as_string())
}
645
646
/// Concat with the values from a second StringChunked.
647
#[must_use]
648
fn concat(&self, other: &StringChunked) -> StringChunked {
649
let ca = self.as_string();
650
ca + other
651
}
652
653
/// Normalize every string to the given Unicode `form`.
#[must_use]
#[cfg(feature = "string_normalize")]
fn str_normalize(&self, form: UnicodeForm) -> StringChunked {
    normalize::normalize(self.as_string(), form)
}
660
661
/// Reverse every string value.
#[must_use]
#[cfg(feature = "string_reverse")]
fn str_reverse(&self) -> StringChunked {
    reverse::reverse(self.as_string())
}
668
669
/// Slice the string values.
670
///
671
/// Determines a substring starting from `offset` and with length `length` of each of the elements in `array`.
672
/// `offset` can be negative, in which case the start counts from the end of the string.
673
fn str_slice(&self, offset: &Column, length: &Column) -> PolarsResult<StringChunked> {
674
let ca = self.as_string();
675
let offset = offset.cast(&DataType::Int64)?;
676
// We strict cast, otherwise negative value will be treated as a valid length.
677
let length = length.strict_cast(&DataType::UInt64)?;
678
679
Ok(substring::substring(ca, offset.i64()?, length.u64()?))
680
}
681
682
/// Slice the first `n` values of the string.
683
///
684
/// Determines a substring starting at the beginning of the string up to offset `n` of each
685
/// element in `array`. `n` can be negative, in which case the slice ends `n` characters from
686
/// the end of the string.
687
fn str_head(&self, n: &Column) -> PolarsResult<StringChunked> {
688
let ca = self.as_string();
689
let n = n.strict_cast(&DataType::Int64)?;
690
691
substring::head(ca, n.i64()?)
692
}
693
694
/// Slice the last `n` values of the string.
695
///
696
/// Determines a substring starting at offset `n` of each element in `array`. `n` can be
697
/// negative, in which case the slice begins `n` characters from the start of the string.
698
fn str_tail(&self, n: &Column) -> PolarsResult<StringChunked> {
699
let ca = self.as_string();
700
let n = n.strict_cast(&DataType::Int64)?;
701
702
substring::tail(ca, n.i64()?)
703
}
704
/// Escape all regular expression meta characters in each string.
#[cfg(feature = "strings")]
fn str_escape_regex(&self) -> StringChunked {
    escape_regex::escape_regex(self.as_string())
}
710
}
711
712
// `StringChunked` overrides nothing: it relies on the trait's default method bodies.
impl StringNameSpaceImpl for StringChunked {}
713
714