Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/chunked_array/strings/namespace.rs
6939 views
1
use arrow::array::ValueSize;
2
use arrow::legacy::kernels::string::*;
3
#[cfg(feature = "string_encoding")]
4
use base64::Engine as _;
5
#[cfg(feature = "string_encoding")]
6
use base64::engine::general_purpose;
7
#[cfg(feature = "string_to_integer")]
8
use num_traits::Num;
9
use polars_core::prelude::arity::*;
10
use polars_utils::regex_cache::{compile_regex, with_regex_cache};
11
12
use super::*;
13
#[cfg(feature = "binary_encoding")]
14
use crate::chunked_array::binary::BinaryNameSpaceImpl;
15
#[cfg(feature = "string_normalize")]
16
use crate::prelude::strings::normalize::UnicodeForm;
17
18
/// Identity helper that pins a closure to the higher-ranked signature
/// `FnMut(Option<&str>, Option<&str>) -> Option<bool>`.
///
/// Routing a match closure through this function lets the compiler infer
/// independent lifetimes for the two string arguments.
#[inline(always)]
fn infer_re_match<F>(f: F) -> F
where
    F: FnMut(Option<&str>, Option<&str>) -> Option<bool>,
{
    f
}
26
27
#[cfg(feature = "string_to_integer")]
/// Helper behind `StringNameSpaceImpl::to_integer`: parses every string of `ca`
/// as an integer of type `T::Native`, written in the matching `base`.
///
/// A null string or a null base produces a null. A base outside `2..=36` is a
/// `ComputeError`. A string that does not parse becomes null; when `strict` is
/// set, such values instead raise a `ComputeError` that lists up to ten
/// distinct offending strings together with the parser message for the first.
fn parse_integer<T>(
    ca: &ChunkedArray<StringType>,
    base: &UInt32Chunked,
    strict: bool,
) -> PolarsResult<Series>
where
    T: PolarsIntegerType,
    T::Native: Num,
    ChunkedArray<T>: IntoSeries,
    <<T as polars_core::datatypes::PolarsNumericType>::Native as num_traits::Num>::FromStrRadixErr:
        std::fmt::Display,
{
    let f = |opt_s: Option<&str>, opt_base: Option<u32>| -> PolarsResult<Option<T::Native>> {
        match (opt_s, opt_base) {
            (Some(s), Some(base)) => {
                if !(2..=36).contains(&base) {
                    polars_bail!(ComputeError: "`to_integer` called with invalid base '{base}'");
                }
                // Parse failures are mapped to null here; strict mode inspects them below.
                Ok(T::Native::from_str_radix(s, base).ok())
            },
            _ => Ok(None),
        }
    };
    let parsed: ChunkedArray<T> = broadcast_try_binary_elementwise(ca, base, f)?;

    // Nothing to report unless strict mode is on and parsing changed the null count.
    if !strict || ca.null_count() == parsed.null_count() {
        return Ok(parsed.into_series());
    }

    // A failure is a non-null input with a non-null base that still produced a null.
    let failure_mask = ca.is_not_null() & parsed.is_null() & base.is_not_null();
    let n_failures = failure_mask.num_trues();
    if n_failures == 0 {
        return Ok(parsed.into_series());
    }

    let some_failures = if ca.len() == 1 {
        ca.clone()
    } else {
        let all_failures = ca.filter(&failure_mask)?;
        // `.unique()` does not necessarily preserve the original order.
        let unique_failures_args = all_failures.arg_unique()?;
        all_failures.take(&unique_failures_args.slice(0, 10))?
    };

    // Re-run the parser on one failing value to recover its error text.
    let parse_error = |s: &str, base: u32| -> Option<String> {
        T::Native::from_str_radix(s, base)
            .err()
            .map(|e| format!("{e}"))
    };
    let first_error = match base.len() {
        1 => {
            // we can ensure that base is not null.
            let base = base.get(0).unwrap();
            some_failures.get(0).and_then(|s| parse_error(s, base))
        },
        _ => {
            let base_failures = base.filter(&failure_mask)?;
            some_failures
                .get(0)
                .zip(base_failures.get(0))
                .and_then(|(s, base)| parse_error(s, base))
        },
    };
    let some_error_msg =
        first_error.unwrap_or_else(|| unreachable!("failed to extract ParseIntError"));

    polars_bail!(
        ComputeError:
        "strict integer parsing failed for {} value(s): {}; error message for the \
        first shown value: '{}' (consider non-strict parsing)",
        n_failures,
        some_failures.into_series().fmt_list(),
        some_error_msg
    );
}
104
105
pub trait StringNameSpaceImpl: AsString {
106
#[cfg(not(feature = "binary_encoding"))]
107
fn hex_decode(&self) -> PolarsResult<StringChunked> {
108
panic!("activate 'binary_encoding' feature")
109
}
110
111
/// Hex-decode the string values by delegating to the binary namespace.
///
/// The strings are viewed as binary (`as_binary`) and `strict` is forwarded
/// unchanged to the binary `hex_decode`.
#[cfg(feature = "binary_encoding")]
fn hex_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
    self.as_string().as_binary().hex_decode(strict)
}
116
117
/// Hex-encode every string value with `hex::encode`.
#[must_use]
#[cfg(feature = "string_encoding")]
fn hex_encode(&self) -> StringChunked {
    self.as_string()
        .apply_values(|value| hex::encode(value).into())
}
123
124
#[cfg(not(feature = "binary_encoding"))]
125
fn base64_decode(&self) -> PolarsResult<StringChunked> {
126
panic!("activate 'binary_encoding' feature")
127
}
128
129
/// Base64-decode the string values by delegating to the binary namespace.
///
/// The strings are viewed as binary (`as_binary`) and `strict` is forwarded
/// unchanged to the binary `base64_decode`.
#[cfg(feature = "binary_encoding")]
fn base64_decode(&self, strict: bool) -> PolarsResult<BinaryChunked> {
    self.as_string().as_binary().base64_decode(strict)
}
134
135
/// Base64-encode every string value with the `general_purpose::STANDARD` engine.
#[must_use]
#[cfg(feature = "string_encoding")]
fn base64_encode(&self) -> StringChunked {
    self.as_string()
        .apply_values(|value| general_purpose::STANDARD.encode(value).into())
}
141
142
#[cfg(feature = "string_to_integer")]
/// Parse string numbers written in the given `base` into an integer dtype.
///
/// `base` must have the same length as the string column, unless one of the
/// two has length 1; otherwise a length-mismatch error is returned. `dtype`
/// defaults to `Int64`, and any dtype outside the signed/unsigned integer
/// types listed below yields an `InvalidOperation` error. `strict` is
/// forwarded to `parse_integer`.
fn to_integer(
    &self,
    base: &UInt32Chunked,
    dtype: Option<DataType>,
    strict: bool,
) -> PolarsResult<Series> {
    let ca = self.as_string();

    polars_ensure!(
        ca.len() == base.len() || ca.len() == 1 || base.len() == 1,
        length_mismatch = "str.to_integer",
        ca.len(),
        base.len()
    );

    let target = dtype.unwrap_or(DataType::Int64);
    match target {
        DataType::Int8 => parse_integer::<Int8Type>(ca, base, strict),
        DataType::Int16 => parse_integer::<Int16Type>(ca, base, strict),
        DataType::Int32 => parse_integer::<Int32Type>(ca, base, strict),
        DataType::Int64 => parse_integer::<Int64Type>(ca, base, strict),
        DataType::Int128 => parse_integer::<Int128Type>(ca, base, strict),
        DataType::UInt8 => parse_integer::<UInt8Type>(ca, base, strict),
        DataType::UInt16 => parse_integer::<UInt16Type>(ca, base, strict),
        DataType::UInt32 => parse_integer::<UInt32Type>(ca, base, strict),
        DataType::UInt64 => parse_integer::<UInt64Type>(ca, base, strict),
        unsupported => polars_bail!(InvalidOperation: "Invalid dtype {:?}", unsupported),
    }
}
172
173
fn contains_chunked(
174
&self,
175
pat: &StringChunked,
176
literal: bool,
177
strict: bool,
178
) -> PolarsResult<BooleanChunked> {
179
let ca = self.as_string();
180
match (ca.len(), pat.len()) {
181
(_, 1) => match pat.get(0) {
182
Some(pat) => {
183
if literal {
184
ca.contains_literal(pat)
185
} else {
186
ca.contains(pat, strict)
187
}
188
},
189
None => Ok(BooleanChunked::full_null(ca.name().clone(), ca.len())),
190
},
191
(1, _) if ca.null_count() == 1 => Ok(BooleanChunked::full_null(
192
ca.name().clone(),
193
ca.len().max(pat.len()),
194
)),
195
_ => {
196
if literal {
197
Ok(broadcast_binary_elementwise_values(ca, pat, |src, pat| {
198
src.contains(pat)
199
}))
200
} else if strict {
201
with_regex_cache(|reg_cache| {
202
broadcast_try_binary_elementwise(ca, pat, |opt_src, opt_pat| {
203
match (opt_src, opt_pat) {
204
(Some(src), Some(pat)) => {
205
let reg = reg_cache.compile(pat)?;
206
Ok(Some(reg.is_match(src)))
207
},
208
_ => Ok(None),
209
}
210
})
211
})
212
} else {
213
with_regex_cache(|reg_cache| {
214
Ok(broadcast_binary_elementwise(
215
ca,
216
pat,
217
infer_re_match(|src, pat| {
218
let reg = reg_cache.compile(pat?).ok()?;
219
Some(reg.is_match(src?))
220
}),
221
))
222
})
223
}
224
},
225
}
226
}
227
228
fn find_chunked(
229
&self,
230
pat: &StringChunked,
231
literal: bool,
232
strict: bool,
233
) -> PolarsResult<UInt32Chunked> {
234
let ca = self.as_string();
235
if pat.len() == 1 {
236
return if let Some(pat) = pat.get(0) {
237
if literal {
238
ca.find_literal(pat)
239
} else {
240
ca.find(pat, strict)
241
}
242
} else {
243
Ok(UInt32Chunked::full_null(ca.name().clone(), ca.len()))
244
};
245
} else if ca.len() == 1 && ca.null_count() == 1 {
246
return Ok(UInt32Chunked::full_null(
247
ca.name().clone(),
248
ca.len().max(pat.len()),
249
));
250
}
251
if literal {
252
Ok(broadcast_binary_elementwise(
253
ca,
254
pat,
255
|src: Option<&str>, pat: Option<&str>| src?.find(pat?).map(|idx| idx as u32),
256
))
257
} else {
258
with_regex_cache(|reg_cache| {
259
let matcher = |src: Option<&str>, pat: Option<&str>| -> PolarsResult<Option<u32>> {
260
if let (Some(src), Some(pat)) = (src, pat) {
261
let re = reg_cache.compile(pat)?;
262
return Ok(re.find(src).map(|m| m.start() as u32));
263
}
264
Ok(None)
265
};
266
broadcast_try_binary_elementwise(ca, pat, matcher)
267
})
268
}
269
}
270
271
/// Get the length of the string values as number of chars.
272
fn str_len_chars(&self) -> UInt32Chunked {
273
let ca = self.as_string();
274
ca.apply_kernel_cast(&string_len_chars)
275
}
276
277
/// Get the length of the string values as number of bytes.
278
fn str_len_bytes(&self) -> UInt32Chunked {
279
let ca = self.as_string();
280
ca.apply_kernel_cast(&utf8view_len_bytes)
281
}
282
283
/// Left-pad each string with `fill_char` until it reaches the given length.
///
/// Strings that are already at least `length` long are returned unchanged.
/// The work is delegated to `pad::pad_start`.
#[cfg(feature = "string_pad")]
fn pad_start(&self, length: &UInt64Chunked, fill_char: char) -> StringChunked {
    pad::pad_start(self.as_string(), length, fill_char)
}
293
294
/// Right-pad each string with `fill_char` until it reaches the given length.
///
/// Strings that are already at least `length` long are returned unchanged.
/// The work is delegated to `pad::pad_end`.
#[cfg(feature = "string_pad")]
fn pad_end(&self, length: &UInt64Chunked, fill_char: char) -> StringChunked {
    pad::pad_end(self.as_string(), length, fill_char)
}
304
305
/// Left-pad each string with zeros until it reaches the given length.
///
/// A leading sign (`-`) stays in front: the zeros are inserted after the sign
/// character. Strings that are already at least `length` long are returned
/// unchanged. The work is delegated to `pad::zfill`.
#[cfg(feature = "string_pad")]
fn zfill(&self, length: &UInt64Chunked) -> StringChunked {
    pad::zfill(self.as_string(), length)
}
316
317
/// Check if strings contain a regex pattern.
318
fn contains(&self, pat: &str, strict: bool) -> PolarsResult<BooleanChunked> {
319
let ca = self.as_string();
320
let res_reg = polars_utils::regex_cache::compile_regex(pat);
321
let opt_reg = if strict { Some(res_reg?) } else { res_reg.ok() };
322
let out: BooleanChunked = if let Some(reg) = opt_reg {
323
unary_elementwise_values(ca, |s| reg.is_match(s))
324
} else {
325
BooleanChunked::full_null(ca.name().clone(), ca.len())
326
};
327
Ok(out)
328
}
329
330
/// Check if strings contain a given literal
331
fn contains_literal(&self, lit: &str) -> PolarsResult<BooleanChunked> {
332
// note: benchmarking shows that the regex engine is actually
333
// faster at finding literal matches than str::contains.
334
// ref: https://github.com/pola-rs/polars/pull/6811
335
self.contains(regex::escape(lit).as_str(), true)
336
}
337
338
/// Return the index position of a literal substring in the target string.
339
fn find_literal(&self, lit: &str) -> PolarsResult<UInt32Chunked> {
340
self.find(regex::escape(lit).as_str(), true)
341
}
342
343
/// Return the index position of a regular expression substring in the target string.
344
fn find(&self, pat: &str, strict: bool) -> PolarsResult<UInt32Chunked> {
345
let ca = self.as_string();
346
match polars_utils::regex_cache::compile_regex(pat) {
347
Ok(rx) => Ok(unary_elementwise(ca, |opt_s| {
348
opt_s.and_then(|s| rx.find(s)).map(|m| m.start() as u32)
349
})),
350
Err(_) if !strict => Ok(UInt32Chunked::full_null(ca.name().clone(), ca.len())),
351
Err(e) => Err(PolarsError::ComputeError(
352
format!("Invalid regular expression: {e}").into(),
353
)),
354
}
355
}
356
357
/// Replace the leftmost regex-matched (sub)string with another string
358
fn replace<'a>(&'a self, pat: &str, val: &str) -> PolarsResult<StringChunked> {
359
let reg = polars_utils::regex_cache::compile_regex(pat)?;
360
let f = |s: &'a str| reg.replace(s, val);
361
let ca = self.as_string();
362
Ok(ca.apply_values(f))
363
}
364
365
/// Replace the leftmost literal (sub)string with another string
366
fn replace_literal<'a>(
367
&'a self,
368
pat: &str,
369
val: &str,
370
n: usize,
371
) -> PolarsResult<StringChunked> {
372
let ca = self.as_string();
373
if ca.is_empty() {
374
return Ok(ca.clone());
375
}
376
377
// amortize allocation
378
let mut buf = String::new();
379
380
let f = move |s: &'a str| {
381
buf.clear();
382
let mut changed = false;
383
384
// See: str.replacen
385
let mut last_end = 0;
386
for (start, part) in s.match_indices(pat).take(n) {
387
changed = true;
388
buf.push_str(unsafe { s.get_unchecked(last_end..start) });
389
buf.push_str(val);
390
last_end = start + part.len();
391
}
392
buf.push_str(unsafe { s.get_unchecked(last_end..s.len()) });
393
394
if changed {
395
// extend lifetime
396
// lifetime is bound to 'a
397
let slice = buf.as_str();
398
unsafe { std::mem::transmute::<&str, &'a str>(slice) }
399
} else {
400
s
401
}
402
};
403
Ok(ca.apply_mut(f))
404
}
405
406
/// Replace all regex-matched (sub)strings with another string
407
fn replace_all(&self, pat: &str, val: &str) -> PolarsResult<StringChunked> {
408
let ca = self.as_string();
409
let reg = polars_utils::regex_cache::compile_regex(pat)?;
410
Ok(ca.apply_values(|s| reg.replace_all(s, val)))
411
}
412
413
/// Replace all matching literal (sub)strings with another string
414
fn replace_literal_all<'a>(&'a self, pat: &str, val: &str) -> PolarsResult<StringChunked> {
415
let ca = self.as_string();
416
if ca.is_empty() {
417
return Ok(ca.clone());
418
}
419
420
// Amortize allocation.
421
let mut buf = String::new();
422
423
let f = move |s: &'a str| {
424
buf.clear();
425
let mut changed = false;
426
427
// See: str.replace.
428
let mut last_end = 0;
429
for (start, part) in s.match_indices(pat) {
430
changed = true;
431
buf.push_str(unsafe { s.get_unchecked(last_end..start) });
432
buf.push_str(val);
433
last_end = start + part.len();
434
}
435
buf.push_str(unsafe { s.get_unchecked(last_end..s.len()) });
436
437
if changed {
438
// Extend lifetime, lifetime is bound to 'a.
439
let slice = buf.as_str();
440
unsafe { std::mem::transmute::<&str, &'a str>(slice) }
441
} else {
442
s
443
}
444
};
445
446
Ok(ca.apply_mut(f))
447
}
448
449
/// Extract the nth capture group from pattern.
450
fn extract(&self, pat: &StringChunked, group_index: usize) -> PolarsResult<StringChunked> {
451
let ca = self.as_string();
452
super::extract::extract_group(ca, pat, group_index)
453
}
454
455
/// Extract each successive non-overlapping regex match in an individual string as an array.
456
fn extract_all(&self, pat: &str) -> PolarsResult<ListChunked> {
457
let ca = self.as_string();
458
let reg = polars_utils::regex_cache::compile_regex(pat)?;
459
460
let mut builder =
461
ListStringChunkedBuilder::new(ca.name().clone(), ca.len(), ca.get_values_size());
462
for arr in ca.downcast_iter() {
463
for opt_s in arr {
464
match opt_s {
465
None => builder.append_null(),
466
Some(s) => builder.append_values_iter(reg.find_iter(s).map(|m| m.as_str())),
467
}
468
}
469
}
470
Ok(builder.finish())
471
}
472
473
fn strip_chars(&self, pat: &Column) -> PolarsResult<StringChunked> {
474
let ca = self.as_string();
475
if pat.dtype() == &DataType::Null {
476
Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim())))
477
} else {
478
Ok(strip_chars(ca, pat.str()?))
479
}
480
}
481
482
fn strip_chars_start(&self, pat: &Column) -> PolarsResult<StringChunked> {
483
let ca = self.as_string();
484
if pat.dtype() == &DataType::Null {
485
Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim_start())))
486
} else {
487
Ok(strip_chars_start(ca, pat.str()?))
488
}
489
}
490
491
fn strip_chars_end(&self, pat: &Column) -> PolarsResult<StringChunked> {
492
let ca = self.as_string();
493
if pat.dtype() == &DataType::Null {
494
Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim_end())))
495
} else {
496
Ok(strip_chars_end(ca, pat.str()?))
497
}
498
}
499
500
/// Remove the given `prefix` from each string; delegates to the
/// `strip_prefix` kernel.
fn strip_prefix(&self, prefix: &StringChunked) -> StringChunked {
    strip_prefix(self.as_string(), prefix)
}
504
505
/// Remove the given `suffix` from each string; delegates to the
/// `strip_suffix` kernel.
fn strip_suffix(&self, suffix: &StringChunked) -> StringChunked {
    strip_suffix(self.as_string(), suffix)
}
509
510
/// Split each string on `by` with `str::split` and return the parts as a
/// struct; `n + 1` is passed to `split_to_struct` as the field count.
#[cfg(feature = "dtype-struct")]
fn split_exact(&self, by: &StringChunked, n: usize) -> PolarsResult<StructChunked> {
    let n_fields = n + 1;
    split_to_struct(self.as_string(), by, n_fields, str::split, false)
}
516
517
/// Split each string on `by` with `str::split_inclusive` (separator kept with
/// the preceding part) and return the parts as a struct; `n + 1` is passed to
/// `split_to_struct` as the field count.
#[cfg(feature = "dtype-struct")]
fn split_exact_inclusive(&self, by: &StringChunked, n: usize) -> PolarsResult<StructChunked> {
    let n_fields = n + 1;
    split_to_struct(self.as_string(), by, n_fields, str::split_inclusive, false)
}
523
524
/// Split each string on `by` into at most `n` parts (`str::splitn`) and return
/// the parts as a struct; `n` is passed to `split_to_struct` as the field count.
#[cfg(feature = "dtype-struct")]
fn splitn(&self, by: &StringChunked, n: usize) -> PolarsResult<StructChunked> {
    let at_most_n = |s, by| s.splitn(n, by);
    split_to_struct(self.as_string(), by, n, at_most_n, true)
}
530
531
fn split(&self, by: &StringChunked) -> PolarsResult<ListChunked> {
532
let ca = self.as_string();
533
split_helper(ca, by, str::split)
534
}
535
536
fn split_inclusive(&self, by: &StringChunked) -> PolarsResult<ListChunked> {
537
let ca = self.as_string();
538
split_helper(ca, by, str::split_inclusive)
539
}
540
541
/// Extract each successive non-overlapping regex match in an individual string as an array.
542
fn extract_all_many(&self, pat: &StringChunked) -> PolarsResult<ListChunked> {
543
let ca = self.as_string();
544
polars_ensure!(
545
ca.len() == pat.len(),
546
ComputeError: "pattern's length: {} does not match that of the argument series: {}",
547
pat.len(), ca.len(),
548
);
549
550
let mut builder =
551
ListStringChunkedBuilder::new(ca.name().clone(), ca.len(), ca.get_values_size());
552
with_regex_cache(|re_cache| {
553
binary_elementwise_for_each(ca, pat, |opt_s, opt_pat| match (opt_s, opt_pat) {
554
(_, None) | (None, _) => builder.append_null(),
555
(Some(s), Some(pat)) => {
556
let re = re_cache.compile(pat).unwrap();
557
builder.append_values_iter(re.find_iter(s).map(|m| m.as_str()));
558
},
559
});
560
});
561
Ok(builder.finish())
562
}
563
564
/// Extract all capture groups of `pat` and return them as a struct.
///
/// Thin wrapper around `extract::extract_groups`, which receives `dtype`
/// unchanged.
#[cfg(feature = "extract_groups")]
fn extract_groups(&self, pat: &str, dtype: &DataType) -> PolarsResult<Series> {
    super::extract::extract_groups(self.as_string(), pat, dtype)
}
570
571
/// Count all successive non-overlapping regex matches.
572
fn count_matches(&self, pat: &str, literal: bool) -> PolarsResult<UInt32Chunked> {
573
let ca = self.as_string();
574
if literal {
575
Ok(unary_elementwise(ca, |opt_s| {
576
opt_s.map(|s| s.matches(pat).count() as u32)
577
}))
578
} else {
579
let re = compile_regex(pat)?;
580
Ok(unary_elementwise(ca, |opt_s| {
581
opt_s.map(|s| re.find_iter(s).count() as u32)
582
}))
583
}
584
}
585
586
/// Count all successive non-overlapping regex matches.
587
fn count_matches_many(
588
&self,
589
pat: &StringChunked,
590
literal: bool,
591
) -> PolarsResult<UInt32Chunked> {
592
let ca = self.as_string();
593
polars_ensure!(
594
ca.len() == pat.len(),
595
ComputeError: "pattern's length: {} does not match that of the argument series: {}",
596
pat.len(), ca.len(),
597
);
598
599
let out: UInt32Chunked = if literal {
600
broadcast_binary_elementwise(ca, pat, |s: Option<&str>, p: Option<&str>| {
601
Some(s?.matches(p?).count() as u32)
602
})
603
} else {
604
with_regex_cache(|re_cache| {
605
let op = move |opt_s: Option<&str>,
606
opt_pat: Option<&str>|
607
-> PolarsResult<Option<u32>> {
608
match (opt_s, opt_pat) {
609
(Some(s), Some(pat)) => {
610
let reg = re_cache.compile(pat)?;
611
Ok(Some(reg.find_iter(s).count() as u32))
612
},
613
_ => Ok(None),
614
}
615
};
616
broadcast_try_binary_elementwise(ca, pat, op)
617
})?
618
};
619
620
Ok(out.with_name(ca.name().clone()))
621
}
622
623
/// Modify the strings to their lowercase equivalent.
624
#[must_use]
625
fn to_lowercase(&self) -> StringChunked {
626
let ca = self.as_string();
627
case::to_lowercase(ca)
628
}
629
630
/// Modify the strings to their uppercase equivalent.
631
#[must_use]
632
fn to_uppercase(&self) -> StringChunked {
633
let ca = self.as_string();
634
case::to_uppercase(ca)
635
}
636
637
/// Convert every string to its titlecase equivalent (`case::to_titlecase`).
///
/// Only compiled with the `nightly` feature.
#[must_use]
#[cfg(feature = "nightly")]
fn to_titlecase(&self) -> StringChunked {
    case::to_titlecase(self.as_string())
}
644
645
/// Concat with the values from a second StringChunked.
646
#[must_use]
647
fn concat(&self, other: &StringChunked) -> StringChunked {
648
let ca = self.as_string();
649
ca + other
650
}
651
652
/// Normalize the string values to the given Unicode `form`
/// (`normalize::normalize`).
#[must_use]
#[cfg(feature = "string_normalize")]
fn str_normalize(&self, form: UnicodeForm) -> StringChunked {
    normalize::normalize(self.as_string(), form)
}
659
660
/// Reverse every string value (`reverse::reverse`).
#[must_use]
#[cfg(feature = "string_reverse")]
fn str_reverse(&self) -> StringChunked {
    reverse::reverse(self.as_string())
}
667
668
/// Slice the string values.
669
///
670
/// Determines a substring starting from `offset` and with length `length` of each of the elements in `array`.
671
/// `offset` can be negative, in which case the start counts from the end of the string.
672
fn str_slice(&self, offset: &Column, length: &Column) -> PolarsResult<StringChunked> {
673
let ca = self.as_string();
674
let offset = offset.cast(&DataType::Int64)?;
675
// We strict cast, otherwise negative value will be treated as a valid length.
676
let length = length.strict_cast(&DataType::UInt64)?;
677
678
Ok(substring::substring(ca, offset.i64()?, length.u64()?))
679
}
680
681
/// Slice the first `n` values of the string.
682
///
683
/// Determines a substring starting at the beginning of the string up to offset `n` of each
684
/// element in `array`. `n` can be negative, in which case the slice ends `n` characters from
685
/// the end of the string.
686
fn str_head(&self, n: &Column) -> PolarsResult<StringChunked> {
687
let ca = self.as_string();
688
let n = n.strict_cast(&DataType::Int64)?;
689
690
substring::head(ca, n.i64()?)
691
}
692
693
/// Slice the last `n` values of the string.
694
///
695
/// Determines a substring starting at offset `n` of each element in `array`. `n` can be
696
/// negative, in which case the slice begins `n` characters from the start of the string.
697
fn str_tail(&self, n: &Column) -> PolarsResult<StringChunked> {
698
let ca = self.as_string();
699
let n = n.strict_cast(&DataType::Int64)?;
700
701
substring::tail(ca, n.i64()?)
702
}
703
/// Escape all regular expression meta characters in every string value
/// (`escape_regex::escape_regex`).
#[cfg(feature = "strings")]
fn str_escape_regex(&self) -> StringChunked {
    escape_regex::escape_regex(self.as_string())
}
709
}
710
711
// `StringChunked` uses the provided method bodies of the trait as-is; nothing is overridden.
impl StringNameSpaceImpl for StringChunked {}
712
713