Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-python/src/expr/string.rs
7889 views
1
use polars::prelude::*;
2
use pyo3::prelude::*;
3
4
use super::datatype::PyDataTypeExpr;
5
use crate::PyExpr;
6
use crate::conversion::Wrap;
7
use crate::error::PyPolarsErr;
8
9
#[pymethods]
10
impl PyExpr {
11
fn str_join(&self, delimiter: &str, ignore_nulls: bool) -> Self {
12
self.inner
13
.clone()
14
.str()
15
.join(delimiter, ignore_nulls)
16
.into()
17
}
18
19
#[pyo3(signature = (format, strict, exact, cache))]
20
fn str_to_date(&self, format: Option<String>, strict: bool, exact: bool, cache: bool) -> Self {
21
let format = format.map(|x| x.into());
22
23
let options = StrptimeOptions {
24
format,
25
strict,
26
exact,
27
cache,
28
};
29
self.inner.clone().str().to_date(options).into()
30
}
31
32
#[pyo3(signature = (format, time_unit, time_zone, strict, exact, cache, ambiguous))]
33
fn str_to_datetime(
34
&self,
35
format: Option<String>,
36
time_unit: Option<Wrap<TimeUnit>>,
37
time_zone: Wrap<Option<TimeZone>>,
38
strict: bool,
39
exact: bool,
40
cache: bool,
41
ambiguous: Self,
42
) -> Self {
43
let format = format.map(|x| x.into());
44
let time_zone = time_zone.0;
45
46
let options = StrptimeOptions {
47
format,
48
strict,
49
exact,
50
cache,
51
};
52
self.inner
53
.clone()
54
.str()
55
.to_datetime(
56
time_unit.map(|tu| tu.0),
57
time_zone,
58
options,
59
ambiguous.inner,
60
)
61
.into()
62
}
63
64
#[pyo3(signature = (format, strict, cache))]
65
fn str_to_time(&self, format: Option<String>, strict: bool, cache: bool) -> Self {
66
let format = format.map(|x| x.into());
67
68
let options = StrptimeOptions {
69
format,
70
strict,
71
cache,
72
exact: true,
73
};
74
self.inner.clone().str().to_time(options).into()
75
}
76
77
fn str_strip_chars(&self, matches: Self) -> Self {
78
self.inner.clone().str().strip_chars(matches.inner).into()
79
}
80
81
fn str_strip_chars_start(&self, matches: Self) -> Self {
82
self.inner
83
.clone()
84
.str()
85
.strip_chars_start(matches.inner)
86
.into()
87
}
88
89
fn str_strip_chars_end(&self, matches: Self) -> Self {
90
self.inner
91
.clone()
92
.str()
93
.strip_chars_end(matches.inner)
94
.into()
95
}
96
97
fn str_strip_prefix(&self, prefix: Self) -> Self {
98
self.inner.clone().str().strip_prefix(prefix.inner).into()
99
}
100
101
fn str_strip_suffix(&self, suffix: Self) -> Self {
102
self.inner.clone().str().strip_suffix(suffix.inner).into()
103
}
104
105
fn str_slice(&self, offset: Self, length: Self) -> Self {
106
self.inner
107
.clone()
108
.str()
109
.slice(offset.inner, length.inner)
110
.into()
111
}
112
113
fn str_head(&self, n: Self) -> Self {
114
self.inner.clone().str().head(n.inner).into()
115
}
116
117
fn str_tail(&self, n: Self) -> Self {
118
self.inner.clone().str().tail(n.inner).into()
119
}
120
121
fn str_to_uppercase(&self) -> Self {
122
self.inner.clone().str().to_uppercase().into()
123
}
124
125
fn str_to_lowercase(&self) -> Self {
126
self.inner.clone().str().to_lowercase().into()
127
}
128
129
#[cfg(feature = "nightly")]
130
fn str_to_titlecase(&self) -> Self {
131
self.inner.clone().str().to_titlecase().into()
132
}
133
134
fn str_len_bytes(&self) -> Self {
135
self.inner.clone().str().len_bytes().into()
136
}
137
138
fn str_len_chars(&self) -> Self {
139
self.inner.clone().str().len_chars().into()
140
}
141
142
#[cfg(feature = "regex")]
143
fn str_replace_n(&self, pat: Self, val: Self, literal: bool, n: i64) -> Self {
144
self.inner
145
.clone()
146
.str()
147
.replace_n(pat.inner, val.inner, literal, n)
148
.into()
149
}
150
151
#[cfg(feature = "regex")]
152
fn str_replace_all(&self, pat: Self, val: Self, literal: bool) -> Self {
153
self.inner
154
.clone()
155
.str()
156
.replace_all(pat.inner, val.inner, literal)
157
.into()
158
}
159
160
fn str_normalize(&self, form: Wrap<UnicodeForm>) -> Self {
161
self.inner.clone().str().normalize(form.0).into()
162
}
163
164
fn str_reverse(&self) -> Self {
165
self.inner.clone().str().reverse().into()
166
}
167
168
fn str_pad_start(&self, length: PyExpr, fill_char: char) -> Self {
169
self.inner
170
.clone()
171
.str()
172
.pad_start(length.inner, fill_char)
173
.into()
174
}
175
176
fn str_pad_end(&self, length: PyExpr, fill_char: char) -> Self {
177
self.inner
178
.clone()
179
.str()
180
.pad_end(length.inner, fill_char)
181
.into()
182
}
183
184
fn str_zfill(&self, length: PyExpr) -> Self {
185
self.inner.clone().str().zfill(length.inner).into()
186
}
187
188
#[pyo3(signature = (pat, literal, strict))]
189
#[cfg(feature = "regex")]
190
fn str_contains(&self, pat: Self, literal: Option<bool>, strict: bool) -> Self {
191
match literal {
192
Some(true) => self.inner.clone().str().contains_literal(pat.inner).into(),
193
_ => self.inner.clone().str().contains(pat.inner, strict).into(),
194
}
195
}
196
197
#[pyo3(signature = (pat, literal, strict))]
198
#[cfg(feature = "regex")]
199
fn str_find(&self, pat: Self, literal: Option<bool>, strict: bool) -> Self {
200
match literal {
201
Some(true) => self.inner.clone().str().find_literal(pat.inner).into(),
202
_ => self.inner.clone().str().find(pat.inner, strict).into(),
203
}
204
}
205
206
fn str_ends_with(&self, sub: Self) -> Self {
207
self.inner.clone().str().ends_with(sub.inner).into()
208
}
209
210
fn str_starts_with(&self, sub: Self) -> Self {
211
self.inner.clone().str().starts_with(sub.inner).into()
212
}
213
214
fn str_hex_encode(&self) -> Self {
215
self.inner.clone().str().hex_encode().into()
216
}
217
218
#[cfg(feature = "binary_encoding")]
219
fn str_hex_decode(&self, strict: bool) -> Self {
220
self.inner.clone().str().hex_decode(strict).into()
221
}
222
223
fn str_base64_encode(&self) -> Self {
224
self.inner.clone().str().base64_encode().into()
225
}
226
227
#[cfg(feature = "binary_encoding")]
228
fn str_base64_decode(&self, strict: bool) -> Self {
229
self.inner.clone().str().base64_decode(strict).into()
230
}
231
232
#[pyo3(signature = (base, dtype=Some(Wrap(DataType::Int64)), strict=true))]
233
fn str_to_integer(&self, base: Self, dtype: Option<Wrap<DataType>>, strict: bool) -> Self {
234
self.inner
235
.clone()
236
.str()
237
.to_integer(base.inner, dtype.map(|wrap| wrap.0), strict)
238
.into()
239
}
240
241
#[cfg(feature = "extract_jsonpath")]
242
fn str_json_decode(&self, dtype: PyDataTypeExpr) -> Self {
243
self.inner.clone().str().json_decode(dtype.inner).into()
244
}
245
246
#[cfg(feature = "extract_jsonpath")]
247
fn str_json_path_match(&self, pat: Self) -> Self {
248
self.inner.clone().str().json_path_match(pat.inner).into()
249
}
250
251
fn str_extract(&self, pat: Self, group_index: usize) -> Self {
252
self.inner
253
.clone()
254
.str()
255
.extract(pat.inner, group_index)
256
.into()
257
}
258
259
fn str_extract_all(&self, pat: Self) -> Self {
260
self.inner.clone().str().extract_all(pat.inner).into()
261
}
262
263
#[cfg(feature = "extract_groups")]
264
fn str_extract_groups(&self, pat: &str) -> PyResult<Self> {
265
Ok(self
266
.inner
267
.clone()
268
.str()
269
.extract_groups(pat)
270
.map_err(PyPolarsErr::from)?
271
.into())
272
}
273
274
fn str_count_matches(&self, pat: Self, literal: bool) -> Self {
275
self.inner
276
.clone()
277
.str()
278
.count_matches(pat.inner, literal)
279
.into()
280
}
281
282
fn str_split(&self, by: Self) -> Self {
283
self.inner.clone().str().split(by.inner).into()
284
}
285
286
fn str_split_inclusive(&self, by: Self) -> Self {
287
self.inner.clone().str().split_inclusive(by.inner).into()
288
}
289
290
fn str_split_exact(&self, by: Self, n: usize) -> Self {
291
self.inner.clone().str().split_exact(by.inner, n).into()
292
}
293
294
fn str_split_exact_inclusive(&self, by: Self, n: usize) -> Self {
295
self.inner
296
.clone()
297
.str()
298
.split_exact_inclusive(by.inner, n)
299
.into()
300
}
301
302
fn str_splitn(&self, by: Self, n: usize) -> Self {
303
self.inner.clone().str().splitn(by.inner, n).into()
304
}
305
306
fn str_to_decimal(&self, scale: usize) -> Self {
307
self.inner.clone().str().to_decimal(scale).into()
308
}
309
310
#[cfg(feature = "find_many")]
311
fn str_contains_any(&self, patterns: PyExpr, ascii_case_insensitive: bool) -> Self {
312
self.inner
313
.clone()
314
.str()
315
.contains_any(patterns.inner, ascii_case_insensitive)
316
.into()
317
}
318
#[cfg(feature = "find_many")]
319
fn str_replace_many(
320
&self,
321
patterns: PyExpr,
322
replace_with: PyExpr,
323
ascii_case_insensitive: bool,
324
leftmost: bool,
325
) -> Self {
326
self.inner
327
.clone()
328
.str()
329
.replace_many(
330
patterns.inner,
331
replace_with.inner,
332
ascii_case_insensitive,
333
leftmost,
334
)
335
.into()
336
}
337
338
#[cfg(feature = "find_many")]
339
fn str_extract_many(
340
&self,
341
patterns: PyExpr,
342
ascii_case_insensitive: bool,
343
overlapping: bool,
344
leftmost: bool,
345
) -> Self {
346
self.inner
347
.clone()
348
.str()
349
.extract_many(
350
patterns.inner,
351
ascii_case_insensitive,
352
overlapping,
353
leftmost,
354
)
355
.into()
356
}
357
358
#[cfg(feature = "find_many")]
359
fn str_find_many(
360
&self,
361
patterns: PyExpr,
362
ascii_case_insensitive: bool,
363
overlapping: bool,
364
leftmost: bool,
365
) -> Self {
366
self.inner
367
.clone()
368
.str()
369
.find_many(
370
patterns.inner,
371
ascii_case_insensitive,
372
overlapping,
373
leftmost,
374
)
375
.into()
376
}
377
378
#[cfg(feature = "regex")]
379
fn str_escape_regex(&self) -> Self {
380
self.inner.clone().str().escape_regex().into()
381
}
382
383
#[staticmethod]
384
fn str_format(f_string: String, exprs: Vec<PyExpr>) -> PyResult<Self> {
385
let exprs = exprs.into_iter().map(|e| e.inner).collect::<Vec<_>>();
386
Ok(format_str(&f_string, exprs)
387
.map_err(PyPolarsErr::from)?
388
.into())
389
}
390
}
391
392