# Book a Demo!
# CoCalc Logo Icon
# Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
# pola-rs
# GitHub Repository: pola-rs/polars
# Path: blob/main/docs/source/src/python/user-guide/expressions/strings.py
# 7890 views
# --8<-- [start:df]
import polars as pl

# A fruit name paired with the language it is written in; the last two
# entries contain non-ASCII letters.
df = pl.DataFrame(
    {
        "language": ["English", "Dutch", "Portuguese", "Finnish"],
        "fruit": ["pear", "peer", "pêra", "päärynä"],
    }
)

# Add each string's length measured in bytes and in characters.
result = df.with_columns(
    pl.col("fruit").str.len_bytes().alias("byte_count"),
    pl.col("fruit").str.len_chars().alias("letter_count"),
)
print(result)
# --8<-- [end:df]

# --8<-- [start:existence]
# Bind the column expression once and reuse it for every check below.
fruit = pl.col("fruit")

# Run the starts_with / contains / ends_with checks against the fruit column,
# keeping the original column alongside for comparison.
result = df.select(
    fruit,
    fruit.str.starts_with("p").alias("starts_with_p"),
    fruit.str.contains("p..r").alias("p..r"),
    fruit.str.contains("e+").alias("e+"),
    fruit.str.ends_with("r").alias("ends_with_r"),
)
print(result)
# --8<-- [end:existence]

# --8<-- [start:extract]
# The second URL spells its query key "candidat", so the pattern used below
# does not match it.
urls = [
    "http://vote.com/ballon_dor?candidate=messi&ref=polars",
    "http://vote.com/ballon_dor?candidat=jorginho&ref=polars",
    "http://vote.com/ballon_dor?candidate=ronaldo&ref=polars",
]
df = pl.DataFrame({"urls": urls})

# Pull out capture group 1: the word following "candidate=".
candidate = pl.col("urls").str.extract(r"candidate=(\w+)", group_index=1)
result = df.select(candidate)
print(result)
# --8<-- [end:extract]


# --8<-- [start:extract_all]
df = pl.DataFrame({"text": ["123 bla 45 asd", "xyz 678 910t"]})

# Apply the digit-run pattern to every string with extract_all.
digit_runs = pl.col("text").str.extract_all(r"(\d+)")
result = df.select(digit_runs.alias("extracted_nrs"))
print(result)
# --8<-- [end:extract_all]


# --8<-- [start:replace]
df = pl.DataFrame({"text": ["123abc", "abc456"]})
text = pl.col("text")

# The `replace` result is left un-aliased; the `replace_all` result goes into
# its own "text_replace_all" column.
result = df.with_columns(
    text.str.replace(r"\d", "-"),
    text.str.replace_all(r"\d", "-").alias("text_replace_all"),
)
print(result)
# --8<-- [end:replace]

# --8<-- [start:casing]
# Sample addresses written with inconsistent letter casing.
raw_addresses = [
    "128 PERF st",
    "Rust blVD, 158",
    "PoLaRs Av, 12",
    "1042 Query sq",
]
addresses = pl.DataFrame({"addresses": raw_addresses})

address_col = pl.col("addresses")

# Keep the untouched input as "originals"; the title-cased expression is left
# un-aliased, while the lower/upper variants get their own column names.
addresses = addresses.select(
    address_col.alias("originals"),
    address_col.str.to_titlecase(),
    address_col.str.to_lowercase().alias("lower"),
    address_col.str.to_uppercase().alias("upper"),
)
print(addresses)
# --8<-- [end:casing]

# --8<-- [start:strip]
# Characters handed to the strip_chars* calls: comma, space and the digits.
chars = ", 0123456789"
addr = pl.col("addresses")

# One expression per stripping variant, in the order the columns are printed.
strip_variants = [
    addr.str.strip_chars(chars).alias("strip"),
    addr.str.strip_chars_end(chars).alias("end"),
    addr.str.strip_chars_start(chars).alias("start"),
    addr.str.strip_prefix("128 ").alias("prefix"),
    addr.str.strip_suffix(", 158").alias("suffix"),
]
result = addresses.select(strip_variants)
print(result)
# --8<-- [end:strip]

# --8<-- [start:slice]
# Column "n" holds both positive and negative values; each row's value is
# passed as the argument to slice/head/tail for that row's string.
df = pl.DataFrame(
    {
        "fruits": ["pear", "mango", "dragonfruit", "passionfruit"],
        "n": [1, -1, 4, -4],
    }
)

fruits, n = pl.col("fruits"), pl.col("n")
result = df.with_columns(
    fruits.str.slice(n).alias("slice"),
    fruits.str.head(n).alias("head"),
    fruits.str.tail(n).alias("tail"),
)
print(result)
# --8<-- [end:slice]
