Path: blob/main/docs/source/src/python/user-guide/expressions/strings.py
7890 views
# Example snippets for the string-expression user guide.
#
# Each `# --8<-- [start:NAME]` / `# --8<-- [end:NAME]` pair delimits a named
# section (presumably extracted by the docs build -- confirm against the docs
# tooling). Keep the markers byte-identical, and keep explanatory comments
# outside them so the code between the markers stays as shown.
#
# The sections run top to bottom as one script: later sections reuse names
# (`pl`, `addresses`) bound by earlier ones.

# --8<-- [start:df]
import polars as pl

df = pl.DataFrame(
    {
        "language": ["English", "Dutch", "Portuguese", "Finnish"],
        "fruit": ["pear", "peer", "pêra", "päärynä"],
    }
)

result = df.with_columns(
    pl.col("fruit").str.len_bytes().alias("byte_count"),
    pl.col("fruit").str.len_chars().alias("letter_count"),
)
print(result)
# --8<-- [end:df]

# Prefix/suffix tests use literal strings; the `contains` calls are given
# regex-looking patterns ("p..r", "e+"), and each result column is named
# after the pattern it tests.
# --8<-- [start:existence]
result = df.select(
    pl.col("fruit"),
    pl.col("fruit").str.starts_with("p").alias("starts_with_p"),
    pl.col("fruit").str.contains("p..r").alias("p..r"),
    pl.col("fruit").str.contains("e+").alias("e+"),
    pl.col("fruit").str.ends_with("r").alias("ends_with_r"),
)
print(result)
# --8<-- [end:existence]

# NOTE(review): the second URL spells the parameter "candidat", so the
# pattern does not match it -- presumably intentional, to show the no-match
# result. Do not "fix" the URL without checking the accompanying guide text.
# --8<-- [start:extract]
df = pl.DataFrame(
    {
        "urls": [
            "http://vote.com/ballon_dor?candidate=messi&ref=polars",
            "http://vote.com/ballon_dor?candidat=jorginho&ref=polars",
            "http://vote.com/ballon_dor?candidate=ronaldo&ref=polars",
        ]
    }
)
result = df.select(
    pl.col("urls").str.extract(r"candidate=(\w+)", group_index=1),
)
print(result)
# --8<-- [end:extract]


# Each input string contains more than one run of digits.
# --8<-- [start:extract_all]
df = pl.DataFrame({"text": ["123 bla 45 asd", "xyz 678 910t"]})
result = df.select(
    pl.col("text").str.extract_all(r"(\d+)").alias("extracted_nrs"),
)
print(result)
# --8<-- [end:extract_all]


# `replace` overwrites the "text" column in place (no alias), while
# `replace_all` is written to a separate column for side-by-side comparison.
# --8<-- [start:replace]
df = pl.DataFrame({"text": ["123abc", "abc456"]})
result = df.with_columns(
    pl.col("text").str.replace(r"\d", "-"),
    pl.col("text").str.replace_all(r"\d", "-").alias("text_replace_all"),
)
print(result)
# --8<-- [end:replace]

# `addresses` is rebound to the result of the select below; the un-aliased
# `to_titlecase()` expression keeps the column name "addresses", and the
# untouched input is kept under "originals".
# --8<-- [start:casing]
addresses = pl.DataFrame(
    {
        "addresses": [
            "128 PERF st",
            "Rust blVD, 158",
            "PoLaRs Av, 12",
            "1042 Query sq",
        ]
    }
)

addresses = addresses.select(
    pl.col("addresses").alias("originals"),
    pl.col("addresses").str.to_titlecase(),
    pl.col("addresses").str.to_lowercase().alias("lower"),
    pl.col("addresses").str.to_uppercase().alias("upper"),
)
print(addresses)
# --8<-- [end:casing]

# Depends on the casing section: `addresses` here is the frame produced
# above, so the "addresses" column holds the title-cased values.
# --8<-- [start:strip]
addr = pl.col("addresses")
chars = ", 0123456789"
result = addresses.select(
    addr.str.strip_chars(chars).alias("strip"),
    addr.str.strip_chars_end(chars).alias("end"),
    addr.str.strip_chars_start(chars).alias("start"),
    addr.str.strip_prefix("128 ").alias("prefix"),
    addr.str.strip_suffix(", 158").alias("suffix"),
)
print(result)
# --8<-- [end:strip]

# The per-row argument comes from column "n", which holds both positive
# and negative values.
# --8<-- [start:slice]
df = pl.DataFrame(
    {
        "fruits": ["pear", "mango", "dragonfruit", "passionfruit"],
        "n": [1, -1, 4, -4],
    }
)

result = df.with_columns(
    pl.col("fruits").str.slice(pl.col("n")).alias("slice"),
    pl.col("fruits").str.head(pl.col("n")).alias("head"),
    pl.col("fruits").str.tail(pl.col("n")).alias("tail"),
)
print(result)
# --8<-- [end:slice]