Path: blob/main/crates/polars-expr/src/dispatch/strings.rs
8362 views
use std::borrow::Cow;1use std::sync::Arc;23use polars_core::prelude::*;4use polars_core::utils::{CustomIterTools, handle_casting_failures};5#[cfg(feature = "regex")]6use polars_ops::chunked_array::strings::split_regex_helper;7use polars_ops::prelude::{BinaryNameSpaceImpl, StringNameSpaceImpl};8#[cfg(feature = "temporal")]9use polars_plan::dsl::StrptimeOptions;10use polars_plan::dsl::{ColumnsUdf, SpecialEq};11use polars_plan::plans::IRStringFunction;12use polars_time::prelude::StringMethods;13#[cfg(feature = "regex")]14use regex::{NoExpand, escape};1516use super::*;1718pub fn function_expr_to_udf(func: IRStringFunction) -> SpecialEq<Arc<dyn ColumnsUdf>> {19use IRStringFunction::*;20match func {21Format { format, insertions } => {22map_as_slice!(strings::format, format.as_str(), insertions.as_ref())23},24#[cfg(feature = "regex")]25Contains { literal, strict } => map_as_slice!(strings::contains, literal, strict),26CountMatches(literal) => {27map_as_slice!(strings::count_matches, literal)28},29EndsWith => map_as_slice!(strings::ends_with),30StartsWith => map_as_slice!(strings::starts_with),31Extract(group_index) => map_as_slice!(strings::extract, group_index),32ExtractAll => {33map_as_slice!(strings::extract_all)34},35#[cfg(feature = "extract_groups")]36ExtractGroups { pat, dtype } => {37map!(strings::extract_groups, &pat, &dtype)38},39#[cfg(feature = "regex")]40Find { literal, strict } => map_as_slice!(strings::find, literal, strict),41LenBytes => map!(strings::len_bytes),42LenChars => map!(strings::len_chars),43#[cfg(feature = "string_pad")]44PadEnd { fill_char } => {45map_as_slice!(strings::pad_end, fill_char)46},47#[cfg(feature = "string_pad")]48PadStart { fill_char } => {49map_as_slice!(strings::pad_start, fill_char)50},51#[cfg(feature = "string_pad")]52ZFill => {53map_as_slice!(strings::zfill)54},55#[cfg(feature = "temporal")]56Strptime(dtype, options) => {57map_as_slice!(strings::strptime, dtype.clone(), &options)58},59Split(inclusive) => {60map_as_slice!(strings::split, inclusive)61},62#[cfg(feature = "regex")]63SplitRegex { inclusive, strict } => {64map_as_slice!(strings::split_regex, inclusive, strict)65},66#[cfg(feature = "dtype-struct")]67SplitExact { n, inclusive } => map_as_slice!(strings::split_exact, n, inclusive),68#[cfg(feature = "dtype-struct")]69SplitN(n) => map_as_slice!(strings::splitn, n),70#[cfg(feature = "concat_str")]71ConcatVertical {72delimiter,73ignore_nulls,74} => map!(strings::join, &delimiter, ignore_nulls),75#[cfg(feature = "concat_str")]76ConcatHorizontal {77delimiter,78ignore_nulls,79} => map_as_slice!(strings::concat_hor, &delimiter, ignore_nulls),80#[cfg(feature = "regex")]81Replace { n, literal } => map_as_slice!(strings::replace, literal, n),82#[cfg(feature = "string_normalize")]83Normalize { form } => map!(strings::normalize, form.clone()),84#[cfg(feature = "string_reverse")]85Reverse => map!(strings::reverse),86Uppercase => map!(uppercase),87Lowercase => map!(lowercase),88#[cfg(feature = "nightly")]89Titlecase => map!(strings::titlecase),90StripChars => map_as_slice!(strings::strip_chars),91StripCharsStart => map_as_slice!(strings::strip_chars_start),92StripCharsEnd => map_as_slice!(strings::strip_chars_end),93StripPrefix => map_as_slice!(strings::strip_prefix),94StripSuffix => map_as_slice!(strings::strip_suffix),95#[cfg(feature = "string_to_integer")]96ToInteger { dtype, strict } => {97map_as_slice!(strings::to_integer, dtype.clone(), strict)98},99Slice => map_as_slice!(strings::str_slice),100Head => map_as_slice!(strings::str_head),101Tail => map_as_slice!(strings::str_tail),102#[cfg(feature = "string_encoding")]103HexEncode => map!(strings::hex_encode),104#[cfg(feature = "binary_encoding")]105HexDecode(strict) => map!(strings::hex_decode, strict),106#[cfg(feature = "string_encoding")]107Base64Encode => map!(strings::base64_encode),108#[cfg(feature = "binary_encoding")]109Base64Decode(strict) => map!(strings::base64_decode, strict),110#[cfg(feature = "dtype-decimal")]111ToDecimal { scale } => map!(strings::to_decimal, scale),112#[cfg(feature = "extract_jsonpath")]113JsonDecode(dtype) => map!(strings::json_decode, dtype.clone()),114#[cfg(feature = "extract_jsonpath")]115JsonPathMatch => map_as_slice!(strings::json_path_match),116#[cfg(feature = "find_many")]117ContainsAny {118ascii_case_insensitive,119} => {120map_as_slice!(contains_any, ascii_case_insensitive)121},122#[cfg(feature = "find_many")]123ReplaceMany {124ascii_case_insensitive,125leftmost,126} => {127map_as_slice!(replace_many, ascii_case_insensitive, leftmost)128},129#[cfg(feature = "find_many")]130ExtractMany {131ascii_case_insensitive,132overlapping,133leftmost,134} => {135map_as_slice!(extract_many, ascii_case_insensitive, overlapping, leftmost)136},137#[cfg(feature = "find_many")]138FindMany {139ascii_case_insensitive,140overlapping,141leftmost,142} => {143map_as_slice!(find_many, ascii_case_insensitive, overlapping, leftmost)144},145#[cfg(feature = "regex")]146EscapeRegex => map!(escape_regex),147}148}149150#[cfg(feature = "find_many")]151fn contains_any(s: &[Column], ascii_case_insensitive: bool) -> PolarsResult<Column> {152let ca = s[0].str()?;153let patterns = s[1].list()?;154polars_ops::chunked_array::strings::contains_any(ca, patterns, ascii_case_insensitive)155.map(|out| out.into_column())156}157158#[cfg(feature = "find_many")]159fn replace_many(160s: &[Column],161ascii_case_insensitive: bool,162leftmost: bool,163) -> PolarsResult<Column> {164let ca = s[0].str()?;165let patterns = s[1].list()?;166let replace_with = s[2].list()?;167polars_ops::chunked_array::strings::replace_all(168ca,169patterns,170replace_with,171ascii_case_insensitive,172leftmost,173)174.map(|out| out.into_column())175}176177#[cfg(feature = "find_many")]178fn extract_many(179s: &[Column],180ascii_case_insensitive: bool,181overlapping: bool,182leftmost: bool,183) -> PolarsResult<Column> {184let ca = s[0].str()?;185let patterns = s[1].list()?;186187polars_ops::chunked_array::strings::extract_many(188ca,189patterns,190ascii_case_insensitive,191overlapping,192leftmost,193)194.map(|out| out.into_column())195}196197#[cfg(feature = "find_many")]198fn find_many(199s: &[Column],200ascii_case_insensitive: bool,201overlapping: bool,202leftmost: bool,203) -> PolarsResult<Column> {204let ca = s[0].str()?;205let patterns = s[1].list()?;206207polars_ops::chunked_array::strings::find_many(208ca,209patterns,210ascii_case_insensitive,211overlapping,212leftmost,213)214.map(|out| out.into_column())215}216217fn uppercase(s: &Column) -> PolarsResult<Column> {218let ca = s.str()?;219Ok(ca.to_uppercase().into_column())220}221222fn lowercase(s: &Column) -> PolarsResult<Column> {223let ca = s.str()?;224Ok(ca.to_lowercase().into_column())225}226227#[cfg(feature = "nightly")]228pub(super) fn titlecase(s: &Column) -> PolarsResult<Column> {229let ca = s.str()?;230Ok(ca.to_titlecase().into_column())231}232233pub(super) fn len_chars(s: &Column) -> PolarsResult<Column> {234let ca = s.str()?;235Ok(ca.str_len_chars().into_column())236}237238pub(super) fn len_bytes(s: &Column) -> PolarsResult<Column> {239let ca = s.str()?;240Ok(ca.str_len_bytes().into_column())241}242243#[cfg(feature = "regex")]244pub(super) fn contains(s: &[Column], literal: bool, strict: bool) -> PolarsResult<Column> {245_check_same_length(s, "contains")?;246let ca = s[0].str()?;247let pat = s[1].str()?;248ca.contains_chunked(pat, literal, strict)249.map(|ok| ok.into_column())250}251252#[cfg(feature = "regex")]253pub(super) fn find(s: &[Column], literal: bool, strict: bool) -> PolarsResult<Column> {254_check_same_length(s, "find")?;255let ca = s[0].str()?;256let pat = s[1].str()?;257ca.find_chunked(pat, literal, strict)258.map(|ok| ok.into_column())259}260261pub(super) fn ends_with(s: &[Column]) -> PolarsResult<Column> {262_check_same_length(s, "ends_with")?;263let ca = s[0].str()?.as_binary();264let suffix = s[1].str()?.as_binary();265266Ok(ca.ends_with_chunked(&suffix)?.into_column())267}268269pub(super) fn starts_with(s: &[Column]) -> PolarsResult<Column> {270_check_same_length(s, "starts_with")?;271let ca = s[0].str()?.as_binary();272let prefix = s[1].str()?.as_binary();273Ok(ca.starts_with_chunked(&prefix)?.into_column())274}275276/// Extract a regex pattern from the a string value.277pub(super) fn extract(s: &[Column], group_index: usize) -> PolarsResult<Column> {278let ca = s[0].str()?;279let pat = s[1].str()?;280ca.extract(pat, group_index).map(|ca| ca.into_column())281}282283#[cfg(feature = "extract_groups")]284/// Extract all capture groups from a regex pattern as a struct285pub(super) fn extract_groups(s: &Column, pat: &str, dtype: &DataType) -> PolarsResult<Column> {286let ca = s.str()?;287ca.extract_groups(pat, dtype).map(Column::from)288}289290#[cfg(feature = "string_pad")]291pub(super) fn pad_start(s: &[Column], fill_char: char) -> PolarsResult<Column> {292let s1 = s[0].as_materialized_series();293let length = &s[1];294polars_ensure!(295s1.len() == 1 || length.len() == 1 || s1.len() == length.len(),296ShapeMismatch: "cannot pad_start with 'length' array of length {}", length.len()297);298let length = length.as_materialized_series().u64()?;299let ca = s1.str()?;300Ok(ca.pad_start(length, fill_char).into_column())301}302303#[cfg(feature = "string_pad")]304pub(super) fn pad_end(s: &[Column], fill_char: char) -> PolarsResult<Column> {305let s1 = s[0].as_materialized_series();306let length = &s[1];307polars_ensure!(308s1.len() == 1 || length.len() == 1 || s1.len() == length.len(),309ShapeMismatch: "cannot pad_end with 'length' array of length {}", length.len()310);311let length = length.as_materialized_series().u64()?;312let ca = s1.str()?;313Ok(ca.pad_end(length, fill_char).into_column())314}315316#[cfg(feature = "string_pad")]317pub(super) fn zfill(s: &[Column]) -> PolarsResult<Column> {318let s1 = s[0].as_materialized_series();319let length = &s[1];320polars_ensure!(321s1.len() == 1 || length.len() == 1 || s1.len() == length.len(),322ShapeMismatch: "cannot zfill with 'length' array of length {}", length.len()323);324let length = length.as_materialized_series().u64()?;325let ca = s1.str()?;326Ok(ca.zfill(length).into_column())327}328329pub(super) fn strip_chars(s: &[Column]) -> PolarsResult<Column> {330_check_same_length(s, "strip_chars")?;331let ca = s[0].str()?;332let pat_s = &s[1];333ca.strip_chars(pat_s).map(|ok| ok.into_column())334}335336pub(super) fn strip_chars_start(s: &[Column]) -> PolarsResult<Column> {337_check_same_length(s, "strip_chars_start")?;338let ca = s[0].str()?;339let pat_s = &s[1];340ca.strip_chars_start(pat_s).map(|ok| ok.into_column())341}342343pub(super) fn strip_chars_end(s: &[Column]) -> PolarsResult<Column> {344_check_same_length(s, "strip_chars_end")?;345let ca = s[0].str()?;346let pat_s = &s[1];347ca.strip_chars_end(pat_s).map(|ok| ok.into_column())348}349350pub(super) fn strip_prefix(s: &[Column]) -> PolarsResult<Column> {351_check_same_length(s, "strip_prefix")?;352let ca = s[0].str()?;353let prefix = s[1].str()?;354Ok(ca.strip_prefix(prefix).into_column())355}356357pub(super) fn strip_suffix(s: &[Column]) -> PolarsResult<Column> {358_check_same_length(s, "strip_suffix")?;359let ca = s[0].str()?;360let suffix = s[1].str()?;361Ok(ca.strip_suffix(suffix).into_column())362}363364pub(super) fn extract_all(args: &[Column]) -> PolarsResult<Column> {365let s = &args[0];366let pat = &args[1];367368let ca = s.str()?;369let pat = pat.str()?;370371if pat.len() == 1 {372if let Some(pat) = pat.get(0) {373ca.extract_all(pat).map(|ca| ca.into_column())374} else {375Ok(Column::full_null(376ca.name().clone(),377ca.len(),378&DataType::List(Box::new(DataType::String)),379))380}381} else {382ca.extract_all_many(pat).map(|ca| ca.into_column())383}384}385386pub(super) fn count_matches(args: &[Column], literal: bool) -> PolarsResult<Column> {387let s = &args[0];388let pat = &args[1];389390let ca = s.str()?;391let pat = pat.str()?;392if pat.len() == 1 {393if let Some(pat) = pat.get(0) {394ca.count_matches(pat, literal).map(|ca| ca.into_column())395} else {396Ok(Column::full_null(397ca.name().clone(),398ca.len(),399&DataType::UInt32,400))401}402} else {403ca.count_matches_many(pat, literal)404.map(|ca| ca.into_column())405}406}407408#[cfg(feature = "temporal")]409pub(super) fn strptime(410s: &[Column],411dtype: DataType,412options: &StrptimeOptions,413) -> PolarsResult<Column> {414match dtype {415#[cfg(feature = "dtype-date")]416DataType::Date => to_date(&s[0], options),417#[cfg(feature = "dtype-datetime")]418DataType::Datetime(time_unit, time_zone) => {419to_datetime(s, &time_unit, time_zone.as_ref(), options)420},421#[cfg(feature = "dtype-time")]422DataType::Time => to_time(&s[0], options),423dt => polars_bail!(ComputeError: "not implemented for dtype {}", dt),424}425}426427#[cfg(feature = "dtype-struct")]428pub(super) fn split_exact(s: &[Column], n: usize, inclusive: bool) -> PolarsResult<Column> {429let ca = s[0].str()?;430let by = s[1].str()?;431432if inclusive {433ca.split_exact_inclusive(by, n).map(|ca| ca.into_column())434} else {435ca.split_exact(by, n).map(|ca| ca.into_column())436}437}438439#[cfg(feature = "dtype-struct")]440pub(super) fn splitn(s: &[Column], n: usize) -> PolarsResult<Column> {441let ca = s[0].str()?;442let by = s[1].str()?;443444ca.splitn(by, n).map(|ca| ca.into_column())445}446447pub(super) fn split(s: &[Column], inclusive: bool) -> PolarsResult<Column> {448let ca = s[0].str()?;449let by = s[1].str()?;450451if inclusive {452Ok(ca.split_inclusive(by)?.into_column())453} else {454Ok(ca.split(by)?.into_column())455}456}457458#[cfg(feature = "regex")]459pub(super) fn split_regex(s: &[Column], inclusive: bool, strict: bool) -> PolarsResult<Column> {460let ca = s[0].str()?;461let by = s[1].str()?;462463let out = split_regex_helper(ca, by, inclusive, strict)?;464Ok(out.into_column())465}466467#[cfg(feature = "dtype-date")]468fn to_date(s: &Column, options: &StrptimeOptions) -> PolarsResult<Column> {469let ca = s.str()?;470let out = {471if options.exact {472ca.as_date(options.format.as_deref(), options.cache)?473.into_column()474} else {475ca.as_date_not_exact(options.format.as_deref())?476.into_column()477}478};479480if options.strict && ca.null_count() != out.null_count() {481handle_casting_failures(s.as_materialized_series(), out.as_materialized_series())?;482}483Ok(out.into_column())484}485486#[cfg(feature = "dtype-datetime")]487fn to_datetime(488s: &[Column],489time_unit: &TimeUnit,490time_zone: Option<&TimeZone>,491options: &StrptimeOptions,492) -> PolarsResult<Column> {493let datetime_strings = &s[0].str()?;494let ambiguous = &s[1].str()?;495496polars_ensure!(497datetime_strings.len() == ambiguous.len()498|| datetime_strings.len() == 1499|| ambiguous.len() == 1,500length_mismatch = "str.strptime",501datetime_strings.len(),502ambiguous.len()503);504505let tz_aware = match &options.format {506#[cfg(all(feature = "regex", feature = "timezones"))]507Some(format) => polars_plan::plans::TZ_AWARE_RE.is_match(format),508_ => false,509};510511let out = if options.exact {512datetime_strings513.as_datetime(514options.format.as_deref(),515*time_unit,516options.cache,517tz_aware,518time_zone,519ambiguous,520)?521.into_column()522} else {523datetime_strings524.as_datetime_not_exact(525options.format.as_deref(),526*time_unit,527tz_aware,528time_zone,529ambiguous,530true,531)?532.into_column()533};534535if options.strict && datetime_strings.null_count() != out.null_count() {536handle_casting_failures(s[0].as_materialized_series(), out.as_materialized_series())?;537}538Ok(out.into_column())539}540541#[cfg(feature = "dtype-time")]542fn to_time(s: &Column, options: &StrptimeOptions) -> PolarsResult<Column> {543polars_ensure!(544options.exact, ComputeError: "non-exact not implemented for Time data type"545);546547let ca = s.str()?;548let out = ca549.as_time(options.format.as_deref(), options.cache)?550.into_column();551552if options.strict && ca.null_count() != out.null_count() {553handle_casting_failures(s.as_materialized_series(), out.as_materialized_series())?;554}555Ok(out.into_column())556}557558#[cfg(feature = "concat_str")]559pub(super) fn join(s: &Column, delimiter: &str, ignore_nulls: bool) -> PolarsResult<Column> {560let str_s = s.cast(&DataType::String)?;561let joined = polars_ops::chunked_array::str_join(str_s.str()?, delimiter, ignore_nulls);562Ok(joined.into_column())563}564565#[cfg(feature = "concat_str")]566pub(super) fn concat_hor(567series: &[Column],568delimiter: &str,569ignore_nulls: bool,570) -> PolarsResult<Column> {571let str_series: Vec<_> = series572.iter()573.map(|s| s.cast(&DataType::String))574.collect::<PolarsResult<_>>()?;575let cas: Vec<_> = str_series.iter().map(|s| s.str().unwrap()).collect();576Ok(polars_ops::chunked_array::hor_str_concat(&cas, delimiter, ignore_nulls)?.into_column())577}578579#[cfg(feature = "regex")]580fn get_pat(pat: &StringChunked) -> PolarsResult<&str> {581pat.get(0).ok_or_else(582|| polars_err!(ComputeError: "pattern cannot be 'null' in 'replace' expression"),583)584}585586// used only if feature="regex"587#[allow(dead_code)]588fn iter_and_replace<'a, F>(ca: &'a StringChunked, val: &'a StringChunked, f: F) -> StringChunked589where590F: Fn(&'a str, &'a str) -> Cow<'a, str>,591{592let mut out: StringChunked = ca593.into_iter()594.zip(val)595.map(|(opt_src, opt_val)| match (opt_src, opt_val) {596(Some(src), Some(val)) => Some(f(src, val)),597(Some(src), None) => Some(Cow::from(src)),598_ => None,599})600.collect_trusted();601602out.rename(ca.name().clone());603out604}605606#[cfg(feature = "regex")]607fn is_literal_pat(pat: &str) -> bool {608pat.chars().all(|c| !c.is_ascii_punctuation())609}610611#[cfg(feature = "regex")]612fn replace_n<'a>(613ca: &'a StringChunked,614pat: &'a StringChunked,615val: &'a StringChunked,616literal: bool,617n: usize,618) -> PolarsResult<StringChunked> {619match (pat.len(), val.len()) {620(1, 1) => {621let pat = get_pat(pat)?;622let Some(val) = val.get(0) else {623return Ok(ca.clone());624};625let literal = literal || is_literal_pat(pat);626627match literal {628true => ca.replace_literal(pat, val, n),629false => {630if n > 1 {631polars_bail!(ComputeError: "regex replacement with 'n > 1' not yet supported")632}633ca.replace(pat, val)634},635}636},637(1, len_val) => {638if n > 1 {639polars_bail!(ComputeError: "multivalue replacement with 'n > 1' not yet supported")640}641642if n == 0 {643return Ok(ca.clone());644};645646// from here on, we know that n == 1647let mut pat = get_pat(pat)?.to_string();648polars_ensure!(649len_val == ca.len(),650ComputeError:651"replacement value length ({}) does not match string column length ({})",652len_val, ca.len(),653);654let lit = is_literal_pat(&pat);655let literal_pat = literal || lit;656657if literal_pat {658pat = escape(&pat)659}660661let reg = polars_utils::regex_cache::compile_regex(&pat)?;662663let f = |s: &'a str, val: &'a str| {664if literal {665reg.replace(s, NoExpand(val))666} else {667reg.replace(s, val)668}669};670671Ok(iter_and_replace(ca, val, f))672},673_ => polars_bail!(674ComputeError: "dynamic pattern length in 'str.replace' expressions is not supported yet"675),676}677}678679#[cfg(feature = "regex")]680fn replace_all<'a>(681ca: &'a StringChunked,682pat: &'a StringChunked,683val: &'a StringChunked,684literal: bool,685) -> PolarsResult<StringChunked> {686match (pat.len(), val.len()) {687(1, 1) => {688let pat = get_pat(pat)?;689let val = val.get(0).ok_or_else(690|| polars_err!(ComputeError: "value cannot be 'null' in 'replace' expression"),691)?;692let literal = literal || is_literal_pat(pat);693694match literal {695true => ca.replace_literal_all(pat, val),696false => ca.replace_all(pat, val),697}698},699(1, len_val) => {700let mut pat = get_pat(pat)?.to_string();701polars_ensure!(702len_val == ca.len(),703ComputeError:704"replacement value length ({}) does not match string column length ({})",705len_val, ca.len(),706);707708let literal_pat = literal || is_literal_pat(&pat);709710if literal_pat {711pat = escape(&pat)712}713714let reg = polars_utils::regex_cache::compile_regex(&pat)?;715716let f = |s: &'a str, val: &'a str| {717// According to the docs for replace_all718// when literal = True then capture groups are ignored.719if literal {720reg.replace_all(s, NoExpand(val))721} else {722reg.replace_all(s, val)723}724};725726Ok(iter_and_replace(ca, val, f))727},728_ => polars_bail!(729ComputeError: "dynamic pattern length in 'str.replace' expressions is not supported yet"730),731}732}733734pub(super) fn format(s: &mut [Column], format: &str, insertions: &[usize]) -> PolarsResult<Column> {735polars_ops::series::str_format(s, format, insertions)736}737738#[cfg(feature = "regex")]739pub(super) fn replace(s: &[Column], literal: bool, n: i64) -> PolarsResult<Column> {740let column = &s[0];741let pat = &s[1];742let val = &s[2];743let all = n < 0;744745let column = column.str()?;746let pat = pat.str()?;747let val = val.str()?;748749if all {750replace_all(column, pat, val, literal)751} else {752replace_n(column, pat, val, literal, n as usize)753}754.map(|ca| ca.into_column())755}756757#[cfg(feature = "string_normalize")]758pub(super) fn normalize(759s: &Column,760form: polars_ops::prelude::UnicodeForm,761) -> PolarsResult<Column> {762let ca = s.str()?;763Ok(ca.str_normalize(form).into_column())764}765766#[cfg(feature = "string_reverse")]767pub(super) fn reverse(s: &Column) -> PolarsResult<Column> {768let ca = s.str()?;769Ok(ca.str_reverse().into_column())770}771772#[cfg(feature = "string_to_integer")]773pub(super) fn to_integer(774s: &[Column],775dtype: Option<DataType>,776strict: bool,777) -> PolarsResult<Column> {778let ca = s[0].str()?;779let base = s[1].strict_cast(&DataType::UInt32)?;780ca.to_integer(base.u32()?, dtype, strict)781.map(|ok| ok.into_column())782}783784fn _ensure_lengths(s: &[Column]) -> bool {785// Calculate the post-broadcast length and ensure everything is consistent.786let len = s787.iter()788.map(|series| series.len())789.filter(|l| *l != 1)790.max()791.unwrap_or(1);792s.iter()793.all(|series| series.len() == 1 || series.len() == len)794}795796fn _check_same_length(s: &[Column], fn_name: &str) -> Result<(), PolarsError> {797polars_ensure!(798_ensure_lengths(s),799ShapeMismatch: "all series in `str.{}()` should have equal or unit length",800fn_name801);802Ok(())803}804805pub(super) fn str_slice(s: &[Column]) -> PolarsResult<Column> {806_check_same_length(s, "slice")?;807let ca = s[0].str()?;808let offset = &s[1];809let length = &s[2];810Ok(ca.str_slice(offset, length)?.into_column())811}812813pub(super) fn str_head(s: &[Column]) -> PolarsResult<Column> {814_check_same_length(s, "head")?;815let ca = s[0].str()?;816let n = &s[1];817Ok(ca.str_head(n)?.into_column())818}819820pub(super) fn str_tail(s: &[Column]) -> PolarsResult<Column> {821_check_same_length(s, "tail")?;822let ca = s[0].str()?;823let n = &s[1];824Ok(ca.str_tail(n)?.into_column())825}826827#[cfg(feature = "string_encoding")]828pub(super) fn hex_encode(s: &Column) -> PolarsResult<Column> {829Ok(s.str()?.hex_encode().into_column())830}831832#[cfg(feature = "binary_encoding")]833pub(super) fn hex_decode(s: &Column, strict: bool) -> PolarsResult<Column> {834s.str()?.hex_decode(strict).map(|ca| ca.into_column())835}836837#[cfg(feature = "string_encoding")]838pub(super) fn base64_encode(s: &Column) -> PolarsResult<Column> {839Ok(s.str()?.base64_encode().into_column())840}841842#[cfg(feature = "binary_encoding")]843pub(super) fn base64_decode(s: &Column, strict: bool) -> PolarsResult<Column> {844s.str()?.base64_decode(strict).map(|ca| ca.into_column())845}846847#[cfg(feature = "dtype-decimal")]848pub(super) fn to_decimal(s: &Column, scale: usize) -> PolarsResult<Column> {849let ca = s.str()?;850ca.to_decimal(polars_compute::decimal::DEC128_MAX_PREC, scale)851.map(Column::from)852}853854#[cfg(feature = "extract_jsonpath")]855pub(super) fn json_decode(s: &Column, dtype: DataType) -> PolarsResult<Column> {856use polars_ops::prelude::Utf8JsonPathImpl;857858let ca = s.str()?;859ca.json_decode(Some(dtype), None).map(Column::from)860}861862#[cfg(feature = "extract_jsonpath")]863pub(super) fn json_path_match(s: &[Column]) -> PolarsResult<Column> {864use polars_ops::prelude::Utf8JsonPathImpl;865866_check_same_length(s, "json_path_match")?;867let ca = s[0].str()?;868let pat = s[1].str()?;869Ok(ca.json_path_match(pat)?.into_column())870}871872#[cfg(feature = "regex")]873pub(super) fn escape_regex(s: &Column) -> PolarsResult<Column> {874let ca = s.str()?;875Ok(ca.str_escape_regex().into_column())876}877878879