Path: blob/main/crates/polars-compute/src/cast/temporal.rs
6939 views
use arrow::array::{PrimitiveArray, Utf8ViewArray};1use arrow::datatypes::{ArrowDataType, TimeUnit};2pub use arrow::temporal_conversions::{3EPOCH_DAYS_FROM_CE, MICROSECONDS, MICROSECONDS_IN_DAY, MILLISECONDS, MILLISECONDS_IN_DAY,4NANOSECONDS, NANOSECONDS_IN_DAY, SECONDS_IN_DAY,5};6use arrow::temporal_conversions::{parse_offset, parse_offset_tz};7use chrono::format::{Parsed, StrftimeItems};8use polars_error::PolarsResult;9use polars_utils::pl_str::PlSmallStr;1011/// Get the time unit as a multiple of a second12pub const fn time_unit_multiple(unit: TimeUnit) -> i64 {13match unit {14TimeUnit::Second => 1,15TimeUnit::Millisecond => MILLISECONDS,16TimeUnit::Microsecond => MICROSECONDS,17TimeUnit::Nanosecond => NANOSECONDS,18}19}2021fn chrono_tz_utf_to_timestamp(22array: &Utf8ViewArray,23fmt: &str,24time_zone: PlSmallStr,25time_unit: TimeUnit,26) -> PolarsResult<PrimitiveArray<i64>> {27let tz = parse_offset_tz(time_zone.as_str())?;28Ok(utf8view_to_timestamp_impl(29array, fmt, time_zone, tz, time_unit,30))31}3233fn utf8view_to_timestamp_impl<T: chrono::TimeZone>(34array: &Utf8ViewArray,35fmt: &str,36time_zone: PlSmallStr,37tz: T,38time_unit: TimeUnit,39) -> PrimitiveArray<i64> {40let iter = array41.iter()42.map(|x| x.and_then(|x| utf8_to_timestamp_scalar(x, fmt, &tz, &time_unit)));4344PrimitiveArray::from_trusted_len_iter(iter)45.to(ArrowDataType::Timestamp(time_unit, Some(time_zone)))46}4748/// Parses `value` to `Option<i64>` consistent with the Arrow's definition of timestamp with timezone.49///50/// `tz` must be built from `timezone` (either via [`parse_offset`] or `chrono-tz`).51/// Returns in scale `tz` of `TimeUnit`.52#[inline]53pub fn utf8_to_timestamp_scalar<T: chrono::TimeZone>(54value: &str,55fmt: &str,56tz: &T,57tu: &TimeUnit,58) -> Option<i64> {59let mut parsed = Parsed::new();60let fmt = StrftimeItems::new(fmt);61let r = chrono::format::parse(&mut parsed, value, fmt).ok();62if r.is_some() {63parsed64.to_datetime()65.map(|x| x.naive_utc())66.map(|x| tz.from_utc_datetime(&x))67.map(|x| match tu {68TimeUnit::Second => x.timestamp(),69TimeUnit::Millisecond => x.timestamp_millis(),70TimeUnit::Microsecond => x.timestamp_micros(),71TimeUnit::Nanosecond => x.timestamp_nanos_opt().unwrap(),72})73.ok()74} else {75None76}77}7879/// Parses a [`Utf8Array`] to a timeozone-aware timestamp, i.e. [`PrimitiveArray<i64>`] with type `Timestamp(Nanosecond, Some(timezone))`.80///81/// # Implementation82///83/// * parsed values with timezone other than `timezone` are converted to `timezone`.84/// * parsed values without timezone are null. Use [`utf8_to_naive_timestamp`] to parse naive timezones.85/// * Null elements remain null; non-parsable elements are null.86///87/// The feature `"chrono-tz"` enables IANA and zoneinfo formats for `timezone`.88///89/// # Error90///91/// This function errors iff `timezone` is not parsable to an offset.92pub(crate) fn utf8view_to_timestamp(93array: &Utf8ViewArray,94fmt: &str,95time_zone: PlSmallStr,96time_unit: TimeUnit,97) -> PolarsResult<PrimitiveArray<i64>> {98let tz = parse_offset(time_zone.as_str());99100if let Ok(tz) = tz {101Ok(utf8view_to_timestamp_impl(102array, fmt, time_zone, tz, time_unit,103))104} else {105chrono_tz_utf_to_timestamp(array, fmt, time_zone, time_unit)106}107}108109/// Parses a [`Utf8Array`] to naive timestamp, i.e.110/// [`PrimitiveArray<i64>`] with type `Timestamp(Nanosecond, None)`.111/// Timezones are ignored.112/// Null elements remain null; non-parsable elements are set to null.113pub(crate) fn utf8view_to_naive_timestamp(114array: &Utf8ViewArray,115fmt: &str,116time_unit: TimeUnit,117) -> PrimitiveArray<i64> {118let iter = array119.iter()120.map(|x| x.and_then(|x| utf8_to_naive_timestamp_scalar(x, fmt, &time_unit)));121122PrimitiveArray::from_trusted_len_iter(iter).to(ArrowDataType::Timestamp(time_unit, None))123}124125/// Parses `value` to `Option<i64>` consistent with the Arrow's definition of timestamp without timezone.126/// Returns in scale `tz` of `TimeUnit`.127#[inline]128pub fn utf8_to_naive_timestamp_scalar(value: &str, fmt: &str, tu: &TimeUnit) -> Option<i64> {129let fmt = StrftimeItems::new(fmt);130let mut parsed = Parsed::new();131chrono::format::parse(&mut parsed, value, fmt.clone()).ok();132parsed133.to_naive_datetime_with_offset(0)134.map(|x| match tu {135TimeUnit::Second => x.and_utc().timestamp(),136TimeUnit::Millisecond => x.and_utc().timestamp_millis(),137TimeUnit::Microsecond => x.and_utc().timestamp_micros(),138TimeUnit::Nanosecond => x.and_utc().timestamp_nanos_opt().unwrap(),139})140.ok()141}142143144