Path: blob/main/crates/polars-core/src/series/any_value.rs
8431 views
use std::fmt::Write;12use arrow::bitmap::MutableBitmap;3use num_traits::AsPrimitive;4use polars_compute::cast::SerPrimitive;56#[cfg(feature = "dtype-categorical")]7use crate::chunked_array::builder::CategoricalChunkedBuilder;8use crate::chunked_array::builder::{AnonymousOwnedListBuilder, get_list_builder};9use crate::prelude::*;10use crate::utils::any_values_to_supertype;1112impl<'a, T: AsRef<[AnyValue<'a>]>> NamedFrom<T, [AnyValue<'a>]> for Series {13/// Construct a new [`Series`] from a collection of [`AnyValue`].14///15/// # Panics16///17/// Panics if the values do not all share the same data type (with the exception18/// of [`DataType::Null`], which is always allowed).19///20/// [`AnyValue`]: crate::datatypes::AnyValue21fn new(name: PlSmallStr, values: T) -> Self {22let values = values.as_ref();23Series::from_any_values(name, values, true).expect("data types of values should match")24}25}2627impl Series {28/// Construct a new [`Series`] from a slice of AnyValues.29///30/// The data type of the resulting Series is determined by the `values`31/// and the `strict` parameter:32/// - If `strict` is `true`, the data type is equal to the data type of the33/// first non-null value. If any other non-null values do not match this34/// data type, an error is raised. If the first non-null value is a35/// decimal the slice is scanned for the maximum precision and scale possible.36/// - If `strict` is `false`, the data type is the supertype of the `values`.37/// An error is returned if no supertype can be determined.38/// **WARNING**: A full pass over the values is required to determine the supertype.39/// - If no values were passed, the resulting data type is `Null`.40pub fn from_any_values(41name: PlSmallStr,42values: &[AnyValue],43strict: bool,44) -> PolarsResult<Self> {45fn get_first_non_null_dtype(values: &[AnyValue]) -> DataType {46let mut all_flat_null = true;47let first_non_null = values.iter().find(|av| {48if !av.is_null() {49all_flat_null = false50};51!av.is_nested_null()52});53match first_non_null {54Some(av) => av.dtype(),55None => {56if all_flat_null {57DataType::Null58} else {59// Second pass to check for the nested null value that60// toggled `all_flat_null` to false, e.g. a List(Null).61let first_nested_null = values.iter().find(|av| !av.is_null()).unwrap();62first_nested_null.dtype()63}64},65}66}67let dtype = if strict {68match get_first_non_null_dtype(values) {69#[cfg(feature = "dtype-decimal")]70DataType::Decimal(mut prec, mut scale) => {71for v in values {72if let DataType::Decimal(p, s) = v.dtype() {73prec = prec.max(p);74scale = scale.max(s);75}76}77DataType::Decimal(prec, scale)78},79dt => dt,80}81} else {82any_values_to_supertype(values)?83};8485Self::from_any_values_and_dtype(name, values, &dtype, strict)86}8788/// Construct a new [`Series`] with the given `dtype` from a slice of AnyValues.89///90/// If `strict` is `true`, an error is returned if the values do not match the given91/// data type. If `strict` is `false`, values that do not match the given data type92/// are cast. If casting is not possible, the values are set to null instead.93pub fn from_any_values_and_dtype(94name: PlSmallStr,95values: &[AnyValue],96dtype: &DataType,97strict: bool,98) -> PolarsResult<Self> {99if values.is_empty() {100return Ok(Self::new_empty(name, dtype));101}102103let mut s = match dtype {104#[cfg(feature = "dtype-i8")]105DataType::Int8 => any_values_to_integer::<Int8Type>(values, strict)?.into_series(),106#[cfg(feature = "dtype-i16")]107DataType::Int16 => any_values_to_integer::<Int16Type>(values, strict)?.into_series(),108DataType::Int32 => any_values_to_integer::<Int32Type>(values, strict)?.into_series(),109DataType::Int64 => any_values_to_integer::<Int64Type>(values, strict)?.into_series(),110#[cfg(feature = "dtype-i128")]111DataType::Int128 => any_values_to_integer::<Int128Type>(values, strict)?.into_series(),112#[cfg(feature = "dtype-u8")]113DataType::UInt8 => any_values_to_integer::<UInt8Type>(values, strict)?.into_series(),114#[cfg(feature = "dtype-u16")]115DataType::UInt16 => any_values_to_integer::<UInt16Type>(values, strict)?.into_series(),116DataType::UInt32 => any_values_to_integer::<UInt32Type>(values, strict)?.into_series(),117DataType::UInt64 => any_values_to_integer::<UInt64Type>(values, strict)?.into_series(),118#[cfg(feature = "dtype-u128")]119DataType::UInt128 => {120any_values_to_integer::<UInt128Type>(values, strict)?.into_series()121},122#[cfg(feature = "dtype-f16")]123DataType::Float16 => any_values_to_f16(values, strict)?.into_series(),124DataType::Float32 => any_values_to_f32(values, strict)?.into_series(),125DataType::Float64 => any_values_to_f64(values, strict)?.into_series(),126DataType::Boolean => any_values_to_bool(values, strict)?.into_series(),127DataType::String => any_values_to_string(values, strict)?.into_series(),128DataType::Binary => any_values_to_binary(values, strict)?.into_series(),129DataType::BinaryOffset => any_values_to_binary_offset(values, strict)?.into_series(),130#[cfg(feature = "dtype-date")]131DataType::Date => any_values_to_date(values, strict)?.into_series(),132#[cfg(feature = "dtype-time")]133DataType::Time => any_values_to_time(values, strict)?.into_series(),134#[cfg(feature = "dtype-datetime")]135DataType::Datetime(tu, tz) => {136any_values_to_datetime(values, *tu, (*tz).clone(), strict)?.into_series()137},138#[cfg(feature = "dtype-duration")]139DataType::Duration(tu) => any_values_to_duration(values, *tu, strict)?.into_series(),140#[cfg(feature = "dtype-categorical")]141dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => {142any_values_to_categorical(values, dt, strict)?143},144#[cfg(feature = "dtype-decimal")]145DataType::Decimal(precision, scale) => {146any_values_to_decimal(values, *precision, *scale, strict)?.into_series()147},148#[cfg(feature = "dtype-extension")]149DataType::Extension(typ, storage) => {150Series::from_any_values_and_dtype(name.clone(), values, storage, strict)?151.into_extension(typ.clone())152},153DataType::List(inner) => any_values_to_list(values, inner, strict)?.into_series(),154#[cfg(feature = "dtype-array")]155DataType::Array(inner, size) => any_values_to_array(values, inner, strict, *size)?156.into_series()157.cast(&DataType::Array(inner.clone(), *size))?,158#[cfg(feature = "dtype-struct")]159DataType::Struct(fields) => any_values_to_struct(values, fields, strict)?,160#[cfg(feature = "object")]161DataType::Object(_) => any_values_to_object(values)?,162DataType::Null => Series::new_null(PlSmallStr::EMPTY, values.len()),163dt => {164polars_bail!(165InvalidOperation:166"constructing a Series with data type {dt:?} from AnyValues is not supported"167)168},169};170s.rename(name);171Ok(s)172}173}174175fn any_values_to_primitive_nonstrict<T: PolarsNumericType>(values: &[AnyValue]) -> ChunkedArray<T> {176values177.iter()178.map(|av| av.extract::<T::Native>())179.collect_trusted()180}181182fn any_values_to_integer<T: PolarsIntegerType>(183values: &[AnyValue],184strict: bool,185) -> PolarsResult<ChunkedArray<T>> {186fn any_values_to_integer_strict<T: PolarsIntegerType>(187values: &[AnyValue],188) -> PolarsResult<ChunkedArray<T>> {189let mut builder = PrimitiveChunkedBuilder::<T>::new(PlSmallStr::EMPTY, values.len());190for av in values {191match &av {192av if av.is_integer() => {193let opt_val = av.extract::<T::Native>();194let val = match opt_val {195Some(v) => v,196None => return Err(invalid_value_error(&T::get_static_dtype(), av)),197};198builder.append_value(val)199},200AnyValue::Null => builder.append_null(),201av => return Err(invalid_value_error(&T::get_static_dtype(), av)),202}203}204Ok(builder.finish())205}206207if strict {208any_values_to_integer_strict::<T>(values)209} else {210Ok(any_values_to_primitive_nonstrict::<T>(values))211}212}213214#[cfg(feature = "dtype-f16")]215fn any_values_to_f16(values: &[AnyValue], strict: bool) -> PolarsResult<Float16Chunked> {216fn any_values_to_f16_strict(values: &[AnyValue]) -> PolarsResult<Float16Chunked> {217let mut builder =218PrimitiveChunkedBuilder::<Float16Type>::new(PlSmallStr::EMPTY, values.len());219for av in values {220match av {221AnyValue::Float16(i) => builder.append_value(*i),222AnyValue::Null => builder.append_null(),223av => return Err(invalid_value_error(&DataType::Float16, av)),224}225}226Ok(builder.finish())227}228if strict {229any_values_to_f16_strict(values)230} else {231Ok(any_values_to_primitive_nonstrict::<Float16Type>(values))232}233}234235fn any_values_to_f32(values: &[AnyValue], strict: bool) -> PolarsResult<Float32Chunked> {236fn any_values_to_f32_strict(values: &[AnyValue]) -> PolarsResult<Float32Chunked> {237let mut builder =238PrimitiveChunkedBuilder::<Float32Type>::new(PlSmallStr::EMPTY, values.len());239for av in values {240match av {241AnyValue::Float32(i) => builder.append_value(*i),242AnyValue::Float16(i) => builder.append_value(i.as_()),243AnyValue::Null => builder.append_null(),244av => return Err(invalid_value_error(&DataType::Float32, av)),245}246}247Ok(builder.finish())248}249if strict {250any_values_to_f32_strict(values)251} else {252Ok(any_values_to_primitive_nonstrict::<Float32Type>(values))253}254}255fn any_values_to_f64(values: &[AnyValue], strict: bool) -> PolarsResult<Float64Chunked> {256fn any_values_to_f64_strict(values: &[AnyValue]) -> PolarsResult<Float64Chunked> {257let mut builder =258PrimitiveChunkedBuilder::<Float64Type>::new(PlSmallStr::EMPTY, values.len());259for av in values {260match av {261AnyValue::Float64(i) => builder.append_value(*i),262AnyValue::Float32(i) => builder.append_value(*i as f64),263AnyValue::Float16(i) => builder.append_value(i.as_()),264AnyValue::Null => builder.append_null(),265av => return Err(invalid_value_error(&DataType::Float64, av)),266}267}268Ok(builder.finish())269}270if strict {271any_values_to_f64_strict(values)272} else {273Ok(any_values_to_primitive_nonstrict::<Float64Type>(values))274}275}276277fn any_values_to_bool(values: &[AnyValue], strict: bool) -> PolarsResult<BooleanChunked> {278let mut builder = BooleanChunkedBuilder::new(PlSmallStr::EMPTY, values.len());279for av in values {280match av {281AnyValue::Boolean(b) => builder.append_value(*b),282AnyValue::Null => builder.append_null(),283av => {284if strict {285return Err(invalid_value_error(&DataType::Boolean, av));286}287match av.cast(&DataType::Boolean) {288AnyValue::Boolean(b) => builder.append_value(b),289_ => builder.append_null(),290}291},292}293}294Ok(builder.finish())295}296297fn any_values_to_string(values: &[AnyValue], strict: bool) -> PolarsResult<StringChunked> {298fn any_values_to_string_strict(values: &[AnyValue]) -> PolarsResult<StringChunked> {299let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());300for av in values {301match av {302AnyValue::String(s) => builder.append_value(s),303AnyValue::StringOwned(s) => builder.append_value(s),304AnyValue::Null => builder.append_null(),305av => return Err(invalid_value_error(&DataType::String, av)),306}307}308Ok(builder.finish())309}310fn any_values_to_string_nonstrict(values: &[AnyValue]) -> StringChunked {311fn _write_any_value(av: &AnyValue<'_>, buffer: &mut String) {312match av {313AnyValue::String(s) => buffer.push_str(s),314AnyValue::Float64(f) => {315SerPrimitive::write(unsafe { buffer.as_mut_vec() }, *f);316},317AnyValue::Float32(f) => {318SerPrimitive::write(unsafe { buffer.as_mut_vec() }, *f);319},320#[cfg(feature = "dtype-f16")]321AnyValue::Float16(f) => {322SerPrimitive::write(unsafe { buffer.as_mut_vec() }, *f);323},324#[cfg(feature = "dtype-struct")]325AnyValue::StructOwned(payload) => {326buffer.push('{');327let mut iter = payload.0.iter().peekable();328while let Some(child) = iter.next() {329_write_any_value(child, buffer);330if iter.peek().is_some() {331buffer.push(',')332}333}334buffer.push('}');335},336#[cfg(feature = "dtype-struct")]337AnyValue::Struct(_, _, flds) => {338let mut vals = Vec::with_capacity(flds.len());339av._materialize_struct_av(&mut vals);340341buffer.push('{');342let mut iter = vals.iter().peekable();343while let Some(child) = iter.next() {344_write_any_value(child, buffer);345if iter.peek().is_some() {346buffer.push(',')347}348}349buffer.push('}');350},351#[cfg(feature = "dtype-array")]352AnyValue::Array(vals, _) => {353buffer.push('[');354let mut iter = vals.iter().peekable();355while let Some(child) = iter.next() {356_write_any_value(&child, buffer);357if iter.peek().is_some() {358buffer.push(',');359}360}361buffer.push(']');362},363AnyValue::List(vals) => {364buffer.push('[');365let mut iter = vals.iter().peekable();366while let Some(child) = iter.next() {367_write_any_value(&child, buffer);368if iter.peek().is_some() {369buffer.push(',');370}371}372buffer.push(']');373},374av => {375write!(buffer, "{av}").unwrap();376},377}378}379380let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());381let mut owned = String::new(); // Amortize allocations.382for av in values {383owned.clear();384385match av {386AnyValue::String(s) => builder.append_value(s),387AnyValue::StringOwned(s) => builder.append_value(s),388AnyValue::Null => builder.append_null(),389AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => builder.append_null(),390391// Explicitly convert and dump floating-point values to strings392// to preserve as much precision as possible.393// Using write!(..., "{av}") steps through Display formatting394// which rounds to an arbitrary precision thus losing information.395av => {396_write_any_value(av, &mut owned);397builder.append_value(&owned);398},399}400}401builder.finish()402}403if strict {404any_values_to_string_strict(values)405} else {406Ok(any_values_to_string_nonstrict(values))407}408}409410fn any_values_to_binary(values: &[AnyValue], strict: bool) -> PolarsResult<BinaryChunked> {411fn any_values_to_binary_strict(values: &[AnyValue]) -> PolarsResult<BinaryChunked> {412let mut builder = BinaryChunkedBuilder::new(PlSmallStr::EMPTY, values.len());413for av in values {414match av {415AnyValue::Binary(s) => builder.append_value(*s),416AnyValue::BinaryOwned(s) => builder.append_value(&**s),417AnyValue::Null => builder.append_null(),418av => return Err(invalid_value_error(&DataType::Binary, av)),419}420}421Ok(builder.finish())422}423fn any_values_to_binary_nonstrict(values: &[AnyValue]) -> BinaryChunked {424values425.iter()426.map(|av| match av {427AnyValue::Binary(b) => Some(*b),428AnyValue::BinaryOwned(b) => Some(&**b),429AnyValue::String(s) => Some(s.as_bytes()),430AnyValue::StringOwned(s) => Some(s.as_bytes()),431_ => None,432})433.collect_trusted()434}435if strict {436any_values_to_binary_strict(values)437} else {438Ok(any_values_to_binary_nonstrict(values))439}440}441442fn any_values_to_binary_offset(443values: &[AnyValue],444strict: bool,445) -> PolarsResult<BinaryOffsetChunked> {446let mut builder = MutableBinaryArray::<i64>::new();447for av in values {448match av {449AnyValue::Binary(s) => builder.push(Some(*s)),450AnyValue::BinaryOwned(s) => builder.push(Some(&**s)),451AnyValue::Null => builder.push_null(),452av => {453if strict {454return Err(invalid_value_error(&DataType::Binary, av));455} else {456builder.push_null();457};458},459}460}461Ok(BinaryOffsetChunked::with_chunk(462Default::default(),463builder.into(),464))465}466467#[cfg(feature = "dtype-date")]468fn any_values_to_date(values: &[AnyValue], strict: bool) -> PolarsResult<DateChunked> {469let mut builder = PrimitiveChunkedBuilder::<Int32Type>::new(PlSmallStr::EMPTY, values.len());470for av in values {471match av {472AnyValue::Date(i) => builder.append_value(*i),473AnyValue::Null => builder.append_null(),474av => {475if strict {476return Err(invalid_value_error(&DataType::Date, av));477}478match av.cast(&DataType::Date) {479AnyValue::Date(i) => builder.append_value(i),480_ => builder.append_null(),481}482},483}484}485Ok(builder.finish().into_date())486}487488#[cfg(feature = "dtype-time")]489fn any_values_to_time(values: &[AnyValue], strict: bool) -> PolarsResult<TimeChunked> {490let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());491for av in values {492match av {493AnyValue::Time(i) => builder.append_value(*i),494AnyValue::Null => builder.append_null(),495av => {496if strict {497return Err(invalid_value_error(&DataType::Time, av));498}499match av.cast(&DataType::Time) {500AnyValue::Time(i) => builder.append_value(i),501_ => builder.append_null(),502}503},504}505}506Ok(builder.finish().into_time())507}508509#[cfg(feature = "dtype-datetime")]510fn any_values_to_datetime(511values: &[AnyValue],512time_unit: TimeUnit,513time_zone: Option<TimeZone>,514strict: bool,515) -> PolarsResult<DatetimeChunked> {516let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());517let target_dtype = DataType::Datetime(time_unit, time_zone.clone());518for av in values {519match av {520AnyValue::Datetime(i, tu, _) if *tu == time_unit => builder.append_value(*i),521AnyValue::DatetimeOwned(i, tu, _) if *tu == time_unit => builder.append_value(*i),522AnyValue::Null => builder.append_null(),523av => {524if strict {525return Err(invalid_value_error(&target_dtype, av));526}527match av.cast(&target_dtype) {528AnyValue::Datetime(i, _, _) => builder.append_value(i),529AnyValue::DatetimeOwned(i, _, _) => builder.append_value(i),530_ => builder.append_null(),531}532},533}534}535Ok(builder.finish().into_datetime(time_unit, time_zone))536}537538#[cfg(feature = "dtype-duration")]539fn any_values_to_duration(540values: &[AnyValue],541time_unit: TimeUnit,542strict: bool,543) -> PolarsResult<DurationChunked> {544let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());545let target_dtype = DataType::Duration(time_unit);546for av in values {547match av {548AnyValue::Duration(i, tu) if *tu == time_unit => builder.append_value(*i),549AnyValue::Null => builder.append_null(),550av => {551if strict {552return Err(invalid_value_error(&target_dtype, av));553}554match av.cast(&target_dtype) {555AnyValue::Duration(i, _) => builder.append_value(i),556_ => builder.append_null(),557}558},559}560}561Ok(builder.finish().into_duration(time_unit))562}563564#[cfg(feature = "dtype-categorical")]565fn any_values_to_categorical(566values: &[AnyValue],567dtype: &DataType,568strict: bool,569) -> PolarsResult<Series> {570with_match_categorical_physical_type!(dtype.cat_physical().unwrap(), |$C| {571let mut builder = CategoricalChunkedBuilder::<$C>::new(PlSmallStr::EMPTY, dtype.clone());572573let mut owned = String::new(); // Amortize allocations.574for av in values {575let ret = match av {576AnyValue::String(s) => builder.append_str(s),577AnyValue::StringOwned(s) => builder.append_str(s),578579&AnyValue::Enum(cat, &ref map) |580&AnyValue::EnumOwned(cat, ref map) |581&AnyValue::Categorical(cat, &ref map) |582&AnyValue::CategoricalOwned(cat, ref map) => builder.append_cat(cat, map),583584AnyValue::Binary(_) | AnyValue::BinaryOwned(_) if !strict => {585builder.append_null();586Ok(())587},588AnyValue::Null => {589builder.append_null();590Ok(())591}592593av => {594if strict {595return Err(invalid_value_error(&DataType::String, av));596}597598owned.clear();599write!(owned, "{av}").unwrap();600builder.append_str(&owned)601},602};603604if let Err(e) = ret {605if strict {606return Err(e);607} else {608builder.append_null();609}610}611}612613let ca = builder.finish();614Ok(ca.into_series())615})616}617618#[cfg(feature = "dtype-decimal")]619fn any_values_to_decimal(620values: &[AnyValue],621precision: usize,622scale: usize,623strict: bool,624) -> PolarsResult<DecimalChunked> {625let target_dtype = DataType::Decimal(precision, scale);626627let mut builder = PrimitiveChunkedBuilder::<Int128Type>::new(PlSmallStr::EMPTY, values.len());628for av in values {629match av {630// Allow equal or less scale. We do want to support different scales even in 'strict' mode.631AnyValue::Decimal(v, p, s) if *s <= scale => {632if *p <= precision && *s == scale {633builder.append_value(*v)634} else {635match av.strict_cast(&target_dtype) {636Some(AnyValue::Decimal(i, _, _)) => builder.append_value(i),637_ => builder.append_null(),638}639}640},641AnyValue::Null => builder.append_null(),642av => {643if strict {644return Err(invalid_value_error(&target_dtype, av));645}646match av.strict_cast(&target_dtype) {647Some(AnyValue::Decimal(i, _, _)) => builder.append_value(i),648_ => builder.append_null(),649}650},651};652}653654// Build the array and do a precision check if needed.655builder.finish().into_decimal(precision, scale)656}657658fn any_values_to_list(659avs: &[AnyValue],660inner_type: &DataType,661strict: bool,662) -> PolarsResult<ListChunked> {663// GB:664// Lord forgive for the sins I have committed in this function. The amount of strange665// exceptions that need to happen for this to work are insane and I feel like I am going crazy.666//667// This function is essentially a copy of the `<ListChunked as FromIterator>` where it does not668// sample the datatype from the first element and instead we give it explicitly. This allows669// this function to properly assign a datatype if `avs` starts with a `null` value. Previously,670// this was solved by assigning the `dtype` again afterwards, but why? We should not link the671// implementation of these functions. We still need to assign the dtype of the ListArray and672// such, anyways.673//674// Then, `collect_ca_with_dtype` does not possess the necessary exceptions shown in this675// function to use that. I have tried adding the exceptions there and it broke other things. I676// really do feel like this is the simplest solution.677678let mut valid = true;679let capacity = avs.len();680681let ca = match inner_type {682// AnyValues with empty lists in python can create683// Series of an unknown dtype.684// We use the anonymousbuilder without a dtype685// the empty arrays is then not added (we add an extra offset instead)686// the next non-empty series then must have the correct dtype.687DataType::Null => {688let mut builder = AnonymousOwnedListBuilder::new(PlSmallStr::EMPTY, capacity, None);689for av in avs {690match av {691AnyValue::List(b) => builder.append_series(b)?,692AnyValue::Null => builder.append_null(),693_ => {694valid = false;695builder.append_null();696},697}698}699builder.finish()700},701702#[cfg(feature = "object")]703DataType::Object(_) => polars_bail!(nyi = "Nested object types"),704705_ => {706let mut builder =707get_list_builder(inner_type, capacity * 5, capacity, PlSmallStr::EMPTY);708for av in avs {709match av {710AnyValue::List(b) => match b.cast(inner_type) {711Ok(casted) => {712if casted.null_count() != b.null_count() {713valid = !strict;714}715builder.append_series(&casted)?;716},717Err(_) => {718valid = false;719for _ in 0..b.len() {720builder.append_null();721}722},723},724AnyValue::Null => builder.append_null(),725_ => {726valid = false;727builder.append_null()728},729}730}731732builder.finish()733},734};735736if strict && !valid {737polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", DataType::List(Box::new(inner_type.clone())));738}739740Ok(ca)741}742743#[cfg(feature = "dtype-array")]744fn any_values_to_array(745avs: &[AnyValue],746inner_type: &DataType,747strict: bool,748width: usize,749) -> PolarsResult<ArrayChunked> {750fn to_arr(s: &Series) -> Option<ArrayRef> {751if s.chunks().len() > 1 {752let s = s.rechunk();753Some(s.chunks()[0].clone())754} else {755Some(s.chunks()[0].clone())756}757}758759let target_dtype = DataType::Array(Box::new(inner_type.clone()), width);760761// This is handled downstream. The builder will choose the first non null type.762let mut valid = true;763#[allow(unused_mut)]764let mut out: ArrayChunked = if inner_type == &DataType::Null {765avs.iter()766.map(|av| match av {767AnyValue::List(b) | AnyValue::Array(b, _) => to_arr(b),768AnyValue::Null => None,769_ => {770valid = false;771None772},773})774.collect_ca_with_dtype(PlSmallStr::EMPTY, target_dtype.clone())775}776// Make sure that wrongly inferred AnyValues don't deviate from the datatype.777else {778avs.iter()779.map(|av| match av {780AnyValue::List(b) | AnyValue::Array(b, _) => {781if b.dtype() == inner_type {782to_arr(b)783} else {784let s = match b.cast(inner_type) {785Ok(out) => out,786Err(_) => Series::full_null(b.name().clone(), b.len(), inner_type),787};788to_arr(&s)789}790},791AnyValue::Null => None,792_ => {793valid = false;794None795},796})797.collect_ca_with_dtype(PlSmallStr::EMPTY, target_dtype.clone())798};799800if strict && !valid {801polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", target_dtype);802}803polars_ensure!(804out.width() == width,805SchemaMismatch: "got mixed size array widths where width {} was expected", width806);807808// Ensure the logical type is correct for nested types.809#[cfg(feature = "dtype-struct")]810if !matches!(inner_type, DataType::Null) && out.inner_dtype().is_nested() {811unsafe {812out.set_dtype(target_dtype);813};814}815816Ok(out)817}818819#[cfg(feature = "dtype-struct")]820fn _any_values_to_struct<'a>(821av_fields: &[Field],822av_values: &[AnyValue<'a>],823field_index: usize,824field: &Field,825fields: &[Field],826field_avs: &mut Vec<AnyValue<'a>>,827) {828// TODO: Optimize.829830let mut append_by_search = || {831// Search for the name.832if let Some(i) = av_fields833.iter()834.position(|av_fld| av_fld.name == field.name)835{836field_avs.push(av_values[i].clone());837return;838}839field_avs.push(AnyValue::Null)840};841842// All fields are available in this single value.843// We can use the index to get value.844if fields.len() == av_fields.len() {845if fields.iter().zip(av_fields.iter()).any(|(l, r)| l != r) {846append_by_search()847} else {848let av_val = av_values849.get(field_index)850.cloned()851.unwrap_or(AnyValue::Null);852field_avs.push(av_val)853}854}855// Not all fields are available, we search the proper field.856else {857// Search for the name.858append_by_search()859}860}861862#[cfg(feature = "dtype-struct")]863fn any_values_to_struct(864values: &[AnyValue],865fields: &[Field],866strict: bool,867) -> PolarsResult<Series> {868// Fast path for structs with no fields.869if fields.is_empty() {870return Ok(871StructChunked::from_series(PlSmallStr::EMPTY, values.len(), [].iter())?.into_series(),872);873}874875// The physical series fields of the struct.876let mut series_fields = Vec::with_capacity(fields.len());877let mut has_outer_validity = false;878let mut field_avs = Vec::with_capacity(values.len());879for (i, field) in fields.iter().enumerate() {880field_avs.clear();881882for av in values.iter() {883match av {884AnyValue::StructOwned(payload) => {885let av_fields = &payload.1;886let av_values = &payload.0;887_any_values_to_struct(av_fields, av_values, i, field, fields, &mut field_avs);888},889AnyValue::Struct(_, _, av_fields) => {890let av_values: Vec<_> = av._iter_struct_av().collect();891_any_values_to_struct(av_fields, &av_values, i, field, fields, &mut field_avs);892},893AnyValue::List(s) if s.len() == fields.len() => {894let av = unsafe { s.get_unchecked(i) };895field_avs.push(av);896},897#[cfg(feature = "dtype-array")]898AnyValue::Array(s, _) if s.len() == fields.len() => {899let av = unsafe { s.get_unchecked(i) };900field_avs.push(av);901},902AnyValue::Null => {903has_outer_validity = true;904field_avs.push(AnyValue::Null)905},906_ => {907if strict {908return Err(invalid_value_error(&DataType::Struct(fields.to_vec()), av));909} else {910has_outer_validity = true;911field_avs.push(AnyValue::Null)912}913},914}915}916// If the inferred dtype is null, we let auto inference work.917let s = if matches!(field.dtype, DataType::Null) {918Series::from_any_values(field.name().clone(), &field_avs, strict)?919} else {920Series::from_any_values_and_dtype(921field.name().clone(),922&field_avs,923&field.dtype,924strict,925)?926};927series_fields.push(s)928}929930let mut out =931StructChunked::from_series(PlSmallStr::EMPTY, values.len(), series_fields.iter())?;932if has_outer_validity {933let mut validity = MutableBitmap::new();934validity.extend_constant(values.len(), true);935for (i, v) in values.iter().enumerate() {936if matches!(v, AnyValue::Null) {937unsafe { validity.set_unchecked(i, false) }938}939}940out.set_outer_validity(Some(validity.freeze()))941}942Ok(out.into_series())943}944945#[cfg(feature = "object")]946fn any_values_to_object(values: &[AnyValue]) -> PolarsResult<Series> {947use crate::chunked_array::object::registry;948let converter = registry::get_object_converter();949let mut builder = registry::get_object_builder(PlSmallStr::EMPTY, values.len());950for av in values {951match av {952AnyValue::Object(val) => builder.append_value(val.as_any()),953AnyValue::Null => builder.append_null(),954_ => {955// This is needed because in Python users can send mixed types.956// This only works if you set a global converter.957let any = converter(av.as_borrowed());958builder.append_value(&*any)959},960}961}962963Ok(builder.to_series())964}965966fn invalid_value_error(dtype: &DataType, value: &AnyValue) -> PolarsError {967polars_err!(968SchemaMismatch:969"unexpected value while building Series of type {:?}; found value of type {:?}: {}",970dtype,971value.dtype(),972value973)974}975976977