// GitHub repository: pola-rs/polars
// Path: blob/main/crates/polars-core/src/series/from.rs
use arrow::datatypes::{IntervalUnit, Metadata};
use arrow::offset::OffsetsBuffer;
#[cfg(any(
    feature = "dtype-date",
    feature = "dtype-datetime",
    feature = "dtype-time",
    feature = "dtype-duration"
))]
use arrow::temporal_conversions::*;
use arrow::types::months_days_ns;
use polars_compute::cast::cast_unchecked as cast;
#[cfg(feature = "dtype-decimal")]
use polars_compute::decimal::dec128_fits;
use polars_error::feature_gated;
use polars_utils::itertools::Itertools;

use crate::chunked_array::cast::{CastOptions, cast_chunks};
#[cfg(feature = "object")]
use crate::chunked_array::object::extension::polars_extension::PolarsExtension;
#[cfg(feature = "object")]
use crate::chunked_array::object::registry::get_object_builder;
use crate::config::check_allow_importing_interval_as_struct;
use crate::prelude::*;

impl Series {
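    /// Construct a [`Series`] from a single arrow array whose Polars dtype can be
    /// derived statically from the array type (see [`ParameterFreeDtypeStaticArray`]).
    ///
    /// # Example
    ///
    /// A minimal sketch, not compiled as a doctest; it assumes the `Int32Array`
    /// alias from `polars-arrow` is in scope and implements
    /// `ParameterFreeDtypeStaticArray`:
    ///
    /// ```ignore
    /// use arrow::array::Int32Array;
    ///
    /// let arr = Int32Array::from_vec(vec![1, 2, 3]);
    /// let s = Series::from_array("a".into(), arr);
    /// assert_eq!(s.dtype(), &DataType::Int32);
    /// ```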
    pub fn from_array<A: ParameterFreeDtypeStaticArray>(name: PlSmallStr, array: A) -> Self {
        unsafe {
            Self::from_chunks_and_dtype_unchecked(
                name,
                vec![Box::new(array)],
                &DataType::from_arrow_dtype(&A::get_dtype()),
            )
        }
    }

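    /// Construct a [`Series`] from a single arrow chunk, validating that the chunk's
    /// arrow dtype matches the physical representation of `dtype`.
    ///
    /// # Example
    ///
    /// A hedged sketch (not a doctest); it assumes an `Int64Array` built via
    /// `polars-arrow` and boxed into an `ArrayRef`:
    ///
    /// ```ignore
    /// use arrow::array::Int64Array;
    ///
    /// let chunk: ArrayRef = Box::new(Int64Array::from_vec(vec![10, 20]));
    /// let s = Series::from_chunk_and_dtype("a".into(), chunk, &DataType::Int64).unwrap();
    /// assert_eq!(s.len(), 2);
    /// ```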
    pub fn from_chunk_and_dtype(
        name: PlSmallStr,
        chunk: ArrayRef,
        dtype: &DataType,
    ) -> PolarsResult<Self> {
        if &dtype.to_physical().to_arrow(CompatLevel::newest()) != chunk.dtype() {
            polars_bail!(
                InvalidOperation: "cannot create a series of type '{dtype}' from an arrow chunk with type '{:?}'",
                chunk.dtype()
            );
        }

        // SAFETY: We check that the datatype matches.
        let series = unsafe { Self::from_chunks_and_dtype_unchecked(name, vec![chunk], dtype) };
        Ok(series)
    }

    /// Takes chunks and a polars datatype and constructs the Series.
    /// This is faster than creating from chunks and an arrow datatype because there is no
    /// casting involved.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the given `dtype`'s physical type matches all the `ArrayRef` dtypes.
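    ///
    /// # Example
    ///
    /// A sketch only (not a doctest), assuming the chunks were produced elsewhere
    /// with a physical type matching `dtype`:
    ///
    /// ```ignore
    /// // `chunks: Vec<ArrayRef>` must already hold i32 data for DataType::Date.
    /// let s = unsafe {
    ///     Series::from_chunks_and_dtype_unchecked("d".into(), chunks, &DataType::Date)
    /// };
    /// ```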
    pub unsafe fn from_chunks_and_dtype_unchecked(
        name: PlSmallStr,
        chunks: Vec<ArrayRef>,
        dtype: &DataType,
    ) -> Self {
        use DataType::*;
        match dtype {
            Int8 => Int8Chunked::from_chunks(name, chunks).into_series(),
            Int16 => Int16Chunked::from_chunks(name, chunks).into_series(),
            Int32 => Int32Chunked::from_chunks(name, chunks).into_series(),
            Int64 => Int64Chunked::from_chunks(name, chunks).into_series(),
            UInt8 => UInt8Chunked::from_chunks(name, chunks).into_series(),
            UInt16 => UInt16Chunked::from_chunks(name, chunks).into_series(),
            UInt32 => UInt32Chunked::from_chunks(name, chunks).into_series(),
            UInt64 => UInt64Chunked::from_chunks(name, chunks).into_series(),
            #[cfg(feature = "dtype-i128")]
            Int128 => Int128Chunked::from_chunks(name, chunks).into_series(),
            #[cfg(feature = "dtype-u128")]
            UInt128 => UInt128Chunked::from_chunks(name, chunks).into_series(),
            #[cfg(feature = "dtype-date")]
            Date => Int32Chunked::from_chunks(name, chunks)
                .into_date()
                .into_series(),
            #[cfg(feature = "dtype-time")]
            Time => Int64Chunked::from_chunks(name, chunks)
                .into_time()
                .into_series(),
            #[cfg(feature = "dtype-duration")]
            Duration(tu) => Int64Chunked::from_chunks(name, chunks)
                .into_duration(*tu)
                .into_series(),
            #[cfg(feature = "dtype-datetime")]
            Datetime(tu, tz) => Int64Chunked::from_chunks(name, chunks)
                .into_datetime(*tu, tz.clone())
                .into_series(),
            #[cfg(feature = "dtype-decimal")]
            Decimal(precision, scale) => Int128Chunked::from_chunks(name, chunks)
                .into_decimal_unchecked(*precision, *scale)
                .into_series(),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => {
                ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
                    .into_series()
            },
            List(_) => ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
                .into_series(),
            String => StringChunked::from_chunks(name, chunks).into_series(),
            Binary => BinaryChunked::from_chunks(name, chunks).into_series(),
            #[cfg(feature = "dtype-categorical")]
            dt @ (Categorical(_, _) | Enum(_, _)) => {
                with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
                    let phys = ChunkedArray::from_chunks(name, chunks);
                    CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(phys, dt.clone()).into_series()
                })
            },
            Boolean => BooleanChunked::from_chunks(name, chunks).into_series(),
            #[cfg(feature = "dtype-f16")]
            Float16 => Float16Chunked::from_chunks(name, chunks).into_series(),
            Float32 => Float32Chunked::from_chunks(name, chunks).into_series(),
            Float64 => Float64Chunked::from_chunks(name, chunks).into_series(),
            BinaryOffset => BinaryOffsetChunked::from_chunks(name, chunks).into_series(),
            #[cfg(feature = "dtype-extension")]
            Extension(typ, storage) => ExtensionChunked::from_storage(
                typ.clone(),
                Series::from_chunks_and_dtype_unchecked(name, chunks, storage),
            )
            .into_series(),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => {
                let mut ca =
                    StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone());
                StructChunked::propagate_nulls_mut(&mut ca);
                ca.into_series()
            },
            #[cfg(feature = "object")]
            Object(_) => {
                if let Some(arr) = chunks[0].as_any().downcast_ref::<FixedSizeBinaryArray>() {
                    assert_eq!(chunks.len(), 1);
                    // SAFETY:
                    // This is highly unsafe: it dereferences a raw pointer on the heap.
                    // Make sure the pointer is allocated and comes from this pid
                    // (the pid is checked before the dereference).
                    {
                        let pe = PolarsExtension::new(arr.clone());
                        let s = pe.get_series(&name);
                        pe.take_and_forget();
                        s
                    }
                } else {
                    unsafe { get_object_builder(name, 0).from_chunks(chunks) }
                }
            },
            Null => new_null(name, &chunks),
            Unknown(_) => {
                panic!("dtype is unknown; consider supplying data-types for all operations")
            },
            #[allow(unreachable_patterns)]
            _ => unreachable!(),
        }
    }

    /// # Safety
    /// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
    pub unsafe fn _try_from_arrow_unchecked(
        name: PlSmallStr,
        chunks: Vec<ArrayRef>,
        dtype: &ArrowDataType,
    ) -> PolarsResult<Self> {
        Self::_try_from_arrow_unchecked_with_md(name, chunks, dtype, None)
    }

    /// Create a new Series without checking if the inner dtype of the chunks is correct
    ///
    /// # Safety
    /// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
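    ///
    /// # Example
    ///
    /// A hedged sketch (not a doctest); it assumes `utf8_chunk` is an `ArrayRef`
    /// with arrow dtype `LargeUtf8`, which this function casts to the Polars
    /// `String` type:
    ///
    /// ```ignore
    /// let s = unsafe {
    ///     Series::_try_from_arrow_unchecked_with_md(
    ///         "names".into(),
    ///         vec![utf8_chunk],
    ///         &ArrowDataType::LargeUtf8,
    ///         None, // no field metadata
    ///     )
    /// }?;
    /// assert_eq!(s.dtype(), &DataType::String);
    /// ```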
    pub unsafe fn _try_from_arrow_unchecked_with_md(
        name: PlSmallStr,
        mut chunks: Vec<ArrayRef>,
        dtype: &ArrowDataType,
        md: Option<&Metadata>,
    ) -> PolarsResult<Self> {
        match dtype {
            ArrowDataType::Utf8View => Ok(StringChunked::from_chunks(name, chunks).into_series()),
            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
                let chunks =
                    cast_chunks(&chunks, &DataType::String, CastOptions::NonStrict).unwrap();
                Ok(StringChunked::from_chunks(name, chunks).into_series())
            },
            ArrowDataType::BinaryView => Ok(BinaryChunked::from_chunks(name, chunks).into_series()),
            ArrowDataType::LargeBinary => {
                if let Some(md) = md {
                    if md.maintain_type() {
                        return Ok(BinaryOffsetChunked::from_chunks(name, chunks).into_series());
                    }
                }
                let chunks =
                    cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
            },
            ArrowDataType::Binary => {
                let chunks =
                    cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
            },
            ArrowDataType::List(_) | ArrowDataType::LargeList(_) => {
                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
                unsafe {
                    Ok(
                        ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
                            .into_series(),
                    )
                }
            },
            #[cfg(feature = "dtype-array")]
            ArrowDataType::FixedSizeList(_, _) => {
                let (chunks, dtype) = to_physical_and_dtype(chunks, md);
                unsafe {
                    Ok(
                        ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
                            .into_series(),
                    )
                }
            },
            ArrowDataType::Boolean => Ok(BooleanChunked::from_chunks(name, chunks).into_series()),
            #[cfg(feature = "dtype-u8")]
            ArrowDataType::UInt8 => Ok(UInt8Chunked::from_chunks(name, chunks).into_series()),
            #[cfg(feature = "dtype-u16")]
            ArrowDataType::UInt16 => Ok(UInt16Chunked::from_chunks(name, chunks).into_series()),
            ArrowDataType::UInt32 => Ok(UInt32Chunked::from_chunks(name, chunks).into_series()),
            ArrowDataType::UInt64 => Ok(UInt64Chunked::from_chunks(name, chunks).into_series()),
            ArrowDataType::UInt128 => feature_gated!(
                "dtype-u128",
                Ok(UInt128Chunked::from_chunks(name, chunks).into_series())
            ),
            #[cfg(feature = "dtype-i8")]
            ArrowDataType::Int8 => Ok(Int8Chunked::from_chunks(name, chunks).into_series()),
            #[cfg(feature = "dtype-i16")]
            ArrowDataType::Int16 => Ok(Int16Chunked::from_chunks(name, chunks).into_series()),
            ArrowDataType::Int32 => Ok(Int32Chunked::from_chunks(name, chunks).into_series()),
            ArrowDataType::Int64 => Ok(Int64Chunked::from_chunks(name, chunks).into_series()),
            ArrowDataType::Int128 => feature_gated!(
                "dtype-i128",
                Ok(Int128Chunked::from_chunks(name, chunks).into_series())
            ),
            #[cfg(feature = "dtype-f16")]
            ArrowDataType::Float16 => {
                let chunks =
                    cast_chunks(&chunks, &DataType::Float16, CastOptions::NonStrict).unwrap();
                Ok(Float16Chunked::from_chunks(name, chunks).into_series())
            },
            ArrowDataType::Float32 => Ok(Float32Chunked::from_chunks(name, chunks).into_series()),
            ArrowDataType::Float64 => Ok(Float64Chunked::from_chunks(name, chunks).into_series()),
            #[cfg(feature = "dtype-date")]
            ArrowDataType::Date32 => {
                let chunks =
                    cast_chunks(&chunks, &DataType::Int32, CastOptions::Overflowing).unwrap();
                Ok(Int32Chunked::from_chunks(name, chunks)
                    .into_date()
                    .into_series())
            },
            #[cfg(feature = "dtype-datetime")]
            ArrowDataType::Date64 => {
                let chunks =
                    cast_chunks(&chunks, &DataType::Int64, CastOptions::Overflowing).unwrap();
                let ca = Int64Chunked::from_chunks(name, chunks);
                Ok(ca.into_datetime(TimeUnit::Milliseconds, None).into_series())
            },
            #[cfg(feature = "dtype-datetime")]
            ArrowDataType::Timestamp(tu, tz) => {
                let tz = TimeZone::opt_try_new(tz.clone())?;
                let chunks =
                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
                let s = Int64Chunked::from_chunks(name, chunks)
                    .into_datetime(tu.into(), tz)
                    .into_series();
                Ok(match tu {
                    ArrowTimeUnit::Second => &s * MILLISECONDS,
                    ArrowTimeUnit::Millisecond => s,
                    ArrowTimeUnit::Microsecond => s,
                    ArrowTimeUnit::Nanosecond => s,
                })
            },
            #[cfg(feature = "dtype-duration")]
            ArrowDataType::Duration(tu) => {
                let chunks =
                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
                let s = Int64Chunked::from_chunks(name, chunks)
                    .into_duration(tu.into())
                    .into_series();
                Ok(match tu {
                    ArrowTimeUnit::Second => &s * MILLISECONDS,
                    ArrowTimeUnit::Millisecond => s,
                    ArrowTimeUnit::Microsecond => s,
                    ArrowTimeUnit::Nanosecond => s,
                })
            },
            #[cfg(feature = "dtype-time")]
            ArrowDataType::Time64(tu) | ArrowDataType::Time32(tu) => {
                let mut chunks = chunks;
                if matches!(dtype, ArrowDataType::Time32(_)) {
                    chunks =
                        cast_chunks(&chunks, &DataType::Int32, CastOptions::NonStrict).unwrap();
                }
                let chunks =
                    cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
                let s = Int64Chunked::from_chunks(name, chunks)
                    .into_time()
                    .into_series();
                Ok(match tu {
                    ArrowTimeUnit::Second => &s * NANOSECONDS,
                    ArrowTimeUnit::Millisecond => &s * 1_000_000,
                    ArrowTimeUnit::Microsecond => &s * 1_000,
                    ArrowTimeUnit::Nanosecond => s,
                })
            },
            ArrowDataType::Decimal32(precision, scale) => {
                feature_gated!("dtype-decimal", {
                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;

                    let mut chunks = chunks;
                    for chunk in chunks.iter_mut() {
                        let old_chunk = chunk
                            .as_any_mut()
                            .downcast_mut::<PrimitiveArray<i32>>()
                            .unwrap();

                        // For now, we just cast the whole data to i128.
                        let (_, values, validity) = std::mem::take(old_chunk).into_inner();
                        *chunk = PrimitiveArray::new(
                            ArrowDataType::Int128,
                            values.iter().map(|&v| v as i128).collect(),
                            validity,
                        )
                        .to_boxed();
                    }

                    let s = Int128Chunked::from_chunks(name, chunks)
                        .into_decimal_unchecked(*precision, *scale)
                        .into_series();
                    Ok(s)
                })
            },
            ArrowDataType::Decimal64(precision, scale) => {
                feature_gated!("dtype-decimal", {
                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;

                    let mut chunks = chunks;
                    for chunk in chunks.iter_mut() {
                        let old_chunk = chunk
                            .as_any_mut()
                            .downcast_mut::<PrimitiveArray<i64>>()
                            .unwrap();

                        // For now, we just cast the whole data to i128.
                        let (_, values, validity) = std::mem::take(old_chunk).into_inner();
                        *chunk = PrimitiveArray::new(
                            ArrowDataType::Int128,
                            values.iter().map(|&v| v as i128).collect(),
                            validity,
                        )
                        .to_boxed();
                    }

                    let s = Int128Chunked::from_chunks(name, chunks)
                        .into_decimal_unchecked(*precision, *scale)
                        .into_series();
                    Ok(s)
                })
            },
            ArrowDataType::Decimal(precision, scale) => {
                feature_gated!("dtype-decimal", {
                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;

                    let mut chunks = chunks;
                    for chunk in chunks.iter_mut() {
                        *chunk = std::mem::take(
                            chunk
                                .as_any_mut()
                                .downcast_mut::<PrimitiveArray<i128>>()
                                .unwrap(),
                        )
                        .to(ArrowDataType::Int128)
                        .to_boxed();
                    }

                    let s = Int128Chunked::from_chunks(name, chunks)
                        .into_decimal_unchecked(*precision, *scale)
                        .into_series();
                    Ok(s)
                })
            },
            ArrowDataType::Decimal256(precision, scale) => {
                feature_gated!("dtype-decimal", {
                    use arrow::types::i256;

                    polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?;

                    let mut chunks = chunks;
                    for chunk in chunks.iter_mut() {
                        let arr = std::mem::take(
                            chunk
                                .as_any_mut()
                                .downcast_mut::<PrimitiveArray<i256>>()
                                .unwrap(),
                        );
                        let arr_128: PrimitiveArray<i128> = arr.iter().map(|opt_v| {
                            if let Some(v) = opt_v {
                                let smaller: Option<i128> = (*v).try_into().ok();
                                let smaller = smaller.filter(|v| dec128_fits(*v, *precision));
                                smaller.ok_or_else(|| {
                                    polars_err!(ComputeError: "Decimal256 to Decimal128 conversion overflowed, Decimal256 is not (yet) supported in Polars")
                                }).map(Some)
                            } else {
                                Ok(None)
                            }
                        }).try_collect_arr_trusted()?;

                        *chunk = arr_128.to(ArrowDataType::Int128).to_boxed();
                    }

                    let s = Int128Chunked::from_chunks(name, chunks)
                        .into_decimal_unchecked(*precision, *scale)
                        .into_series();
                    Ok(s)
                })
            },
            ArrowDataType::Null => Ok(new_null(name, &chunks)),
            #[cfg(not(feature = "dtype-categorical"))]
            ArrowDataType::Dictionary(_, _, _) => {
                panic!("activate dtype-categorical to convert dictionary arrays")
            },
            #[cfg(feature = "dtype-categorical")]
            ArrowDataType::Dictionary(key_type, _, _) => {
                let polars_dtype = DataType::from_arrow(chunks[0].dtype(), md);

                let mut series_iter = chunks.into_iter().map(|arr| {
                    import_arrow_dictionary_array(name.clone(), arr, key_type, &polars_dtype)
                });

                let mut first = series_iter.next().unwrap()?;

                for s in series_iter {
                    first.append_owned(s?)?;
                }

                Ok(first)
            },
            #[cfg(feature = "object")]
            ArrowDataType::Extension(ext)
                if ext.name == POLARS_OBJECT_EXTENSION_NAME && ext.metadata.is_some() =>
            {
                assert_eq!(chunks.len(), 1);
                let arr = chunks[0]
                    .as_any()
                    .downcast_ref::<FixedSizeBinaryArray>()
                    .unwrap();
                // SAFETY:
                // This is highly unsafe: it dereferences a raw pointer on the heap.
                // Make sure the pointer is allocated and comes from this pid
                // (the pid is checked before the dereference).
                let s = {
                    let pe = PolarsExtension::new(arr.clone());
                    let s = pe.get_series(&name);
                    pe.take_and_forget();
                    s
                };
                Ok(s)
            },
            #[cfg(feature = "dtype-extension")]
            ArrowDataType::Extension(ext) => {
                use crate::datatypes::extension::get_extension_type_or_storage;

                for chunk in &mut chunks {
                    debug_assert!(
                        chunk.dtype() == dtype,
                        "expected chunk dtype to be {:?}, got {:?}",
                        dtype,
                        chunk.dtype()
                    );
                    *chunk.dtype_mut() = ext.inner.clone();
                }
                let storage = Series::_try_from_arrow_unchecked_with_md(
                    name.clone(),
                    chunks,
                    &ext.inner,
                    md,
                )?;

                Ok(
                    match get_extension_type_or_storage(
                        &ext.name,
                        storage.dtype(),
                        ext.metadata.as_deref(),
                    ) {
                        Some(typ) => ExtensionChunked::from_storage(typ, storage).into_series(),
                        None => storage,
                    },
                )
            },

            #[cfg(feature = "dtype-struct")]
            ArrowDataType::Struct(_) => {
                let (chunks, dtype) = to_physical_and_dtype(chunks, md);

                unsafe {
                    let mut ca =
                        StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype);
                    StructChunked::propagate_nulls_mut(&mut ca);
                    Ok(ca.into_series())
                }
            },
            ArrowDataType::FixedSizeBinary(_) => {
                let chunks = cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict)?;
                Ok(BinaryChunked::from_chunks(name, chunks).into_series())
            },
            ArrowDataType::Map(field, _is_ordered) => {
                let struct_arrays = chunks
                    .iter()
                    .map(|arr| {
                        let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
                        arr.field().clone()
                    })
                    .collect::<Vec<_>>();

                let (phys_struct_arrays, dtype) =
                    to_physical_and_dtype(struct_arrays, field.metadata.as_deref());

                let chunks = chunks
                    .iter()
                    .zip(phys_struct_arrays)
                    .map(|(arr, values)| {
                        let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
                        let offsets: &OffsetsBuffer<i32> = arr.offsets();

                        let validity = values.validity().cloned();

                        Box::from(ListArray::<i64>::new(
                            ListArray::<i64>::default_datatype(values.dtype().clone()),
                            OffsetsBuffer::<i64>::from(offsets),
                            values,
                            validity,
                        )) as ArrayRef
                    })
                    .collect();

                unsafe {
                    let out = ListChunked::from_chunks_and_dtype_unchecked(
                        name,
                        chunks,
                        DataType::List(Box::new(dtype)),
                    );

                    Ok(out.into_series())
                }
            },
            ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
                check_allow_importing_interval_as_struct("month_day_nano_interval")?;

                feature_gated!("dtype-struct", {
                    let chunks = chunks
                        .into_iter()
                        .map(convert_month_day_nano_to_struct)
                        .collect::<PolarsResult<Vec<_>>>()?;

                    Ok(StructChunked::from_chunks_and_dtype_unchecked(
                        name,
                        chunks,
                        DataType::_month_days_ns_struct_type(),
                    )
                    .into_series())
                })
            },

            dt => polars_bail!(ComputeError: "cannot create series from {:?}", dt),
        }
    }
}

fn convert<F: Fn(&dyn Array) -> ArrayRef>(arr: &[ArrayRef], f: F) -> Vec<ArrayRef> {
    arr.iter().map(|arr| f(&**arr)).collect()
}

/// Converts to physical types and bubbles up the correct [`DataType`].
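///
/// The mapping is best read from the match arms below; for instance, arrow
/// `LargeUtf8` chunks are cast to the Polars `String` representation, and nested
/// types (lists, fixed-size lists, structs, maps) are converted recursively.
///
/// A rough sketch of a call site (illustrative only; `arrays` is a hypothetical
/// `Vec<ArrayRef>` of `LargeUtf8` chunks):
///
/// ```ignore
/// let (phys_chunks, dtype) = unsafe { to_physical_and_dtype(arrays, None) };
/// assert_eq!(dtype, DataType::String);
/// ```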
#[allow(clippy::only_used_in_recursion)]
unsafe fn to_physical_and_dtype(
    arrays: Vec<ArrayRef>,
    md: Option<&Metadata>,
) -> (Vec<ArrayRef>, DataType) {
    match arrays[0].dtype() {
        ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
            let chunks = cast_chunks(&arrays, &DataType::String, CastOptions::NonStrict).unwrap();
            (chunks, DataType::String)
        },
        ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::FixedSizeBinary(_) => {
            let chunks = cast_chunks(&arrays, &DataType::Binary, CastOptions::NonStrict).unwrap();
            (chunks, DataType::Binary)
        },
        #[allow(unused_variables)]
        dt @ ArrowDataType::Dictionary(_, _, _) => {
            feature_gated!("dtype-categorical", {
                let s = unsafe {
                    let dt = dt.clone();
                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
                }
                .unwrap();
                (s.chunks().clone(), s.dtype().clone())
            })
        },
        dt @ ArrowDataType::Extension(_) => {
            feature_gated!("dtype-extension", {
                let s = unsafe {
                    let dt = dt.clone();
                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
                }
                .unwrap();
                (s.chunks().clone(), s.dtype().clone())
            })
        },
        ArrowDataType::List(field) => {
            let out = convert(&arrays, |arr| {
                cast(arr, &ArrowDataType::LargeList(field.clone())).unwrap()
            });
            to_physical_and_dtype(out, md)
        },
        #[cfg(feature = "dtype-array")]
        ArrowDataType::FixedSizeList(field, size) => {
            let values = arrays
                .iter()
                .map(|arr| {
                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
                    arr.values().clone()
                })
                .collect::<Vec<_>>();

            let (converted_values, dtype) =
                to_physical_and_dtype(values, field.metadata.as_deref());

            let arrays = arrays
                .iter()
                .zip(converted_values)
                .map(|(arr, values)| {
                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();

                    let dtype = FixedSizeListArray::default_datatype(values.dtype().clone(), *size);
                    Box::from(FixedSizeListArray::new(
                        dtype,
                        arr.len(),
                        values,
                        arr.validity().cloned(),
                    )) as ArrayRef
                })
                .collect();
            (arrays, DataType::Array(Box::new(dtype), *size))
        },
        ArrowDataType::LargeList(field) => {
            let values = arrays
                .iter()
                .map(|arr| {
                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
                    arr.values().clone()
                })
                .collect::<Vec<_>>();

            let (converted_values, dtype) =
                to_physical_and_dtype(values, field.metadata.as_deref());

            let arrays = arrays
                .iter()
                .zip(converted_values)
                .map(|(arr, values)| {
                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();

                    let dtype = ListArray::<i64>::default_datatype(values.dtype().clone());
                    Box::from(ListArray::<i64>::new(
                        dtype,
                        arr.offsets().clone(),
                        values,
                        arr.validity().cloned(),
                    )) as ArrayRef
                })
                .collect();
            (arrays, DataType::List(Box::new(dtype)))
        },
        ArrowDataType::Struct(_fields) => {
            feature_gated!("dtype-struct", {
                let mut pl_fields = None;
                let arrays = arrays
                    .iter()
                    .map(|arr| {
                        let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
                        let (values, dtypes): (Vec<_>, Vec<_>) = arr
                            .values()
                            .iter()
                            .zip(_fields.iter())
                            .map(|(value, field)| {
                                let mut out = to_physical_and_dtype(
                                    vec![value.clone()],
                                    field.metadata.as_deref(),
                                );
                                (out.0.pop().unwrap(), out.1)
                            })
                            .unzip();

                        let arrow_fields = values
                            .iter()
                            .zip(_fields.iter())
                            .map(|(arr, field)| {
                                ArrowField::new(field.name.clone(), arr.dtype().clone(), true)
                            })
                            .collect();
                        let arrow_array = Box::new(StructArray::new(
                            ArrowDataType::Struct(arrow_fields),
                            arr.len(),
                            values,
                            arr.validity().cloned(),
                        )) as ArrayRef;

                        if pl_fields.is_none() {
                            pl_fields = Some(
                                _fields
                                    .iter()
                                    .zip(dtypes)
                                    .map(|(field, dtype)| Field::new(field.name.clone(), dtype))
                                    .collect_vec(),
                            )
                        }

                        arrow_array
                    })
                    .collect_vec();

                (arrays, DataType::Struct(pl_fields.unwrap()))
            })
        },
        // Use Series architecture to convert nested logical types to physical.
        dt @ (ArrowDataType::Duration(_)
        | ArrowDataType::Time32(_)
        | ArrowDataType::Time64(_)
        | ArrowDataType::Timestamp(_, _)
        | ArrowDataType::Date32
        | ArrowDataType::Decimal(_, _)
        | ArrowDataType::Date64
        | ArrowDataType::Map(_, _)) => {
            let dt = dt.clone();
            let mut s = Series::_try_from_arrow_unchecked(PlSmallStr::EMPTY, arrays, &dt).unwrap();
            let dtype = s.dtype().clone();
            (std::mem::take(s.chunks_mut()), dtype)
        },
        dt => {
            let dtype = DataType::from_arrow(dt, md);
            (arrays, dtype)
        },
    }
}

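/// Import a single arrow dictionary-encoded chunk as a [`Series`].
///
/// When the target Polars dtype is `Categorical`/`Enum`, the dictionary values are
/// read as strings and fed into a categorical builder; otherwise the keys are cast
/// to the index type and used to `take` from the materialized values.
///
/// A hedged sketch of a call (illustrative only; `dict_arr` is a hypothetical
/// boxed `DictionaryArray<u32>` chunk):
///
/// ```ignore
/// let s = unsafe {
///     import_arrow_dictionary_array(
///         "cats".into(),
///         dict_arr,
///         &arrow::datatypes::IntegerType::UInt32,
///         &polars_dtype,
///     )
/// }?;
/// ```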
#[cfg(feature = "dtype-categorical")]
unsafe fn import_arrow_dictionary_array(
    name: PlSmallStr,
    arr: Box<dyn Array>,
    key_type: &arrow::datatypes::IntegerType,
    polars_dtype: &DataType,
) -> PolarsResult<Series> {
    use arrow::datatypes::IntegerType as I;

    if matches!(
        polars_dtype,
        DataType::Categorical(_, _) | DataType::Enum(_, _)
    ) {
        macro_rules! unpack_categorical_chunked {
            ($dt:ty) => {{
                let arr = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
                let keys = arr.keys();
                let values = arr.values();
                let values = cast(&**values, &ArrowDataType::Utf8View)?;
                let values = values.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
                with_match_categorical_physical_type!(polars_dtype.cat_physical().unwrap(), |$C| {
                    let ca = CategoricalChunked::<$C>::from_str_iter(
                        name,
                        polars_dtype.clone(),
                        keys.iter().map(|k| {
                            let k: usize = (*k?).try_into().ok()?;
                            values.get(k)
                        }),
                    )?;
                    Ok(ca.into_series())
                })
            }};
        }

        match key_type {
            I::Int8 => unpack_categorical_chunked!(i8),
            I::UInt8 => unpack_categorical_chunked!(u8),
            I::Int16 => unpack_categorical_chunked!(i16),
            I::UInt16 => unpack_categorical_chunked!(u16),
            I::Int32 => unpack_categorical_chunked!(i32),
            I::UInt32 => unpack_categorical_chunked!(u32),
            I::Int64 => unpack_categorical_chunked!(i64),
            I::UInt64 => unpack_categorical_chunked!(u64),
            _ => polars_bail!(
                ComputeError: "unsupported arrow key type: {key_type:?}"
            ),
        }
    } else {
        macro_rules! unpack_keys_values {
            ($dt:ty) => {{
                let arr = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
                let keys = arr.keys();
                let keys = polars_compute::cast::primitive_to_primitive::<
                    $dt,
                    <IdxType as PolarsNumericType>::Native,
                >(keys, &IDX_DTYPE.to_arrow(CompatLevel::newest()));
                (keys, arr.values())
            }};
        }

        let (keys, values) = match key_type {
            I::Int8 => unpack_keys_values!(i8),
            I::UInt8 => unpack_keys_values!(u8),
            I::Int16 => unpack_keys_values!(i16),
            I::UInt16 => unpack_keys_values!(u16),
            I::Int32 => unpack_keys_values!(i32),
            I::UInt32 => unpack_keys_values!(u32),
            I::Int64 => unpack_keys_values!(i64),
            I::UInt64 => unpack_keys_values!(u64),
            _ => polars_bail!(
                ComputeError: "unsupported arrow key type: {key_type:?}"
            ),
        };

        let values = Series::_try_from_arrow_unchecked_with_md(
            name,
            vec![values.clone()],
            values.dtype(),
            None,
        )?;

        values.take(&IdxCa::from_chunks_and_dtype(
            PlSmallStr::EMPTY,
            vec![keys.to_boxed()],
            IDX_DTYPE,
        ))
    }
}

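/// Convert an arrow `month_day_nano` interval array into the struct layout Polars
/// uses for it: three fields holding months (`i32`), days (`i32`) and nanoseconds
/// (`i64`), with the original validity preserved.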
#[cfg(feature = "dtype-struct")]
fn convert_month_day_nano_to_struct(chunk: Box<dyn Array>) -> PolarsResult<Box<dyn Array>> {
    let arr: &PrimitiveArray<months_days_ns> = chunk.as_any().downcast_ref().unwrap();

    let values: &[months_days_ns] = arr.values();

    let (months_out, days_out, nanoseconds_out): (Vec<i32>, Vec<i32>, Vec<i64>) = values
        .iter()
        .map(|x| (x.months(), x.days(), x.ns()))
        .collect();

    let out = StructArray::new(
        DataType::_month_days_ns_struct_type()
            .to_physical()
            .to_arrow(CompatLevel::newest()),
        arr.len(),
        vec![
            PrimitiveArray::<i32>::from_vec(months_out).boxed(),
            PrimitiveArray::<i32>::from_vec(days_out).boxed(),
            PrimitiveArray::<i64>::from_vec(nanoseconds_out).boxed(),
        ],
        arr.validity().cloned(),
    );

    Ok(out.boxed())
}

fn check_types(chunks: &[ArrayRef]) -> PolarsResult<ArrowDataType> {
    let mut chunks_iter = chunks.iter();
    let dtype: ArrowDataType = chunks_iter
        .next()
        .ok_or_else(|| polars_err!(NoData: "expected at least one array-ref"))?
        .dtype()
        .clone();

    for chunk in chunks_iter {
        if chunk.dtype() != &dtype {
            polars_bail!(
                ComputeError: "cannot create series from multiple arrays with different types"
            );
        }
    }
    Ok(dtype)
}

impl Series {
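    /// Generic constructor that defers to the `TryFrom<(PlSmallStr, T)>` impls below.
    ///
    /// # Example
    ///
    /// A hedged sketch (not a doctest); it assumes `chunks` is a `Vec<ArrayRef>`
    /// whose arrays all share one arrow dtype:
    ///
    /// ```ignore
    /// let s = Series::try_new("a".into(), chunks)?;
    /// ```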
    pub fn try_new<T>(
        name: PlSmallStr,
        data: T,
    ) -> Result<Self, <(PlSmallStr, T) as TryInto<Self>>::Error>
    where
        (PlSmallStr, T): TryInto<Self>,
    {
        // # TODO
        // * Remove the TryFrom<tuple> impls in favor of this
        <(PlSmallStr, T) as TryInto<Self>>::try_into((name, data))
    }
}

impl TryFrom<(PlSmallStr, Vec<ArrayRef>)> for Series {
    type Error = PolarsError;

    fn try_from(name_arr: (PlSmallStr, Vec<ArrayRef>)) -> PolarsResult<Self> {
        let (name, chunks) = name_arr;

        let dtype = check_types(&chunks)?;
        // SAFETY:
        // dtype is checked
        unsafe { Series::_try_from_arrow_unchecked(name, chunks, &dtype) }
    }
}

impl TryFrom<(PlSmallStr, ArrayRef)> for Series {
    type Error = PolarsError;

    fn try_from(name_arr: (PlSmallStr, ArrayRef)) -> PolarsResult<Self> {
        let (name, arr) = name_arr;
        Series::try_from((name, vec![arr]))
    }
}

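/// Builds a [`Series`] from an arrow field plus its chunks, checking that the field
/// dtype matches the chunks (dictionary ordering is ignored) and forwarding the
/// field metadata.
///
/// A hedged usage sketch (illustrative only; `field` and `chunks` are a hypothetical
/// `ArrowField` and `Vec<ArrayRef>` from an imported arrow record batch):
///
/// ```ignore
/// let s = Series::try_from((&field, chunks))?;
/// ```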
impl TryFrom<(&ArrowField, Vec<ArrayRef>)> for Series {
    type Error = PolarsError;

    fn try_from(field_arr: (&ArrowField, Vec<ArrayRef>)) -> PolarsResult<Self> {
        let (field, chunks) = field_arr;
        let arrow_dt = field.dtype();
        let dtype = check_types(&chunks)?;
        let compatible = match (&dtype, arrow_dt) {
            // See #26174, we don't care about dictionary ordering.
            (
                ArrowDataType::Dictionary(int0, inner0, _ord0),
                ArrowDataType::Dictionary(int1, inner1, _ord1),
            ) => (int0, inner0) == (int1, inner1),
            (l, r) => l == r,
        };
        polars_ensure!(compatible, ComputeError: "Arrow Field dtype does not match the ArrayRef dtypes");

        // SAFETY:
        // dtype is checked
        unsafe {
            Series::_try_from_arrow_unchecked_with_md(
                field.name.clone(),
                chunks,
                &dtype,
                field.metadata.as_deref(),
            )
        }
    }
}

impl TryFrom<(&ArrowField, ArrayRef)> for Series {
    type Error = PolarsError;

    fn try_from(field_arr: (&ArrowField, ArrayRef)) -> PolarsResult<Self> {
        let (field, arr) = field_arr;
        Series::try_from((field, vec![arr]))
    }
}

/// Used to convert a [`ChunkedArray`], `&dyn SeriesTrait` and [`Series`]
/// into a [`Series`].
///
/// # Safety
///
/// This trait is marked `unsafe` as the `is_series` return value is used
/// to transmute to `Series`. This must always return `false` except
/// for `Series` structs.
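///
/// # Example
///
/// A minimal sketch (not a doctest):
///
/// ```ignore
/// let ca = Int32Chunked::from_slice("a".into(), &[1, 2, 3]);
/// let s: Series = ca.into_series();
/// ```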
pub unsafe trait IntoSeries {
    fn is_series() -> bool {
        false
    }

    fn into_series(self) -> Series
    where
        Self: Sized;
}

impl<T> From<ChunkedArray<T>> for Series
where
    T: PolarsDataType,
    ChunkedArray<T>: IntoSeries,
{
    fn from(ca: ChunkedArray<T>) -> Self {
        ca.into_series()
    }
}

#[cfg(feature = "dtype-date")]
impl From<DateChunked> for Series {
    fn from(a: DateChunked) -> Self {
        a.into_series()
    }
}

#[cfg(feature = "dtype-datetime")]
impl From<DatetimeChunked> for Series {
    fn from(a: DatetimeChunked) -> Self {
        a.into_series()
    }
}

#[cfg(feature = "dtype-duration")]
impl From<DurationChunked> for Series {
    fn from(a: DurationChunked) -> Self {
        a.into_series()
    }
}

#[cfg(feature = "dtype-time")]
impl From<TimeChunked> for Series {
    fn from(a: TimeChunked) -> Self {
        a.into_series()
    }
}

unsafe impl IntoSeries for Arc<dyn SeriesTrait> {
    fn into_series(self) -> Series {
        Series(self)
    }
}

unsafe impl IntoSeries for Series {
    fn is_series() -> bool {
        true
    }

    fn into_series(self) -> Series {
        self
    }
}

fn new_null(name: PlSmallStr, chunks: &[ArrayRef]) -> Series {
    let len = chunks.iter().map(|arr| arr.len()).sum();
    Series::new_null(name, len)
}