Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/series/from.rs
6940 views
1
use arrow::datatypes::Metadata;
2
use arrow::offset::OffsetsBuffer;
3
#[cfg(any(
4
feature = "dtype-date",
5
feature = "dtype-datetime",
6
feature = "dtype-time",
7
feature = "dtype-duration"
8
))]
9
use arrow::temporal_conversions::*;
10
use polars_compute::cast::cast_unchecked as cast;
11
use polars_error::feature_gated;
12
use polars_utils::itertools::Itertools;
13
14
use crate::chunked_array::cast::{CastOptions, cast_chunks};
15
#[cfg(feature = "object")]
16
use crate::chunked_array::object::extension::polars_extension::PolarsExtension;
17
#[cfg(feature = "object")]
18
use crate::chunked_array::object::registry::get_object_builder;
19
use crate::prelude::*;
20
21
impl Series {
22
pub fn from_array<A: ParameterFreeDtypeStaticArray>(name: PlSmallStr, array: A) -> Self {
23
unsafe {
24
Self::from_chunks_and_dtype_unchecked(
25
name,
26
vec![Box::new(array)],
27
&DataType::from_arrow_dtype(&A::get_dtype()),
28
)
29
}
30
}
31
32
pub fn from_chunk_and_dtype(
33
name: PlSmallStr,
34
chunk: ArrayRef,
35
dtype: &DataType,
36
) -> PolarsResult<Self> {
37
if &dtype.to_physical().to_arrow(CompatLevel::newest()) != chunk.dtype() {
38
polars_bail!(
39
InvalidOperation: "cannot create a series of type '{dtype}' of arrow chunk with type '{:?}'",
40
chunk.dtype()
41
);
42
}
43
44
// SAFETY: We check that the datatype matches.
45
let series = unsafe { Self::from_chunks_and_dtype_unchecked(name, vec![chunk], dtype) };
46
Ok(series)
47
}
48
49
/// Takes chunks and a polars datatype and constructs the Series
/// This is faster than creating from chunks and an arrow datatype because there is no
/// casting involved
///
/// # Safety
///
/// The caller must ensure that the given `dtype`'s physical type matches all the `ArrayRef` dtypes.
pub unsafe fn from_chunks_and_dtype_unchecked(
    name: PlSmallStr,
    chunks: Vec<ArrayRef>,
    dtype: &DataType,
) -> Self {
    use DataType::*;
    match dtype {
        // Primitive integers map 1:1 onto their ChunkedArray counterparts.
        Int8 => Int8Chunked::from_chunks(name, chunks).into_series(),
        Int16 => Int16Chunked::from_chunks(name, chunks).into_series(),
        Int32 => Int32Chunked::from_chunks(name, chunks).into_series(),
        Int64 => Int64Chunked::from_chunks(name, chunks).into_series(),
        UInt8 => UInt8Chunked::from_chunks(name, chunks).into_series(),
        UInt16 => UInt16Chunked::from_chunks(name, chunks).into_series(),
        UInt32 => UInt32Chunked::from_chunks(name, chunks).into_series(),
        UInt64 => UInt64Chunked::from_chunks(name, chunks).into_series(),
        #[cfg(feature = "dtype-i128")]
        Int128 => Int128Chunked::from_chunks(name, chunks).into_series(),
        // Temporal dtypes are logical wrappers over their integer physical storage.
        #[cfg(feature = "dtype-date")]
        Date => Int32Chunked::from_chunks(name, chunks)
            .into_date()
            .into_series(),
        #[cfg(feature = "dtype-time")]
        Time => Int64Chunked::from_chunks(name, chunks)
            .into_time()
            .into_series(),
        #[cfg(feature = "dtype-duration")]
        Duration(tu) => Int64Chunked::from_chunks(name, chunks)
            .into_duration(*tu)
            .into_series(),
        #[cfg(feature = "dtype-datetime")]
        Datetime(tu, tz) => Int64Chunked::from_chunks(name, chunks)
            .into_datetime(*tu, tz.clone())
            .into_series(),
        #[cfg(feature = "dtype-decimal")]
        Decimal(precision, scale) => Int128Chunked::from_chunks(name, chunks)
            .into_decimal_unchecked(
                *precision,
                // A Decimal dtype reaching this point must carry a concrete scale.
                scale.unwrap_or_else(|| unreachable!("scale should be set")),
            )
            .into_series(),
        #[cfg(feature = "dtype-array")]
        Array(_, _) => {
            ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
                .into_series()
        },
        List(_) => ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone())
            .into_series(),
        String => StringChunked::from_chunks(name, chunks).into_series(),
        Binary => BinaryChunked::from_chunks(name, chunks).into_series(),
        #[cfg(feature = "dtype-categorical")]
        dt @ (Categorical(_, _) | Enum(_, _)) => {
            // Build the physical key array first, then attach the categorical dtype.
            with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
                let phys = ChunkedArray::from_chunks(name, chunks);
                CategoricalChunked::<$C>::from_cats_and_dtype_unchecked(phys, dt.clone()).into_series()
            })
        },
        Boolean => BooleanChunked::from_chunks(name, chunks).into_series(),
        Float32 => Float32Chunked::from_chunks(name, chunks).into_series(),
        Float64 => Float64Chunked::from_chunks(name, chunks).into_series(),
        BinaryOffset => BinaryOffsetChunked::from_chunks(name, chunks).into_series(),
        #[cfg(feature = "dtype-struct")]
        Struct(_) => {
            let mut ca =
                StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype.clone());
            // Make outer-null rows consistent with the field arrays.
            StructChunked::propagate_nulls_mut(&mut ca);
            ca.into_series()
        },
        #[cfg(feature = "object")]
        Object(_) => {
            if let Some(arr) = chunks[0].as_any().downcast_ref::<FixedSizeBinaryArray>() {
                // Extension-encoded objects only support a single chunk.
                assert_eq!(chunks.len(), 1);
                // SAFETY:
                // this is highly unsafe. it will dereference a raw ptr on the heap
                // make sure the ptr is allocated and from this pid
                // (the pid is checked before dereference)
                {
                    let pe = PolarsExtension::new(arr.clone());
                    let s = pe.get_series(&name);
                    // Prevent the extension from dropping (and freeing) the payload it lent out.
                    pe.take_and_forget();
                    s
                }
            } else {
                unsafe { get_object_builder(name, 0).from_chunks(chunks) }
            }
        },
        Null => new_null(name, &chunks),
        Unknown(_) => {
            panic!("dtype is unknown; consider supplying data-types for all operations")
        },
        // Only reachable when some dtype features are compiled out.
        #[allow(unreachable_patterns)]
        _ => unreachable!(),
    }
}
149
150
/// # Safety
151
/// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
152
pub unsafe fn _try_from_arrow_unchecked(
153
name: PlSmallStr,
154
chunks: Vec<ArrayRef>,
155
dtype: &ArrowDataType,
156
) -> PolarsResult<Self> {
157
Self::_try_from_arrow_unchecked_with_md(name, chunks, dtype, None)
158
}
159
160
/// Create a new Series without checking if the inner dtype of the chunks is correct
///
/// # Safety
/// The caller must ensure that the given `dtype` matches all the `ArrayRef` dtypes.
pub unsafe fn _try_from_arrow_unchecked_with_md(
    name: PlSmallStr,
    chunks: Vec<ArrayRef>,
    dtype: &ArrowDataType,
    md: Option<&Metadata>,
) -> PolarsResult<Self> {
    match dtype {
        // View-layout strings are polars' native string representation.
        ArrowDataType::Utf8View => Ok(StringChunked::from_chunks(name, chunks).into_series()),
        // Offset-based string layouts are cast to the view layout first.
        ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
            let chunks =
                cast_chunks(&chunks, &DataType::String, CastOptions::NonStrict).unwrap();
            Ok(StringChunked::from_chunks(name, chunks).into_series())
        },
        ArrowDataType::BinaryView => Ok(BinaryChunked::from_chunks(name, chunks).into_series()),
        ArrowDataType::LargeBinary => {
            // Field metadata may request keeping the offset-based binary layout.
            if let Some(md) = md {
                if md.maintain_type() {
                    return Ok(BinaryOffsetChunked::from_chunks(name, chunks).into_series());
                }
            }
            let chunks =
                cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
            Ok(BinaryChunked::from_chunks(name, chunks).into_series())
        },
        ArrowDataType::Binary => {
            let chunks =
                cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict).unwrap();
            Ok(BinaryChunked::from_chunks(name, chunks).into_series())
        },
        ArrowDataType::List(_) | ArrowDataType::LargeList(_) => {
            // Convert nested values to physical types and bubble up the polars dtype.
            let (chunks, dtype) = to_physical_and_dtype(chunks, md);
            unsafe {
                Ok(
                    ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
                        .into_series(),
                )
            }
        },
        #[cfg(feature = "dtype-array")]
        ArrowDataType::FixedSizeList(_, _) => {
            let (chunks, dtype) = to_physical_and_dtype(chunks, md);
            unsafe {
                Ok(
                    ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype)
                        .into_series(),
                )
            }
        },
        ArrowDataType::Boolean => Ok(BooleanChunked::from_chunks(name, chunks).into_series()),
        #[cfg(feature = "dtype-u8")]
        ArrowDataType::UInt8 => Ok(UInt8Chunked::from_chunks(name, chunks).into_series()),
        #[cfg(feature = "dtype-u16")]
        ArrowDataType::UInt16 => Ok(UInt16Chunked::from_chunks(name, chunks).into_series()),
        ArrowDataType::UInt32 => Ok(UInt32Chunked::from_chunks(name, chunks).into_series()),
        ArrowDataType::UInt64 => Ok(UInt64Chunked::from_chunks(name, chunks).into_series()),
        #[cfg(feature = "dtype-i8")]
        ArrowDataType::Int8 => Ok(Int8Chunked::from_chunks(name, chunks).into_series()),
        #[cfg(feature = "dtype-i16")]
        ArrowDataType::Int16 => Ok(Int16Chunked::from_chunks(name, chunks).into_series()),
        ArrowDataType::Int32 => Ok(Int32Chunked::from_chunks(name, chunks).into_series()),
        ArrowDataType::Int64 => Ok(Int64Chunked::from_chunks(name, chunks).into_series()),
        ArrowDataType::Int128 => feature_gated!(
            "dtype-i128",
            Ok(Int128Chunked::from_chunks(name, chunks).into_series())
        ),
        // Polars has no f16; widen to f32.
        ArrowDataType::Float16 => {
            let chunks =
                cast_chunks(&chunks, &DataType::Float32, CastOptions::NonStrict).unwrap();
            Ok(Float32Chunked::from_chunks(name, chunks).into_series())
        },
        ArrowDataType::Float32 => Ok(Float32Chunked::from_chunks(name, chunks).into_series()),
        ArrowDataType::Float64 => Ok(Float64Chunked::from_chunks(name, chunks).into_series()),
        #[cfg(feature = "dtype-date")]
        ArrowDataType::Date32 => {
            let chunks =
                cast_chunks(&chunks, &DataType::Int32, CastOptions::Overflowing).unwrap();
            Ok(Int32Chunked::from_chunks(name, chunks)
                .into_date()
                .into_series())
        },
        // Arrow Date64 is imported as a millisecond datetime.
        #[cfg(feature = "dtype-datetime")]
        ArrowDataType::Date64 => {
            let chunks =
                cast_chunks(&chunks, &DataType::Int64, CastOptions::Overflowing).unwrap();
            let ca = Int64Chunked::from_chunks(name, chunks);
            Ok(ca.into_datetime(TimeUnit::Milliseconds, None).into_series())
        },
        #[cfg(feature = "dtype-datetime")]
        ArrowDataType::Timestamp(tu, tz) => {
            let tz = TimeZone::opt_try_new(tz.clone())?;
            let chunks =
                cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
            let s = Int64Chunked::from_chunks(name, chunks)
                .into_datetime(tu.into(), tz)
                .into_series();
            // Second resolution has no polars TimeUnit: rescale the raw values to
            // match the unit produced by `tu.into()` (presumably milliseconds —
            // TODO(review): confirm the ArrowTimeUnit -> TimeUnit mapping).
            Ok(match tu {
                ArrowTimeUnit::Second => &s * MILLISECONDS,
                ArrowTimeUnit::Millisecond => s,
                ArrowTimeUnit::Microsecond => s,
                ArrowTimeUnit::Nanosecond => s,
            })
        },
        #[cfg(feature = "dtype-duration")]
        ArrowDataType::Duration(tu) => {
            let chunks =
                cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
            let s = Int64Chunked::from_chunks(name, chunks)
                .into_duration(tu.into())
                .into_series();
            // Same second-resolution rescaling as for Timestamp above.
            Ok(match tu {
                ArrowTimeUnit::Second => &s * MILLISECONDS,
                ArrowTimeUnit::Millisecond => s,
                ArrowTimeUnit::Microsecond => s,
                ArrowTimeUnit::Nanosecond => s,
            })
        },
        #[cfg(feature = "dtype-time")]
        ArrowDataType::Time64(tu) | ArrowDataType::Time32(tu) => {
            let mut chunks = chunks;
            // Time32 is physically i32; normalize through Int32 before widening.
            if matches!(dtype, ArrowDataType::Time32(_)) {
                chunks =
                    cast_chunks(&chunks, &DataType::Int32, CastOptions::NonStrict).unwrap();
            }
            let chunks =
                cast_chunks(&chunks, &DataType::Int64, CastOptions::NonStrict).unwrap();
            let s = Int64Chunked::from_chunks(name, chunks)
                .into_time()
                .into_series();
            // Scale whatever unit arrow used to polars' nanosecond-based Time.
            Ok(match tu {
                ArrowTimeUnit::Second => &s * NANOSECONDS,
                ArrowTimeUnit::Millisecond => &s * 1_000_000,
                ArrowTimeUnit::Microsecond => &s * 1_000,
                ArrowTimeUnit::Nanosecond => s,
            })
        },
        ArrowDataType::Decimal32(precision, scale) => {
            feature_gated!("dtype-decimal", {
                polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})");
                polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision");

                // Widen the i32 decimal storage to the i128 storage polars uses.
                let mut chunks = chunks;
                for chunk in chunks.iter_mut() {
                    let old_chunk = chunk
                        .as_any_mut()
                        .downcast_mut::<PrimitiveArray<i32>>()
                        .unwrap();

                    // For now, we just cast the whole data to i128.
                    let (_, values, validity) = std::mem::take(old_chunk).into_inner();
                    *chunk = PrimitiveArray::new(
                        ArrowDataType::Int128,
                        values.iter().map(|&v| v as i128).collect(),
                        validity,
                    )
                    .to_boxed();
                }

                // @NOTE: We cannot cast here as that will lower the scale.
                let s = Int128Chunked::from_chunks(name, chunks)
                    .into_decimal_unchecked(Some(*precision), *scale)
                    .into_series();
                Ok(s)
            })
        },
        ArrowDataType::Decimal64(precision, scale) => {
            feature_gated!("dtype-decimal", {
                polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})");
                polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision");

                // Widen the i64 decimal storage to i128, same as the Decimal32 arm.
                let mut chunks = chunks;
                for chunk in chunks.iter_mut() {
                    let old_chunk = chunk
                        .as_any_mut()
                        .downcast_mut::<PrimitiveArray<i64>>()
                        .unwrap();

                    // For now, we just cast the whole data to i128.
                    let (_, values, validity) = std::mem::take(old_chunk).into_inner();
                    *chunk = PrimitiveArray::new(
                        ArrowDataType::Int128,
                        values.iter().map(|&v| v as i128).collect(),
                        validity,
                    )
                    .to_boxed();
                }

                // @NOTE: We cannot cast here as that will lower the scale.
                let s = Int128Chunked::from_chunks(name, chunks)
                    .into_decimal_unchecked(Some(*precision), *scale)
                    .into_series();
                Ok(s)
            })
        },
        ArrowDataType::Decimal(precision, scale)
        | ArrowDataType::Decimal256(precision, scale) => {
            feature_gated!("dtype-decimal", {
                polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})");
                polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision");

                // Q? I don't think this is correct for Decimal256?
                let mut chunks = chunks;
                for chunk in chunks.iter_mut() {
                    *chunk = std::mem::take(
                        chunk
                            .as_any_mut()
                            .downcast_mut::<PrimitiveArray<i128>>()
                            .unwrap(),
                    )
                    .to(ArrowDataType::Int128)
                    .to_boxed();
                }

                // @NOTE: We cannot cast here as that will lower the scale.
                let s = Int128Chunked::from_chunks(name, chunks)
                    .into_decimal_unchecked(Some(*precision), *scale)
                    .into_series();
                Ok(s)
            })
        },
        ArrowDataType::Null => Ok(new_null(name, &chunks)),
        #[cfg(not(feature = "dtype-categorical"))]
        ArrowDataType::Dictionary(_, _, _) => {
            panic!("activate dtype-categorical to convert dictionary arrays")
        },
        #[cfg(feature = "dtype-categorical")]
        ArrowDataType::Dictionary(key_type, _, _) => {
            let polars_dtype = DataType::from_arrow(chunks[0].dtype(), md);

            // Import each chunk separately, then append them into one series.
            let mut series_iter = chunks.into_iter().map(|arr| {
                import_arrow_dictionary_array(name.clone(), arr, key_type, &polars_dtype)
            });

            let mut first = series_iter.next().unwrap()?;

            for s in series_iter {
                first.append_owned(s?)?;
            }

            Ok(first)
        },
        #[cfg(feature = "object")]
        ArrowDataType::Extension(ext)
            if ext.name == EXTENSION_NAME && ext.metadata.is_some() =>
        {
            // The object extension encoding only ever carries one chunk.
            assert_eq!(chunks.len(), 1);
            let arr = chunks[0]
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();
            // SAFETY:
            // this is highly unsafe. it will dereference a raw ptr on the heap
            // make sure the ptr is allocated and from this pid
            // (the pid is checked before dereference)
            let s = {
                let pe = PolarsExtension::new(arr.clone());
                let s = pe.get_series(&name);
                // Prevent a double free of the payload the extension lent out.
                pe.take_and_forget();
                s
            };
            Ok(s)
        },
        #[cfg(feature = "dtype-struct")]
        ArrowDataType::Struct(_) => {
            let (chunks, dtype) = to_physical_and_dtype(chunks, md);

            unsafe {
                let mut ca =
                    StructChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype);
                // Make outer-null rows consistent with the field arrays.
                StructChunked::propagate_nulls_mut(&mut ca);
                Ok(ca.into_series())
            }
        },
        ArrowDataType::FixedSizeBinary(_) => {
            let chunks = cast_chunks(&chunks, &DataType::Binary, CastOptions::NonStrict)?;
            Ok(BinaryChunked::from_chunks(name, chunks).into_series())
        },
        ArrowDataType::Map(field, _is_ordered) => {
            // Unwrap the inner struct array of every MapArray chunk.
            let struct_arrays = chunks
                .iter()
                .map(|arr| {
                    let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
                    arr.field().clone()
                })
                .collect::<Vec<_>>();

            let (phys_struct_arrays, dtype) =
                to_physical_and_dtype(struct_arrays, field.metadata.as_deref());

            // Rebuild each map chunk as a LargeList over the physical struct values.
            let chunks = chunks
                .iter()
                .zip(phys_struct_arrays)
                .map(|(arr, values)| {
                    let arr = arr.as_any().downcast_ref::<MapArray>().unwrap();
                    // Map offsets are i32; they are widened to the i64 list offsets below.
                    let offsets: &OffsetsBuffer<i32> = arr.offsets();

                    // NOTE(review): the list validity is taken from the inner values'
                    // validity rather than the MapArray's own — confirm intended.
                    let validity = values.validity().cloned();

                    Box::from(ListArray::<i64>::new(
                        ListArray::<i64>::default_datatype(values.dtype().clone()),
                        OffsetsBuffer::<i64>::from(offsets),
                        values,
                        validity,
                    )) as ArrayRef
                })
                .collect();

            unsafe {
                let out = ListChunked::from_chunks_and_dtype_unchecked(
                    name,
                    chunks,
                    DataType::List(Box::new(dtype)),
                );

                Ok(out.into_series())
            }
        },
        dt => polars_bail!(ComputeError: "cannot create series from {:?}", dt),
    }
}
483
}
484
485
/// Apply `f` to every array in `arr` and collect the transformed chunks.
fn convert<F: Fn(&dyn Array) -> ArrayRef>(arr: &[ArrayRef], f: F) -> Vec<ArrayRef> {
    let mut out = Vec::with_capacity(arr.len());
    for a in arr {
        out.push(f(&**a));
    }
    out
}
488
489
/// Converts to physical types and bubbles up the correct [`DataType`].
///
/// Recurses through nested arrow types (lists, fixed-size lists, structs, maps)
/// and returns the physical arrow chunks together with the logical polars dtype.
#[allow(clippy::only_used_in_recursion)]
unsafe fn to_physical_and_dtype(
    arrays: Vec<ArrayRef>,
    md: Option<&Metadata>,
) -> (Vec<ArrayRef>, DataType) {
    // The dtype of the first array is taken as representative for all chunks.
    match arrays[0].dtype() {
        ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
            let chunks = cast_chunks(&arrays, &DataType::String, CastOptions::NonStrict).unwrap();
            (chunks, DataType::String)
        },
        ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::FixedSizeBinary(_) => {
            let chunks = cast_chunks(&arrays, &DataType::Binary, CastOptions::NonStrict).unwrap();
            (chunks, DataType::Binary)
        },
        #[allow(unused_variables)]
        dt @ ArrowDataType::Dictionary(_, _, _) => {
            feature_gated!("dtype-categorical", {
                // Route through Series construction so dictionary decoding is reused.
                let s = unsafe {
                    let dt = dt.clone();
                    Series::_try_from_arrow_unchecked_with_md(PlSmallStr::EMPTY, arrays, &dt, md)
                }
                .unwrap();
                (s.chunks().clone(), s.dtype().clone())
            })
        },
        ArrowDataType::List(field) => {
            // Normalize i32-offset lists to i64-offset LargeList, then recurse.
            let out = convert(&arrays, |arr| {
                cast(arr, &ArrowDataType::LargeList(field.clone())).unwrap()
            });
            to_physical_and_dtype(out, md)
        },
        #[cfg(feature = "dtype-array")]
        ArrowDataType::FixedSizeList(field, size) => {
            // Recurse into the child values, then rebuild each fixed-size list
            // around the converted values.
            let values = arrays
                .iter()
                .map(|arr| {
                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
                    arr.values().clone()
                })
                .collect::<Vec<_>>();

            let (converted_values, dtype) =
                to_physical_and_dtype(values, field.metadata.as_deref());

            let arrays = arrays
                .iter()
                .zip(converted_values)
                .map(|(arr, values)| {
                    let arr = arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();

                    let dtype = FixedSizeListArray::default_datatype(values.dtype().clone(), *size);
                    Box::from(FixedSizeListArray::new(
                        dtype,
                        arr.len(),
                        values,
                        arr.validity().cloned(),
                    )) as ArrayRef
                })
                .collect();
            (arrays, DataType::Array(Box::new(dtype), *size))
        },
        ArrowDataType::LargeList(field) => {
            // Recurse into the child values, then rebuild each list with the
            // original offsets/validity but the converted values.
            let values = arrays
                .iter()
                .map(|arr| {
                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
                    arr.values().clone()
                })
                .collect::<Vec<_>>();

            let (converted_values, dtype) =
                to_physical_and_dtype(values, field.metadata.as_deref());

            let arrays = arrays
                .iter()
                .zip(converted_values)
                .map(|(arr, values)| {
                    let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();

                    let dtype = ListArray::<i64>::default_datatype(values.dtype().clone());
                    Box::from(ListArray::<i64>::new(
                        dtype,
                        arr.offsets().clone(),
                        values,
                        arr.validity().cloned(),
                    )) as ArrayRef
                })
                .collect();
            (arrays, DataType::List(Box::new(dtype)))
        },
        ArrowDataType::Struct(_fields) => {
            feature_gated!("dtype-struct", {
                // Polars field schema is derived once, from the first chunk.
                let mut pl_fields = None;
                let arrays = arrays
                    .iter()
                    .map(|arr| {
                        let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
                        // Convert every struct field value array recursively.
                        let (values, dtypes): (Vec<_>, Vec<_>) = arr
                            .values()
                            .iter()
                            .zip(_fields.iter())
                            .map(|(value, field)| {
                                let mut out = to_physical_and_dtype(
                                    vec![value.clone()],
                                    field.metadata.as_deref(),
                                );
                                (out.0.pop().unwrap(), out.1)
                            })
                            .unzip();

                        // Rebuild the arrow struct with the physical child dtypes.
                        let arrow_fields = values
                            .iter()
                            .zip(_fields.iter())
                            .map(|(arr, field)| {
                                ArrowField::new(field.name.clone(), arr.dtype().clone(), true)
                            })
                            .collect();
                        let arrow_array = Box::new(StructArray::new(
                            ArrowDataType::Struct(arrow_fields),
                            arr.len(),
                            values,
                            arr.validity().cloned(),
                        )) as ArrayRef;

                        if pl_fields.is_none() {
                            pl_fields = Some(
                                _fields
                                    .iter()
                                    .zip(dtypes)
                                    .map(|(field, dtype)| Field::new(field.name.clone(), dtype))
                                    .collect_vec(),
                            )
                        }

                        arrow_array
                    })
                    .collect_vec();

                (arrays, DataType::Struct(pl_fields.unwrap()))
            })
        },
        // Use Series architecture to convert nested logical types to physical.
        dt @ (ArrowDataType::Duration(_)
        | ArrowDataType::Time32(_)
        | ArrowDataType::Time64(_)
        | ArrowDataType::Timestamp(_, _)
        | ArrowDataType::Date32
        | ArrowDataType::Decimal(_, _)
        | ArrowDataType::Date64
        | ArrowDataType::Map(_, _)) => {
            let dt = dt.clone();
            let mut s = Series::_try_from_arrow_unchecked(PlSmallStr::EMPTY, arrays, &dt).unwrap();
            let dtype = s.dtype().clone();
            (std::mem::take(s.chunks_mut()), dtype)
        },
        // Anything else is already physical; only the polars dtype is derived.
        dt => {
            let dtype = DataType::from_arrow(dt, md);
            (arrays, dtype)
        },
    }
}
651
652
#[cfg(feature = "dtype-categorical")]
/// Import a single arrow dictionary-encoded chunk as a [`Series`].
///
/// When `polars_dtype` is categorical/enum the dictionary values are decoded to
/// strings and fed through the categorical builder; otherwise the values are
/// imported as a plain series and gathered by the (idx-cast) dictionary keys.
unsafe fn import_arrow_dictionary_array(
    name: PlSmallStr,
    arr: Box<dyn Array>,
    key_type: &arrow::datatypes::IntegerType,
    polars_dtype: &DataType,
) -> PolarsResult<Series> {
    use arrow::datatypes::IntegerType as I;

    if matches!(
        polars_dtype,
        DataType::Categorical(_, _) | DataType::Enum(_, _)
    ) {
        // Categorical path: decode each key to its string value and rebuild the
        // categorical mapping from the string iterator.
        macro_rules! unpack_categorical_chunked {
            ($dt:ty) => {{
                let arr = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
                let keys = arr.keys();
                let values = arr.values();
                // Dictionary values may use any string layout; normalize to Utf8View.
                let values = cast(&**values, &ArrowDataType::Utf8View)?;
                let values = values.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
                with_match_categorical_physical_type!(polars_dtype.cat_physical().unwrap(), |$C| {
                    let ca = CategoricalChunked::<$C>::from_str_iter(
                        name,
                        polars_dtype.clone(),
                        keys.iter().map(|k| {
                            // Null keys and keys that do not fit usize become None.
                            let k: usize = (*k?).try_into().ok()?;
                            values.get(k)
                        }),
                    )?;
                    Ok(ca.into_series())
                })
            }};
        }

        match key_type {
            I::Int8 => unpack_categorical_chunked!(i8),
            I::UInt8 => unpack_categorical_chunked!(u8),
            I::Int16 => unpack_categorical_chunked!(i16),
            I::UInt16 => unpack_categorical_chunked!(u16),
            I::Int32 => unpack_categorical_chunked!(i32),
            I::UInt32 => unpack_categorical_chunked!(u32),
            I::Int64 => unpack_categorical_chunked!(i64),
            I::UInt64 => unpack_categorical_chunked!(u64),
            _ => polars_bail!(
                ComputeError: "unsupported arrow key type: {key_type:?}"
            ),
        }
    } else {
        // Generic path: cast the keys to the native index type, import the
        // dictionary values as a series, and gather by the keys.
        macro_rules! unpack_keys_values {
            ($dt:ty) => {{
                let arr = arr.as_any().downcast_ref::<DictionaryArray<$dt>>().unwrap();
                let keys = arr.keys();
                let keys = polars_compute::cast::primitive_to_primitive::<
                    $dt,
                    <IdxType as PolarsNumericType>::Native,
                >(keys, &IDX_DTYPE.to_arrow(CompatLevel::newest()));
                (keys, arr.values())
            }};
        }

        let (keys, values) = match key_type {
            I::Int8 => unpack_keys_values!(i8),
            I::UInt8 => unpack_keys_values!(u8),
            I::Int16 => unpack_keys_values!(i16),
            I::UInt16 => unpack_keys_values!(u16),
            I::Int32 => unpack_keys_values!(i32),
            I::UInt32 => unpack_keys_values!(u32),
            I::Int64 => unpack_keys_values!(i64),
            I::UInt64 => unpack_keys_values!(u64),
            _ => polars_bail!(
                ComputeError: "unsupported arrow key type: {key_type:?}"
            ),
        };

        // Import the dictionary values using their own arrow dtype.
        let values = Series::_try_from_arrow_unchecked_with_md(
            name,
            vec![values.clone()],
            values.dtype(),
            None,
        )?;

        // Gather the values by key to materialize the dictionary.
        values.take(&IdxCa::from_chunks_and_dtype(
            PlSmallStr::EMPTY,
            vec![keys.to_boxed()],
            IDX_DTYPE,
        ))
    }
}
740
741
fn check_types(chunks: &[ArrayRef]) -> PolarsResult<ArrowDataType> {
742
let mut chunks_iter = chunks.iter();
743
let dtype: ArrowDataType = chunks_iter
744
.next()
745
.ok_or_else(|| polars_err!(NoData: "expected at least one array-ref"))?
746
.dtype()
747
.clone();
748
749
for chunk in chunks_iter {
750
if chunk.dtype() != &dtype {
751
polars_bail!(
752
ComputeError: "cannot create series from multiple arrays with different types"
753
);
754
}
755
}
756
Ok(dtype)
757
}
758
759
impl Series {
    /// Construct a [`Series`] from `(name, data)` via the matching `TryInto` impl
    /// (see the `TryFrom` tuple impls below).
    pub fn try_new<T>(
        name: PlSmallStr,
        data: T,
    ) -> Result<Self, <(PlSmallStr, T) as TryInto<Self>>::Error>
    where
        (PlSmallStr, T): TryInto<Self>,
    {
        // # TODO
        // * Remove the TryFrom<tuple> impls in favor of this
        <(PlSmallStr, T) as TryInto<Self>>::try_into((name, data))
    }
}
772
773
impl TryFrom<(PlSmallStr, Vec<ArrayRef>)> for Series {
774
type Error = PolarsError;
775
776
fn try_from(name_arr: (PlSmallStr, Vec<ArrayRef>)) -> PolarsResult<Self> {
777
let (name, chunks) = name_arr;
778
779
let dtype = check_types(&chunks)?;
780
// SAFETY:
781
// dtype is checked
782
unsafe { Series::_try_from_arrow_unchecked(name, chunks, &dtype) }
783
}
784
}
785
786
impl TryFrom<(PlSmallStr, ArrayRef)> for Series {
787
type Error = PolarsError;
788
789
fn try_from(name_arr: (PlSmallStr, ArrayRef)) -> PolarsResult<Self> {
790
let (name, arr) = name_arr;
791
Series::try_from((name, vec![arr]))
792
}
793
}
794
795
impl TryFrom<(&ArrowField, Vec<ArrayRef>)> for Series {
796
type Error = PolarsError;
797
798
fn try_from(field_arr: (&ArrowField, Vec<ArrayRef>)) -> PolarsResult<Self> {
799
let (field, chunks) = field_arr;
800
801
let dtype = check_types(&chunks)?;
802
803
// SAFETY:
804
// dtype is checked
805
unsafe {
806
Series::_try_from_arrow_unchecked_with_md(
807
field.name.clone(),
808
chunks,
809
&dtype,
810
field.metadata.as_deref(),
811
)
812
}
813
}
814
}
815
816
impl TryFrom<(&ArrowField, ArrayRef)> for Series {
817
type Error = PolarsError;
818
819
fn try_from(field_arr: (&ArrowField, ArrayRef)) -> PolarsResult<Self> {
820
let (field, arr) = field_arr;
821
Series::try_from((field, vec![arr]))
822
}
823
}
824
825
/// Used to convert a [`ChunkedArray`], `&dyn SeriesTrait` and [`Series`]
/// into a [`Series`].
/// # Safety
///
/// This trait is marked `unsafe` as the `is_series` return is used
/// to transmute to `Series`. This must always return `false` except
/// for `Series` structs.
pub unsafe trait IntoSeries {
    /// Whether `Self` *is* `Series`; see the trait-level safety note.
    fn is_series() -> bool {
        false
    }

    /// Convert `self` into a type-erased [`Series`].
    fn into_series(self) -> Series
    where
        Self: Sized;
}
841
842
impl<T> From<ChunkedArray<T>> for Series
843
where
844
T: PolarsDataType,
845
ChunkedArray<T>: IntoSeries,
846
{
847
fn from(ca: ChunkedArray<T>) -> Self {
848
ca.into_series()
849
}
850
}
851
852
#[cfg(feature = "dtype-date")]
impl From<DateChunked> for Series {
    /// Wrap a [`DateChunked`] into a type-erased [`Series`].
    fn from(ca: DateChunked) -> Self {
        IntoSeries::into_series(ca)
    }
}
858
859
#[cfg(feature = "dtype-datetime")]
impl From<DatetimeChunked> for Series {
    /// Wrap a [`DatetimeChunked`] into a type-erased [`Series`].
    fn from(ca: DatetimeChunked) -> Self {
        IntoSeries::into_series(ca)
    }
}
865
866
#[cfg(feature = "dtype-duration")]
impl From<DurationChunked> for Series {
    /// Wrap a [`DurationChunked`] into a type-erased [`Series`].
    fn from(ca: DurationChunked) -> Self {
        IntoSeries::into_series(ca)
    }
}
872
873
#[cfg(feature = "dtype-time")]
impl From<TimeChunked> for Series {
    /// Wrap a [`TimeChunked`] into a type-erased [`Series`].
    fn from(ca: TimeChunked) -> Self {
        IntoSeries::into_series(ca)
    }
}
879
880
unsafe impl IntoSeries for Arc<dyn SeriesTrait> {
    fn into_series(self) -> Series {
        // `Series` is a newtype over `Arc<dyn SeriesTrait>`, so this is a direct wrap.
        Series(self)
    }
}
885
886
unsafe impl IntoSeries for Series {
    // `true` enables the transmute-based fast path described on the trait's
    // safety contract; only this impl may return `true`.
    fn is_series() -> bool {
        true
    }

    fn into_series(self) -> Series {
        self
    }
}
895
896
fn new_null(name: PlSmallStr, chunks: &[ArrayRef]) -> Series {
897
let len = chunks.iter().map(|arr| arr.len()).sum();
898
Series::new_null(name, len)
899
}
900
901