Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/ffi/schema.rs
8424 views
1
use std::collections::BTreeMap;
2
use std::ffi::{CStr, CString};
3
use std::ptr;
4
5
use polars_error::{PolarsResult, polars_bail, polars_err};
6
use polars_utils::pl_str::PlSmallStr;
7
8
use super::ArrowSchema;
9
use crate::datatypes::{
10
ArrowDataType, Extension, ExtensionType, Field, IntegerType, IntervalUnit, Metadata, TimeUnit,
11
UnionMode, UnionType,
12
};
13
14
#[allow(dead_code)]
15
struct SchemaPrivateData {
16
name: CString,
17
format: CString,
18
metadata: Option<Vec<u8>>,
19
children_ptr: Box<[*mut ArrowSchema]>,
20
dictionary: Option<*mut ArrowSchema>,
21
}
22
23
// callback used to drop [ArrowSchema] when it is exported.
24
unsafe extern "C" fn c_release_schema(schema: *mut ArrowSchema) {
25
if schema.is_null() {
26
return;
27
}
28
let schema = &mut *schema;
29
30
let private = Box::from_raw(schema.private_data as *mut SchemaPrivateData);
31
for child in private.children_ptr.iter() {
32
let _ = Box::from_raw(*child);
33
}
34
35
if let Some(ptr) = private.dictionary {
36
let _ = Box::from_raw(ptr);
37
}
38
39
schema.release = None;
40
}
41
42
/// allocate (and hold) the children
43
fn schema_children(dtype: &ArrowDataType, flags: &mut i64) -> Box<[*mut ArrowSchema]> {
44
match dtype {
45
ArrowDataType::List(field)
46
| ArrowDataType::FixedSizeList(field, _)
47
| ArrowDataType::LargeList(field) => {
48
Box::new([Box::into_raw(Box::new(ArrowSchema::new(field.as_ref())))])
49
},
50
ArrowDataType::Map(field, is_sorted) => {
51
*flags += (*is_sorted as i64) * 4;
52
Box::new([Box::into_raw(Box::new(ArrowSchema::new(field.as_ref())))])
53
},
54
ArrowDataType::Struct(fields) => fields
55
.iter()
56
.map(|field| Box::into_raw(Box::new(ArrowSchema::new(field))))
57
.collect::<Box<[_]>>(),
58
ArrowDataType::Union(u) => u
59
.fields
60
.iter()
61
.map(|field| Box::into_raw(Box::new(ArrowSchema::new(field))))
62
.collect::<Box<[_]>>(),
63
ArrowDataType::Extension(ext) => schema_children(&ext.inner, flags),
64
_ => Box::new([]),
65
}
66
}
67
68
impl ArrowSchema {
69
/// creates a new [ArrowSchema]
70
pub(crate) fn new(field: &Field) -> Self {
71
let format = to_format(field.dtype());
72
let name = field.name.clone();
73
74
let mut flags = field.is_nullable as i64 * 2;
75
76
// note: this cannot be done along with the above because the above is fallible and this op leaks.
77
let children_ptr = schema_children(field.dtype(), &mut flags);
78
let n_children = children_ptr.len() as i64;
79
80
let dictionary = if let ArrowDataType::Dictionary(_, values, is_ordered) = field.dtype() {
81
flags += *is_ordered as i64;
82
// we do not store field info in the dict values, so can't recover it all :(
83
let field = Field::new(PlSmallStr::EMPTY, values.as_ref().clone(), true);
84
Some(Box::new(ArrowSchema::new(&field)))
85
} else {
86
None
87
};
88
89
let metadata = field
90
.metadata
91
.as_ref()
92
.map(|inner| (**inner).clone())
93
.unwrap_or_default();
94
95
let metadata = if let ArrowDataType::Extension(ext) = field.dtype() {
96
// append extension information.
97
let mut metadata = metadata;
98
99
// metadata
100
if let Some(extension_metadata) = &ext.metadata {
101
metadata.insert(
102
PlSmallStr::from_static("ARROW:extension:metadata"),
103
extension_metadata.clone(),
104
);
105
}
106
107
metadata.insert(
108
PlSmallStr::from_static("ARROW:extension:name"),
109
ext.name.clone(),
110
);
111
112
Some(metadata_to_bytes(&metadata))
113
} else if !metadata.is_empty() {
114
Some(metadata_to_bytes(&metadata))
115
} else {
116
None
117
};
118
119
let name = CString::new(name.as_bytes()).unwrap();
120
let format = CString::new(format).unwrap();
121
122
let mut private = Box::new(SchemaPrivateData {
123
name,
124
format,
125
metadata,
126
children_ptr,
127
dictionary: dictionary.map(Box::into_raw),
128
});
129
130
// <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema>
131
Self {
132
format: private.format.as_ptr(),
133
name: private.name.as_ptr(),
134
metadata: private
135
.metadata
136
.as_ref()
137
.map(|x| x.as_ptr())
138
.unwrap_or(std::ptr::null()) as *const ::std::os::raw::c_char,
139
flags,
140
n_children,
141
children: private.children_ptr.as_mut_ptr(),
142
dictionary: private.dictionary.unwrap_or(std::ptr::null_mut()),
143
release: Some(c_release_schema),
144
private_data: Box::into_raw(private) as *mut ::std::os::raw::c_void,
145
}
146
}
147
148
/// create an empty [ArrowSchema]
149
pub fn empty() -> Self {
150
Self {
151
format: std::ptr::null_mut(),
152
name: std::ptr::null_mut(),
153
metadata: std::ptr::null_mut(),
154
flags: 0,
155
n_children: 0,
156
children: ptr::null_mut(),
157
dictionary: std::ptr::null_mut(),
158
release: None,
159
private_data: std::ptr::null_mut(),
160
}
161
}
162
163
pub fn is_null(&self) -> bool {
164
self.private_data.is_null()
165
}
166
167
/// returns the format of this schema.
168
pub(crate) fn format(&self) -> &str {
169
assert!(!self.format.is_null());
170
// safe because the lifetime of `self.format` equals `self`
171
unsafe { CStr::from_ptr(self.format) }
172
.to_str()
173
.expect("The external API has a non-utf8 as format")
174
}
175
176
/// returns the name of this schema.
177
///
178
/// Since this field is optional, `""` is returned if it is not set (as per the spec).
179
pub(crate) fn name(&self) -> &str {
180
if self.name.is_null() {
181
return "";
182
}
183
// safe because the lifetime of `self.name` equals `self`
184
unsafe { CStr::from_ptr(self.name) }.to_str().unwrap()
185
}
186
187
pub(crate) fn child(&self, index: usize) -> &'static Self {
188
assert!(index < self.n_children as usize);
189
unsafe { self.children.add(index).as_ref().unwrap().as_ref().unwrap() }
190
}
191
192
pub(crate) fn dictionary(&self) -> Option<&'static Self> {
193
if self.dictionary.is_null() {
194
return None;
195
};
196
Some(unsafe { self.dictionary.as_ref().unwrap() })
197
}
198
199
pub(crate) fn nullable(&self) -> bool {
200
(self.flags / 2) & 1 == 1
201
}
202
}
203
204
impl Drop for ArrowSchema {
205
fn drop(&mut self) {
206
match self.release {
207
None => (),
208
Some(release) => unsafe { release(self) },
209
};
210
}
211
}
212
213
pub(crate) unsafe fn to_field(schema: &ArrowSchema) -> PolarsResult<Field> {
214
let dictionary = schema.dictionary();
215
let dtype = if let Some(dictionary) = dictionary {
216
let indices = to_integer_type(schema.format())?;
217
let values = to_field(dictionary)?;
218
let is_ordered = schema.flags & 1 == 1;
219
ArrowDataType::Dictionary(indices, Box::new(values.dtype().clone()), is_ordered)
220
} else {
221
to_dtype(schema)?
222
};
223
let (metadata, extension) = unsafe { metadata_from_bytes(schema.metadata) };
224
225
let dtype = if let Some((name, extension_metadata)) = extension {
226
ArrowDataType::Extension(Box::new(ExtensionType {
227
name,
228
inner: dtype,
229
metadata: extension_metadata,
230
}))
231
} else {
232
dtype
233
};
234
235
Ok(Field::new(
236
PlSmallStr::from_str(schema.name()),
237
dtype,
238
schema.nullable(),
239
)
240
.with_metadata(metadata))
241
}
242
243
fn to_integer_type(format: &str) -> PolarsResult<IntegerType> {
244
use IntegerType::*;
245
Ok(match format {
246
"c" => Int8,
247
"C" => UInt8,
248
"s" => Int16,
249
"S" => UInt16,
250
"i" => Int32,
251
"I" => UInt32,
252
"l" => Int64,
253
"L" => UInt64,
254
_ => {
255
polars_bail!(
256
ComputeError:
257
"dictionary indices can only be integers"
258
)
259
},
260
})
261
}
262
263
unsafe fn to_dtype(schema: &ArrowSchema) -> PolarsResult<ArrowDataType> {
264
Ok(match schema.format() {
265
"n" => ArrowDataType::Null,
266
"b" => ArrowDataType::Boolean,
267
"c" => ArrowDataType::Int8,
268
"C" => ArrowDataType::UInt8,
269
"s" => ArrowDataType::Int16,
270
"S" => ArrowDataType::UInt16,
271
"i" => ArrowDataType::Int32,
272
"I" => ArrowDataType::UInt32,
273
"l" => ArrowDataType::Int64,
274
"L" => ArrowDataType::UInt64,
275
"_pli128" => ArrowDataType::Int128,
276
"_plu128" => ArrowDataType::UInt128,
277
"e" => ArrowDataType::Float16,
278
"f" => ArrowDataType::Float32,
279
"g" => ArrowDataType::Float64,
280
"z" => ArrowDataType::Binary,
281
"Z" => ArrowDataType::LargeBinary,
282
"u" => ArrowDataType::Utf8,
283
"U" => ArrowDataType::LargeUtf8,
284
"tdD" => ArrowDataType::Date32,
285
"tdm" => ArrowDataType::Date64,
286
"tts" => ArrowDataType::Time32(TimeUnit::Second),
287
"ttm" => ArrowDataType::Time32(TimeUnit::Millisecond),
288
"ttu" => ArrowDataType::Time64(TimeUnit::Microsecond),
289
"ttn" => ArrowDataType::Time64(TimeUnit::Nanosecond),
290
"tDs" => ArrowDataType::Duration(TimeUnit::Second),
291
"tDm" => ArrowDataType::Duration(TimeUnit::Millisecond),
292
"tDu" => ArrowDataType::Duration(TimeUnit::Microsecond),
293
"tDn" => ArrowDataType::Duration(TimeUnit::Nanosecond),
294
"tiM" => ArrowDataType::Interval(IntervalUnit::YearMonth),
295
"tiD" => ArrowDataType::Interval(IntervalUnit::DayTime),
296
"tin" => ArrowDataType::Interval(IntervalUnit::MonthDayNano),
297
"vu" => ArrowDataType::Utf8View,
298
"vz" => ArrowDataType::BinaryView,
299
"+l" => {
300
let child = schema.child(0);
301
ArrowDataType::List(Box::new(to_field(child)?))
302
},
303
"+L" => {
304
let child = schema.child(0);
305
ArrowDataType::LargeList(Box::new(to_field(child)?))
306
},
307
"+m" => {
308
let child = schema.child(0);
309
310
let is_sorted = (schema.flags & 4) != 0;
311
ArrowDataType::Map(Box::new(to_field(child)?), is_sorted)
312
},
313
"+s" => {
314
let children = (0..schema.n_children as usize)
315
.map(|x| to_field(schema.child(x)))
316
.collect::<PolarsResult<Vec<_>>>()?;
317
ArrowDataType::Struct(children)
318
},
319
other => {
320
match other.splitn(2, ':').collect::<Vec<_>>()[..] {
321
// Timestamps with no timezone
322
["tss", ""] => ArrowDataType::Timestamp(TimeUnit::Second, None),
323
["tsm", ""] => ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
324
["tsu", ""] => ArrowDataType::Timestamp(TimeUnit::Microsecond, None),
325
["tsn", ""] => ArrowDataType::Timestamp(TimeUnit::Nanosecond, None),
326
327
// Timestamps with timezone
328
["tss", tz] => {
329
ArrowDataType::Timestamp(TimeUnit::Second, Some(PlSmallStr::from_str(tz)))
330
},
331
["tsm", tz] => {
332
ArrowDataType::Timestamp(TimeUnit::Millisecond, Some(PlSmallStr::from_str(tz)))
333
},
334
["tsu", tz] => {
335
ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(PlSmallStr::from_str(tz)))
336
},
337
["tsn", tz] => {
338
ArrowDataType::Timestamp(TimeUnit::Nanosecond, Some(PlSmallStr::from_str(tz)))
339
},
340
341
["w", size_raw] => {
342
// Example: "w:42" fixed-width binary [42 bytes]
343
let size = size_raw
344
.parse::<usize>()
345
.map_err(|_| polars_err!(ComputeError: "size is not a valid integer"))?;
346
ArrowDataType::FixedSizeBinary(size)
347
},
348
["+w", size_raw] => {
349
// Example: "+w:123" fixed-sized list [123 items]
350
let size = size_raw
351
.parse::<usize>()
352
.map_err(|_| polars_err!(ComputeError: "size is not a valid integer"))?;
353
let child = to_field(schema.child(0))?;
354
ArrowDataType::FixedSizeList(Box::new(child), size)
355
},
356
["d", raw] => {
357
// Decimal
358
let (precision, scale) = match raw.split(',').collect::<Vec<_>>()[..] {
359
[precision_raw, scale_raw] => {
360
// Example: "d:19,10" decimal128 [precision 19, scale 10]
361
(precision_raw, scale_raw)
362
},
363
[precision_raw, scale_raw, width_raw] => {
364
// Example: "d:19,10,NNN" decimal bitwidth = NNN [precision 19, scale 10]
365
// Only bitwdth of 128 currently supported
366
let bit_width = width_raw.parse::<usize>().map_err(|_| {
367
polars_err!(ComputeError: "Decimal bit width is not a valid integer")
368
})?;
369
match bit_width {
370
32 => return Ok(ArrowDataType::Decimal32(
371
precision_raw.parse::<usize>().map_err(|_| {
372
polars_err!(ComputeError: "Decimal precision is not a valid integer")
373
})?,
374
scale_raw.parse::<usize>().map_err(|_| {
375
polars_err!(ComputeError: "Decimal scale is not a valid integer")
376
})?,
377
)),
378
64 => return Ok(ArrowDataType::Decimal64(
379
precision_raw.parse::<usize>().map_err(|_| {
380
polars_err!(ComputeError: "Decimal precision is not a valid integer")
381
})?,
382
scale_raw.parse::<usize>().map_err(|_| {
383
polars_err!(ComputeError: "Decimal scale is not a valid integer")
384
})?,
385
)),
386
256 => return Ok(ArrowDataType::Decimal256(
387
precision_raw.parse::<usize>().map_err(|_| {
388
polars_err!(ComputeError: "Decimal precision is not a valid integer")
389
})?,
390
scale_raw.parse::<usize>().map_err(|_| {
391
polars_err!(ComputeError: "Decimal scale is not a valid integer")
392
})?,
393
)),
394
_ => {},
395
}
396
(precision_raw, scale_raw)
397
},
398
_ => {
399
polars_bail!(ComputeError:
400
"Decimal must contain 2 or 3 comma-separated values"
401
)
402
},
403
};
404
405
ArrowDataType::Decimal(
406
precision.parse::<usize>().map_err(|_| {
407
polars_err!(ComputeError:
408
"Decimal precision is not a valid integer"
409
)
410
})?,
411
scale.parse::<usize>().map_err(|_| {
412
polars_err!(ComputeError:
413
"Decimal scale is not a valid integer"
414
)
415
})?,
416
)
417
},
418
[union_type @ "+us", union_parts] | [union_type @ "+ud", union_parts] => {
419
// union, sparse
420
// Example "+us:I,J,..." sparse union with type ids I,J...
421
// Example: "+ud:I,J,..." dense union with type ids I,J...
422
let mode = UnionMode::sparse(union_type == "+us");
423
let type_ids = union_parts
424
.split(',')
425
.map(|x| {
426
x.parse::<i32>().map_err(|_| {
427
polars_err!(ComputeError:
428
"Union type id is not a valid integer"
429
)
430
})
431
})
432
.collect::<PolarsResult<Vec<_>>>()?;
433
let fields = (0..schema.n_children as usize)
434
.map(|x| to_field(schema.child(x)))
435
.collect::<PolarsResult<Vec<_>>>()?;
436
ArrowDataType::Union(Box::new(UnionType {
437
fields,
438
ids: Some(type_ids),
439
mode,
440
}))
441
},
442
_ => {
443
polars_bail!(ComputeError:
444
"The datatype \"{other}\" is still not supported in Rust implementation",
445
)
446
},
447
}
448
},
449
})
450
}
451
452
/// the inverse of [to_field]
453
fn to_format(dtype: &ArrowDataType) -> String {
454
match dtype {
455
ArrowDataType::Null => "n".to_string(),
456
ArrowDataType::Boolean => "b".to_string(),
457
ArrowDataType::Int8 => "c".to_string(),
458
ArrowDataType::UInt8 => "C".to_string(),
459
ArrowDataType::Int16 => "s".to_string(),
460
ArrowDataType::UInt16 => "S".to_string(),
461
ArrowDataType::Int32 => "i".to_string(),
462
ArrowDataType::UInt32 => "I".to_string(),
463
ArrowDataType::Int64 => "l".to_string(),
464
ArrowDataType::UInt64 => "L".to_string(),
465
// Doesn't exist in arrow, '_pl' prefixed is Polars specific
466
ArrowDataType::Int128 => "_pli128".to_string(),
467
// Doesn't exist in arrow, '_pl' prefixed is Polars specific
468
ArrowDataType::UInt128 => "_plu128".to_string(),
469
ArrowDataType::Float16 => "e".to_string(),
470
ArrowDataType::Float32 => "f".to_string(),
471
ArrowDataType::Float64 => "g".to_string(),
472
ArrowDataType::Binary => "z".to_string(),
473
ArrowDataType::LargeBinary => "Z".to_string(),
474
ArrowDataType::Utf8 => "u".to_string(),
475
ArrowDataType::LargeUtf8 => "U".to_string(),
476
ArrowDataType::Date32 => "tdD".to_string(),
477
ArrowDataType::Date64 => "tdm".to_string(),
478
ArrowDataType::Time32(TimeUnit::Second) => "tts".to_string(),
479
ArrowDataType::Time32(TimeUnit::Millisecond) => "ttm".to_string(),
480
ArrowDataType::Time32(_) => {
481
unreachable!("Time32 is only supported for seconds and milliseconds")
482
},
483
ArrowDataType::Time64(TimeUnit::Microsecond) => "ttu".to_string(),
484
ArrowDataType::Time64(TimeUnit::Nanosecond) => "ttn".to_string(),
485
ArrowDataType::Time64(_) => {
486
unreachable!("Time64 is only supported for micro and nanoseconds")
487
},
488
ArrowDataType::Duration(TimeUnit::Second) => "tDs".to_string(),
489
ArrowDataType::Duration(TimeUnit::Millisecond) => "tDm".to_string(),
490
ArrowDataType::Duration(TimeUnit::Microsecond) => "tDu".to_string(),
491
ArrowDataType::Duration(TimeUnit::Nanosecond) => "tDn".to_string(),
492
ArrowDataType::Interval(IntervalUnit::YearMonth) => "tiM".to_string(),
493
ArrowDataType::Interval(IntervalUnit::DayTime) => "tiD".to_string(),
494
ArrowDataType::Interval(IntervalUnit::MonthDayNano) => "tin".to_string(),
495
ArrowDataType::Interval(IntervalUnit::MonthDayMillis) => unimplemented!(),
496
ArrowDataType::Timestamp(unit, tz) => {
497
let unit = match unit {
498
TimeUnit::Second => "s",
499
TimeUnit::Millisecond => "m",
500
TimeUnit::Microsecond => "u",
501
TimeUnit::Nanosecond => "n",
502
};
503
format!(
504
"ts{}:{}",
505
unit,
506
tz.as_ref().map(|x| x.as_str()).unwrap_or("")
507
)
508
},
509
ArrowDataType::Utf8View => "vu".to_string(),
510
ArrowDataType::BinaryView => "vz".to_string(),
511
ArrowDataType::Decimal(precision, scale) => format!("d:{precision},{scale}"),
512
ArrowDataType::Decimal32(precision, scale) => format!("d:{precision},{scale},32"),
513
ArrowDataType::Decimal64(precision, scale) => format!("d:{precision},{scale},64"),
514
ArrowDataType::Decimal256(precision, scale) => format!("d:{precision},{scale},256"),
515
ArrowDataType::List(_) => "+l".to_string(),
516
ArrowDataType::LargeList(_) => "+L".to_string(),
517
ArrowDataType::Struct(_) => "+s".to_string(),
518
ArrowDataType::FixedSizeBinary(size) => format!("w:{size}"),
519
ArrowDataType::FixedSizeList(_, size) => format!("+w:{size}"),
520
ArrowDataType::Union(u) => {
521
let sparsness = if u.mode.is_sparse() { 's' } else { 'd' };
522
let mut r = format!("+u{sparsness}:");
523
let ids = if let Some(ids) = &u.ids {
524
ids.iter()
525
.fold(String::new(), |a, b| a + b.to_string().as_str() + ",")
526
} else {
527
(0..u.fields.len()).fold(String::new(), |a, b| a + b.to_string().as_str() + ",")
528
};
529
let ids = &ids[..ids.len() - 1]; // take away last ","
530
r.push_str(ids);
531
r
532
},
533
ArrowDataType::Map(_, _) => "+m".to_string(),
534
ArrowDataType::Dictionary(index, _, _) => to_format(&(*index).into()),
535
ArrowDataType::Extension(ext) => to_format(&ext.inner),
536
ArrowDataType::Unknown => unimplemented!(),
537
}
538
}
539
540
pub(super) fn get_child(dtype: &ArrowDataType, index: usize) -> PolarsResult<ArrowDataType> {
541
match (index, dtype) {
542
(0, ArrowDataType::List(field)) => Ok(field.dtype().clone()),
543
(0, ArrowDataType::FixedSizeList(field, _)) => Ok(field.dtype().clone()),
544
(0, ArrowDataType::LargeList(field)) => Ok(field.dtype().clone()),
545
(0, ArrowDataType::Map(field, _)) => Ok(field.dtype().clone()),
546
(index, ArrowDataType::Struct(fields)) => Ok(fields[index].dtype().clone()),
547
(index, ArrowDataType::Union(u)) => Ok(u.fields[index].dtype().clone()),
548
(index, ArrowDataType::Extension(ext)) => get_child(&ext.inner, index),
549
(child, dtype) => polars_bail!(ComputeError:
550
"Requested child {child} to type {dtype:?} that has no such child",
551
),
552
}
553
}
554
555
fn metadata_to_bytes(metadata: &BTreeMap<PlSmallStr, PlSmallStr>) -> Vec<u8> {
556
let a = (metadata.len() as i32).to_ne_bytes().to_vec();
557
metadata.iter().fold(a, |mut acc, (key, value)| {
558
acc.extend((key.len() as i32).to_ne_bytes());
559
acc.extend(key.as_bytes());
560
acc.extend((value.len() as i32).to_ne_bytes());
561
acc.extend(value.as_bytes());
562
acc
563
})
564
}
565
566
/// Reads a native-endian `i32` from the four bytes starting at `ptr`.
///
/// # Safety
/// `ptr` must be valid for reading four bytes. No alignment is required.
unsafe fn read_ne_i32(ptr: *const u8) -> i32 {
    // SAFETY: the caller guarantees four readable bytes; a byte array has alignment 1.
    let raw = unsafe { ptr.cast::<[u8; 4]>().read() };
    i32::from_ne_bytes(raw)
}
570
571
unsafe fn read_bytes(ptr: *const u8, len: usize) -> &'static str {
572
let slice = std::slice::from_raw_parts(ptr, len);
573
simdutf8::basic::from_utf8(slice).unwrap()
574
}
575
576
unsafe fn metadata_from_bytes(data: *const ::std::os::raw::c_char) -> (Metadata, Extension) {
577
let mut data = data as *const u8; // u8 = i8
578
if data.is_null() {
579
return (Metadata::default(), None);
580
};
581
let len = read_ne_i32(data);
582
data = data.add(4);
583
584
let mut result = BTreeMap::new();
585
let mut extension_name = None;
586
let mut extension_metadata = None;
587
for _ in 0..len {
588
let key_len = read_ne_i32(data) as usize;
589
data = data.add(4);
590
let key = read_bytes(data, key_len);
591
data = data.add(key_len);
592
let value_len = read_ne_i32(data) as usize;
593
data = data.add(4);
594
let value = read_bytes(data, value_len);
595
data = data.add(value_len);
596
match key {
597
"ARROW:extension:name" => {
598
extension_name = Some(PlSmallStr::from_str(value));
599
},
600
"ARROW:extension:metadata" => {
601
extension_metadata = Some(PlSmallStr::from_str(value));
602
},
603
_ => {
604
result.insert(PlSmallStr::from_str(key), PlSmallStr::from_str(value));
605
},
606
};
607
}
608
let extension = extension_name.map(|name| (name, extension_metadata));
609
(result, extension)
610
}
611
612
#[cfg(test)]
mod tests {
    use super::*;
    use crate::array::LIST_VALUES_NAME;

    /// Shorthand for a [`Field`] with a static name.
    fn field(name: &'static str, dtype: ArrowDataType, is_nullable: bool) -> Field {
        Field::new(PlSmallStr::from_static(name), dtype, is_nullable)
    }

    /// Non-nullable boolean item field used by the list-like types.
    fn example_item() -> Box<Field> {
        Box::new(field("example", ArrowDataType::Boolean, false))
    }

    /// Two fields, `a: Int64` and `b: List<Int32>`, used by the struct and union types.
    fn a_and_b() -> Vec<Field> {
        let values = Field::new(LIST_VALUES_NAME, ArrowDataType::Int32, true);
        vec![
            field("a", ArrowDataType::Int64, true),
            field("b", ArrowDataType::List(Box::new(values)), true),
        ]
    }

    /// Every data type must survive `ArrowSchema::new` followed by `to_dtype` unchanged.
    #[test]
    fn test_all() {
        let mut dts = vec![
            ArrowDataType::Null,
            ArrowDataType::Boolean,
            ArrowDataType::UInt8,
            ArrowDataType::UInt16,
            ArrowDataType::UInt32,
            ArrowDataType::UInt64,
            ArrowDataType::UInt128,
            ArrowDataType::Int8,
            ArrowDataType::Int16,
            ArrowDataType::Int32,
            ArrowDataType::Int64,
            ArrowDataType::Int128,
            ArrowDataType::Float16,
            ArrowDataType::Float32,
            ArrowDataType::Float64,
            ArrowDataType::Date32,
            ArrowDataType::Date64,
            ArrowDataType::Time32(TimeUnit::Second),
            ArrowDataType::Time32(TimeUnit::Millisecond),
            ArrowDataType::Time64(TimeUnit::Microsecond),
            ArrowDataType::Time64(TimeUnit::Nanosecond),
            ArrowDataType::Decimal(5, 5),
            ArrowDataType::Decimal32(5, 5),
            ArrowDataType::Decimal64(5, 5),
            ArrowDataType::Decimal256(5, 5),
            ArrowDataType::Utf8,
            ArrowDataType::LargeUtf8,
            ArrowDataType::Utf8View,
            ArrowDataType::Binary,
            ArrowDataType::LargeBinary,
            ArrowDataType::BinaryView,
            ArrowDataType::FixedSizeBinary(2),
            ArrowDataType::List(example_item()),
            ArrowDataType::FixedSizeList(example_item(), 2),
            ArrowDataType::LargeList(example_item()),
            ArrowDataType::Struct(a_and_b()),
            ArrowDataType::Map(Box::new(field("a", ArrowDataType::Int64, true)), true),
            ArrowDataType::Union(Box::new(UnionType {
                fields: a_and_b(),
                ids: Some(vec![1, 2]),
                mode: UnionMode::Dense,
            })),
            ArrowDataType::Union(Box::new(UnionType {
                fields: a_and_b(),
                ids: Some(vec![0, 1]),
                mode: UnionMode::Sparse,
            })),
        ];
        for time_unit in [
            TimeUnit::Second,
            TimeUnit::Millisecond,
            TimeUnit::Microsecond,
            TimeUnit::Nanosecond,
        ] {
            dts.push(ArrowDataType::Timestamp(time_unit, None));
            dts.push(ArrowDataType::Timestamp(
                time_unit,
                Some(PlSmallStr::from_static("00:00")),
            ));
            dts.push(ArrowDataType::Duration(time_unit));
        }
        for interval_type in [
            IntervalUnit::DayTime,
            IntervalUnit::YearMonth,
            IntervalUnit::MonthDayNano,
        ] {
            dts.push(ArrowDataType::Interval(interval_type));
        }

        for expected in dts {
            let field = Field::new(PlSmallStr::from_static("a"), expected.clone(), true);
            let schema = ArrowSchema::new(&field);
            let result = unsafe { super::to_dtype(&schema).unwrap() };
            assert_eq!(result, expected);
        }
    }
}
746
747