Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/io/ipc/read/array/utf8.rs
8420 views
1
use std::io::{Read, Seek};
2
3
use polars_buffer::Buffer;
4
use polars_error::polars_err;
5
use polars_utils::bool::UnsafeBool;
6
7
use super::super::read_basic::*;
8
use super::*;
9
use crate::array::Utf8Array;
10
use crate::offset::Offset;
11
12
#[allow(clippy::too_many_arguments)]
13
pub fn read_utf8<O: Offset, R: Read + Seek>(
14
field_nodes: &mut VecDeque<Node>,
15
dtype: ArrowDataType,
16
buffers: &mut VecDeque<IpcBuffer>,
17
reader: &mut R,
18
block_offset: u64,
19
is_little_endian: bool,
20
compression: Option<Compression>,
21
limit: Option<usize>,
22
scratch: &mut Vec<u8>,
23
checked: UnsafeBool,
24
) -> PolarsResult<Utf8Array<O>> {
25
let field_node = try_get_field_node(field_nodes, &dtype)?;
26
27
let validity = read_validity(
28
buffers,
29
field_node,
30
reader,
31
block_offset,
32
is_little_endian,
33
compression,
34
limit,
35
scratch,
36
)?;
37
38
let length = try_get_array_length(field_node, limit)?;
39
40
let offsets: Buffer<O> = read_buffer(
41
buffers,
42
1 + length,
43
reader,
44
block_offset,
45
is_little_endian,
46
compression,
47
scratch,
48
)
49
// Older versions of the IPC format sometimes do not report an offset
50
.or_else(|_| PolarsResult::Ok(Buffer::<O>::from(vec![O::default()])))?;
51
52
let last_offset = offsets.last().unwrap().to_usize();
53
let values = read_buffer(
54
buffers,
55
last_offset,
56
reader,
57
block_offset,
58
is_little_endian,
59
compression,
60
scratch,
61
)?;
62
63
if *checked {
64
Utf8Array::<O>::try_new(dtype, offsets.try_into()?, values, validity)
65
} else {
66
// SAFETY:
67
// Invariant of the `checked` state that this is valid.
68
unsafe {
69
Ok(Utf8Array::<O>::new_unchecked(
70
dtype,
71
offsets.try_into()?,
72
values,
73
validity,
74
))
75
}
76
}
77
}
78
79
pub fn skip_utf8(
80
field_nodes: &mut VecDeque<Node>,
81
buffers: &mut VecDeque<IpcBuffer>,
82
) -> PolarsResult<()> {
83
let _ = field_nodes.pop_front().ok_or_else(|| {
84
polars_err!(
85
oos = "IPC: unable to fetch the field for utf8. The file or stream is corrupted."
86
)
87
})?;
88
89
let _ = buffers
90
.pop_front()
91
.ok_or_else(|| polars_err!(oos = "IPC: missing validity buffer."))?;
92
let _ = buffers
93
.pop_front()
94
.ok_or_else(|| polars_err!(oos = "IPC: missing offsets buffer."))?;
95
let _ = buffers
96
.pop_front()
97
.ok_or_else(|| polars_err!(oos = "IPC: missing values buffer."))?;
98
Ok(())
99
}
100
101