Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/parquet/encoding/plain_byte_array.rs
6940 views
1
/// Decodes byte arrays stored with the [Plain](https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0) encoding:
/// each value is a 4-byte little-endian length followed by that many bytes of data.
/// # Implementation
/// This struct does not allocate on the heap.
5
use crate::parquet::error::ParquetError;
6
7
/// Borrowing iterator state over a buffer of length-prefixed byte arrays.
///
/// The struct only holds a slice into the caller's buffer plus an optional
/// counter; it owns no data of its own.
#[derive(Debug)]
pub struct BinaryIter<'a> {
    /// Bytes of the input buffer that have not been consumed yet.
    values: &'a [u8],
    /// Caller-supplied number of values still expected, if known.
    /// It is decremented as values are produced and reported via `size_hint`.
    length: Option<usize>,
}
12
13
impl<'a> BinaryIter<'a> {
14
pub fn new(values: &'a [u8], length: Option<usize>) -> Self {
15
Self { values, length }
16
}
17
}
18
19
impl<'a> Iterator for BinaryIter<'a> {
20
type Item = Result<&'a [u8], ParquetError>;
21
22
#[inline]
23
fn next(&mut self) -> Option<Self::Item> {
24
if self.values.len() < 4 {
25
return None;
26
}
27
if let Some(x) = self.length.as_mut() {
28
*x = x.saturating_sub(1)
29
}
30
let length = u32::from_le_bytes(self.values[0..4].try_into().unwrap()) as usize;
31
self.values = &self.values[4..];
32
if length > self.values.len() {
33
return Some(Err(ParquetError::oos(
34
"A string in plain encoding declares a length that is out of range",
35
)));
36
}
37
let (result, remaining) = self.values.split_at(length);
38
self.values = remaining;
39
Some(Ok(result))
40
}
41
42
#[inline]
43
fn size_hint(&self) -> (usize, Option<usize>) {
44
(self.length.unwrap_or_default(), self.length)
45
}
46
}
47
48