Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/legacy/array/utf8.rs
6939 views
1
use crate::array::{BinaryArray, Utf8Array};
2
use crate::datatypes::ArrowDataType;
3
use crate::legacy::trusted_len::TrustedLenPush;
4
use crate::offset::Offsets;
5
6
#[inline]
7
unsafe fn extend_from_trusted_len_values_iter<I, P>(
8
offsets: &mut Vec<i64>,
9
values: &mut Vec<u8>,
10
iterator: I,
11
) where
12
P: AsRef<[u8]>,
13
I: Iterator<Item = P>,
14
{
15
let mut total_length = 0;
16
offsets.push(total_length);
17
iterator.for_each(|item| {
18
let s = item.as_ref();
19
// Push new entries for both `values` and `offsets` buffer
20
values.extend_from_slice(s);
21
22
total_length += s.len() as i64;
23
offsets.push_unchecked(total_length);
24
});
25
}
26
27
/// # Safety
28
/// reported `len` must be correct.
29
#[inline]
30
unsafe fn fill_offsets_and_values<I, P>(
31
iterator: I,
32
value_capacity: usize,
33
len: usize,
34
) -> (Offsets<i64>, Vec<u8>)
35
where
36
P: AsRef<[u8]>,
37
I: Iterator<Item = P>,
38
{
39
let mut offsets = Vec::with_capacity(len + 1);
40
let mut values = Vec::<u8>::with_capacity(value_capacity);
41
42
extend_from_trusted_len_values_iter(&mut offsets, &mut values, iterator);
43
44
(Offsets::new_unchecked(offsets), values)
45
}
46
47
/// Newtype adapter that lets any string-like value be viewed as the raw
/// bytes of its string contents.
struct StrAsBytes<P>(P);

impl<T> AsRef<[u8]> for StrAsBytes<T>
where
    T: AsRef<str>,
{
    /// Borrows the wrapped value as a `str` and returns its underlying bytes.
    #[inline(always)]
    fn as_ref(&self) -> &[u8] {
        let StrAsBytes(inner) = self;
        let text: &str = inner.as_ref();
        text.as_bytes()
    }
}
54
55
pub trait Utf8FromIter {
56
#[inline]
57
fn from_values_iter<I, S>(iter: I, len: usize, size_hint: usize) -> Utf8Array<i64>
58
where
59
S: AsRef<str>,
60
I: Iterator<Item = S>,
61
{
62
let iter = iter.map(StrAsBytes);
63
let (offsets, values) = unsafe { fill_offsets_and_values(iter, size_hint, len) };
64
unsafe {
65
Utf8Array::new_unchecked(
66
ArrowDataType::LargeUtf8,
67
offsets.into(),
68
values.into(),
69
None,
70
)
71
}
72
}
73
}
74
75
impl Utf8FromIter for Utf8Array<i64> {}
76
77
pub trait BinaryFromIter {
78
#[inline]
79
fn from_values_iter<I, S>(iter: I, len: usize, value_cap: usize) -> BinaryArray<i64>
80
where
81
S: AsRef<[u8]>,
82
I: Iterator<Item = S>,
83
{
84
let (offsets, values) = unsafe { fill_offsets_and_values(iter, value_cap, len) };
85
BinaryArray::new(
86
ArrowDataType::LargeBinary,
87
offsets.into(),
88
values.into(),
89
None,
90
)
91
}
92
}
93
94
impl BinaryFromIter for BinaryArray<i64> {}
95
96