Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/binary/builder.rs
6939 views
1
use polars_utils::IdxSize;
2
3
use crate::array::BinaryArray;
4
use crate::array::builder::{ShareStrategy, StaticArrayBuilder};
5
use crate::bitmap::OptBitmapBuilder;
6
use crate::buffer::Buffer;
7
use crate::datatypes::ArrowDataType;
8
use crate::offset::{Offset, Offsets, OffsetsBuffer};
9
10
pub struct BinaryArrayBuilder<O: Offset> {
11
dtype: ArrowDataType,
12
offsets: Offsets<O>,
13
values: Vec<u8>,
14
validity: OptBitmapBuilder,
15
}
16
17
impl<O: Offset> BinaryArrayBuilder<O> {
18
pub fn new(dtype: ArrowDataType) -> Self {
19
Self {
20
dtype,
21
offsets: Offsets::new(),
22
values: Vec::new(),
23
validity: OptBitmapBuilder::default(),
24
}
25
}
26
}
27
28
impl<O: Offset> StaticArrayBuilder for BinaryArrayBuilder<O> {
29
type Array = BinaryArray<O>;
30
31
fn dtype(&self) -> &ArrowDataType {
32
&self.dtype
33
}
34
35
fn reserve(&mut self, additional: usize) {
36
self.offsets.reserve(additional);
37
self.validity.reserve(additional);
38
// No values reserve, we have no idea how large it needs to be.
39
}
40
41
fn freeze(self) -> BinaryArray<O> {
42
let offsets = OffsetsBuffer::from(self.offsets);
43
let values = Buffer::from(self.values);
44
let validity = self.validity.into_opt_validity();
45
BinaryArray::new(self.dtype, offsets, values, validity)
46
}
47
48
fn freeze_reset(&mut self) -> Self::Array {
49
let offsets = OffsetsBuffer::from(core::mem::take(&mut self.offsets));
50
let values = Buffer::from(core::mem::take(&mut self.values));
51
let validity = core::mem::take(&mut self.validity).into_opt_validity();
52
BinaryArray::new(self.dtype.clone(), offsets, values, validity)
53
}
54
55
fn len(&self) -> usize {
56
self.offsets.len_proxy()
57
}
58
59
fn extend_nulls(&mut self, length: usize) {
60
self.offsets.extend_constant(length);
61
self.validity.extend_constant(length, false);
62
}
63
64
fn subslice_extend(
65
&mut self,
66
other: &BinaryArray<O>,
67
start: usize,
68
length: usize,
69
_share: ShareStrategy,
70
) {
71
let start_offset = other.offsets()[start].to_usize();
72
let stop_offset = other.offsets()[start + length].to_usize();
73
self.offsets
74
.try_extend_from_slice(other.offsets(), start, length)
75
.unwrap();
76
self.values
77
.extend_from_slice(&other.values()[start_offset..stop_offset]);
78
self.validity
79
.subslice_extend_from_opt_validity(other.validity(), start, length);
80
}
81
82
fn subslice_extend_each_repeated(
83
&mut self,
84
other: &BinaryArray<O>,
85
start: usize,
86
length: usize,
87
repeats: usize,
88
_share: ShareStrategy,
89
) {
90
let other_offsets = other.offsets();
91
let other_values = &**other.values();
92
93
let start_offset = other.offsets()[start].to_usize();
94
let stop_offset = other.offsets()[start + length].to_usize();
95
self.offsets.reserve(length * repeats);
96
self.values.reserve((stop_offset - start_offset) * repeats);
97
for offset_idx in start..start + length {
98
let substring_start = other_offsets[offset_idx].to_usize();
99
let substring_stop = other_offsets[offset_idx + 1].to_usize();
100
for _ in 0..repeats {
101
self.offsets
102
.try_push(substring_stop - substring_start)
103
.unwrap();
104
self.values
105
.extend_from_slice(&other_values[substring_start..substring_stop]);
106
}
107
}
108
self.validity
109
.subslice_extend_each_repeated_from_opt_validity(
110
other.validity(),
111
start,
112
length,
113
repeats,
114
);
115
}
116
117
unsafe fn gather_extend(
118
&mut self,
119
other: &BinaryArray<O>,
120
idxs: &[IdxSize],
121
_share: ShareStrategy,
122
) {
123
let other_values = &**other.values();
124
let other_offsets = other.offsets();
125
126
// Pre-compute proper length for reserve.
127
let total_len: usize = idxs
128
.iter()
129
.map(|i| {
130
let start_offset = other_offsets.get_unchecked(*i as usize).to_usize();
131
let stop_offset = other_offsets.get_unchecked(*i as usize + 1).to_usize();
132
stop_offset - start_offset
133
})
134
.sum();
135
self.values.reserve(total_len);
136
137
for idx in idxs {
138
let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();
139
let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
140
self.values
141
.extend_from_slice(other_values.get_unchecked(start_offset..stop_offset));
142
}
143
144
self.validity
145
.gather_extend_from_opt_validity(other.validity(), idxs);
146
}
147
148
fn opt_gather_extend(
149
&mut self,
150
other: &BinaryArray<O>,
151
idxs: &[IdxSize],
152
_share: ShareStrategy,
153
) {
154
let other_values = &**other.values();
155
let other_offsets = other.offsets();
156
157
unsafe {
158
// Pre-compute proper length for reserve.
159
let total_len: usize = idxs
160
.iter()
161
.map(|idx| {
162
if (*idx as usize) < other.len() {
163
let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();
164
let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
165
stop_offset - start_offset
166
} else {
167
0
168
}
169
})
170
.sum();
171
self.values.reserve(total_len);
172
173
for idx in idxs {
174
let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();
175
let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
176
self.values
177
.extend_from_slice(other_values.get_unchecked(start_offset..stop_offset));
178
}
179
180
self.validity
181
.opt_gather_extend_from_opt_validity(other.validity(), idxs, other.len());
182
}
183
}
184
}
185
186