Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/gather/generic_binary.rs
6939 views
1
use arrow::array::{GenericBinaryArray, PrimitiveArray};
2
use arrow::bitmap::{Bitmap, BitmapBuilder};
3
use arrow::buffer::Buffer;
4
use arrow::offset::{Offset, Offsets, OffsetsBuffer};
5
use polars_utils::vec::{CapacityByFactor, PushUnchecked};
6
7
use super::Index;
8
9
fn create_offsets<I: Iterator<Item = usize>, O: Offset>(
10
lengths: I,
11
idx_len: usize,
12
) -> OffsetsBuffer<O> {
13
let mut length_so_far = O::default();
14
let mut offsets = Vec::with_capacity(idx_len + 1);
15
offsets.push(length_so_far);
16
17
for len in lengths {
18
unsafe {
19
length_so_far += O::from_usize(len).unwrap_unchecked();
20
offsets.push_unchecked(length_so_far)
21
};
22
}
23
unsafe { Offsets::new_unchecked(offsets).into() }
24
}
25
26
pub(super) unsafe fn take_values<O: Offset>(
27
length: O,
28
starts: &[O],
29
offsets: &OffsetsBuffer<O>,
30
values: &[u8],
31
) -> Buffer<u8> {
32
let new_len = length.to_usize();
33
let mut buffer = Vec::with_capacity(new_len);
34
starts
35
.iter()
36
.map(|start| start.to_usize())
37
.zip(offsets.lengths())
38
.for_each(|(start, length)| {
39
let end = start + length;
40
buffer.extend_from_slice(values.get_unchecked(start..end));
41
});
42
buffer.into()
43
}
44
45
// take implementation when neither values nor indices contain nulls
46
pub(super) unsafe fn take_no_validity_unchecked<O: Offset, I: Index>(
47
offsets: &OffsetsBuffer<O>,
48
values: &[u8],
49
indices: &[I],
50
) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
51
let values_len = offsets.last().to_usize();
52
let fraction_estimate = indices.len() as f64 / offsets.len() as f64 + 0.3;
53
let mut buffer = Vec::<u8>::with_capacity_by_factor(values_len, fraction_estimate);
54
55
let lengths = indices.iter().map(|index| index.to_usize()).map(|index| {
56
let (start, end) = offsets.start_end_unchecked(index);
57
buffer.extend_from_slice(values.get_unchecked(start..end));
58
end - start
59
});
60
let offsets = create_offsets(lengths, indices.len());
61
62
(offsets, buffer.into(), None)
63
}
64
65
// take implementation when only values contain nulls
66
pub(super) unsafe fn take_values_validity<O: Offset, I: Index, A: GenericBinaryArray<O>>(
67
values: &A,
68
indices: &[I],
69
) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
70
let validity_values = values.validity().unwrap();
71
let validity = indices
72
.iter()
73
.map(|index| validity_values.get_bit_unchecked(index.to_usize()));
74
let validity = Bitmap::from_trusted_len_iter(validity);
75
76
let mut total_length = O::default();
77
78
let offsets = values.offsets();
79
let values_values = values.values();
80
81
let mut starts = Vec::<O>::with_capacity(indices.len());
82
let lengths = indices.iter().map(|index| {
83
let index = index.to_usize();
84
let start = *offsets.get_unchecked(index);
85
let length = *offsets.get_unchecked(index + 1) - start;
86
total_length += length;
87
starts.push_unchecked(start);
88
length.to_usize()
89
});
90
let offsets = create_offsets(lengths, indices.len());
91
let buffer = take_values(total_length, starts.as_slice(), &offsets, values_values);
92
93
(offsets, buffer, validity.into())
94
}
95
96
// take implementation when only indices contain nulls
97
pub(super) unsafe fn take_indices_validity<O: Offset, I: Index>(
98
offsets: &OffsetsBuffer<O>,
99
values: &[u8],
100
indices: &PrimitiveArray<I>,
101
) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
102
let mut total_length = O::default();
103
104
let offsets = offsets.buffer();
105
106
let mut starts = Vec::<O>::with_capacity(indices.len());
107
let lengths = indices.values().iter().map(|index| {
108
let index = index.to_usize();
109
let length;
110
match offsets.get(index + 1) {
111
Some(&next) => {
112
let start = *offsets.get_unchecked(index);
113
length = next - start;
114
total_length += length;
115
starts.push_unchecked(start);
116
},
117
None => {
118
length = O::zero();
119
starts.push_unchecked(O::default());
120
},
121
};
122
length.to_usize()
123
});
124
let offsets = create_offsets(lengths, indices.len());
125
126
let buffer = take_values(total_length, &starts, &offsets, values);
127
128
(offsets, buffer, indices.validity().cloned())
129
}
130
131
// take implementation when both indices and values contain nulls
132
pub(super) unsafe fn take_values_indices_validity<O: Offset, I: Index, A: GenericBinaryArray<O>>(
133
values: &A,
134
indices: &PrimitiveArray<I>,
135
) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
136
let mut total_length = O::default();
137
let mut validity = BitmapBuilder::with_capacity(indices.len());
138
139
let values_validity = values.validity().unwrap();
140
let offsets = values.offsets();
141
let values_values = values.values();
142
143
let mut starts = Vec::<O>::with_capacity(indices.len());
144
let lengths = indices.iter().map(|index| {
145
let length;
146
match index {
147
Some(index) => {
148
let index = index.to_usize();
149
if values_validity.get_bit(index) {
150
validity.push(true);
151
length = *offsets.get_unchecked(index + 1) - *offsets.get_unchecked(index);
152
starts.push_unchecked(*offsets.get_unchecked(index));
153
} else {
154
validity.push(false);
155
length = O::zero();
156
starts.push_unchecked(O::default());
157
}
158
},
159
None => {
160
validity.push(false);
161
length = O::zero();
162
starts.push_unchecked(O::default());
163
},
164
};
165
total_length += length;
166
length.to_usize()
167
});
168
let offsets = create_offsets(lengths, indices.len());
169
170
let buffer = take_values(total_length, &starts, &offsets, values_values);
171
172
(offsets, buffer, validity.into_opt_validity())
173
}
174
175