Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/series/builder.rs
6940 views
1
use arrow::array::builder::{ArrayBuilder, ShareStrategy, make_builder};
2
use polars_utils::IdxSize;
3
4
#[cfg(feature = "object")]
5
use crate::chunked_array::object::registry::get_object_builder;
6
use crate::prelude::*;
7
use crate::utils::Container;
8
9
/// A type-erased wrapper around ArrayBuilder.
10
pub struct SeriesBuilder {
11
dtype: DataType,
12
builder: Box<dyn ArrayBuilder>,
13
}
14
15
impl SeriesBuilder {
16
pub fn new(dtype: DataType) -> Self {
17
// FIXME: get rid of this hack.
18
#[cfg(feature = "object")]
19
if matches!(dtype, DataType::Object(_)) {
20
let builder = get_object_builder(PlSmallStr::EMPTY, 0).as_array_builder();
21
return Self { dtype, builder };
22
}
23
24
let builder = make_builder(&dtype.to_physical().to_arrow(CompatLevel::newest()));
25
Self { dtype, builder }
26
}
27
28
#[inline(always)]
29
pub fn reserve(&mut self, additional: usize) {
30
self.builder.reserve(additional);
31
}
32
33
pub fn freeze(self, name: PlSmallStr) -> Series {
34
unsafe {
35
Series::from_chunks_and_dtype_unchecked(name, vec![self.builder.freeze()], &self.dtype)
36
}
37
}
38
39
pub fn freeze_reset(&mut self, name: PlSmallStr) -> Series {
40
unsafe {
41
Series::from_chunks_and_dtype_unchecked(
42
name,
43
vec![self.builder.freeze_reset()],
44
&self.dtype,
45
)
46
}
47
}
48
49
pub fn len(&self) -> usize {
50
self.builder.len()
51
}
52
53
pub fn is_empty(&self) -> bool {
54
self.builder.len() == 0
55
}
56
57
pub fn dtype(&self) -> &DataType {
58
&self.dtype
59
}
60
/// Extend this builder with the given number of null elements.
61
pub fn extend_nulls(&mut self, length: usize) {
62
self.builder.extend_nulls(length);
63
}
64
65
/// Extends this builder with the contents of the given series. May panic if
66
/// other does not match the dtype of this builder.
67
#[inline(always)]
68
pub fn extend(&mut self, other: &Series, share: ShareStrategy) {
69
self.subslice_extend(other, 0, other.len(), share);
70
}
71
72
/// Extends this builder with the contents of the given series subslice.
73
/// May panic if other does not match the dtype of this builder.
74
pub fn subslice_extend(
75
&mut self,
76
other: &Series,
77
mut start: usize,
78
mut length: usize,
79
share: ShareStrategy,
80
) {
81
if length == 0 || other.is_empty() {
82
return;
83
}
84
85
for chunk in other.chunks() {
86
if start < chunk.len() {
87
let length_in_chunk = length.min(chunk.len() - start);
88
self.builder
89
.subslice_extend(&**chunk, start, length_in_chunk, share);
90
91
start = 0;
92
length -= length_in_chunk;
93
if length == 0 {
94
break;
95
}
96
} else {
97
start -= chunk.len();
98
}
99
}
100
}
101
102
pub fn subslice_extend_repeated(
103
&mut self,
104
other: &Series,
105
start: usize,
106
length: usize,
107
repeats: usize,
108
share: ShareStrategy,
109
) {
110
if length == 0 || other.is_empty() {
111
return;
112
}
113
114
let chunks = other.chunks();
115
if chunks.len() == 1 {
116
self.builder
117
.subslice_extend_repeated(&*chunks[0], start, length, repeats, share);
118
} else {
119
for _ in 0..repeats {
120
self.subslice_extend(other, start, length, share);
121
}
122
}
123
}
124
125
pub fn subslice_extend_each_repeated(
126
&mut self,
127
other: &Series,
128
mut start: usize,
129
mut length: usize,
130
repeats: usize,
131
share: ShareStrategy,
132
) {
133
if length == 0 || repeats == 0 || other.is_empty() {
134
return;
135
}
136
137
for chunk in other.chunks() {
138
if start < chunk.len() {
139
let length_in_chunk = length.min(chunk.len() - start);
140
self.builder.subslice_extend_each_repeated(
141
&**chunk,
142
start,
143
length_in_chunk,
144
repeats,
145
share,
146
);
147
148
start = 0;
149
length -= length_in_chunk;
150
if length == 0 {
151
break;
152
}
153
} else {
154
start -= chunk.len();
155
}
156
}
157
}
158
159
/// Extends this builder with the contents of the given series at the given
160
/// indices. That is, `other[idxs[i]]` is appended to this builder in order,
161
/// for each i=0..idxs.len(). May panic if other does not match the dtype
162
/// of this builder, or if the other series is not rechunked.
163
///
164
/// # Safety
165
/// The indices must be in-bounds.
166
pub unsafe fn gather_extend(&mut self, other: &Series, idxs: &[IdxSize], share: ShareStrategy) {
167
let chunks = other.chunks();
168
assert!(chunks.len() == 1);
169
self.builder.gather_extend(&*chunks[0], idxs, share);
170
}
171
172
pub fn opt_gather_extend(&mut self, other: &Series, idxs: &[IdxSize], share: ShareStrategy) {
173
let chunks = other.chunks();
174
assert!(chunks.len() == 1);
175
self.builder.opt_gather_extend(&*chunks[0], idxs, share);
176
}
177
178
pub fn push_any_value(&mut self, value: AnyValue<'static>) {
179
// @PERF
180
self.extend(
181
&Scalar::new(self.dtype.clone(), value).into_series(PlSmallStr::EMPTY),
182
ShareStrategy::Always,
183
);
184
}
185
}
186
187