Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/chunked_array/from.rs
6940 views
1
use super::*;
2
3
#[allow(clippy::all)]
4
fn from_chunks_list_dtype(chunks: &mut Vec<ArrayRef>, dtype: DataType) -> DataType {
5
// ensure we don't get List<null>
6
if let Some(arr) = chunks.get(0) {
7
DataType::from_arrow_dtype(arr.dtype())
8
} else {
9
dtype
10
}
11
}
12
13
impl<T, A> From<A> for ChunkedArray<T>
14
where
15
T: PolarsDataType<Array = A>,
16
A: Array,
17
{
18
fn from(arr: A) -> Self {
19
Self::with_chunk(PlSmallStr::EMPTY, arr)
20
}
21
}
22
23
impl<T> ChunkedArray<T>
24
where
25
T: PolarsDataType,
26
{
27
pub fn with_chunk<A>(name: PlSmallStr, arr: A) -> Self
28
where
29
A: Array,
30
T: PolarsDataType<Array = A>,
31
{
32
unsafe { Self::from_chunks(name, vec![Box::new(arr)]) }
33
}
34
35
pub fn with_chunk_like<A>(ca: &Self, arr: A) -> Self
36
where
37
A: Array,
38
T: PolarsDataType<Array = A>,
39
{
40
Self::from_chunk_iter_like(ca, std::iter::once(arr))
41
}
42
43
pub fn from_chunk_iter<I>(name: PlSmallStr, iter: I) -> Self
44
where
45
I: IntoIterator,
46
T: PolarsDataType<Array = <I as IntoIterator>::Item>,
47
<I as IntoIterator>::Item: Array,
48
{
49
let chunks = iter
50
.into_iter()
51
.map(|x| Box::new(x) as Box<dyn Array>)
52
.collect();
53
unsafe { Self::from_chunks(name, chunks) }
54
}
55
56
pub fn from_chunk_iter_like<I>(ca: &Self, iter: I) -> Self
57
where
58
I: IntoIterator,
59
T: PolarsDataType<Array = <I as IntoIterator>::Item>,
60
<I as IntoIterator>::Item: Array,
61
{
62
let chunks = iter
63
.into_iter()
64
.map(|x| Box::new(x) as Box<dyn Array>)
65
.collect();
66
unsafe {
67
Self::from_chunks_and_dtype_unchecked(ca.name().clone(), chunks, ca.dtype().clone())
68
}
69
}
70
71
pub fn try_from_chunk_iter<I, A, E>(name: PlSmallStr, iter: I) -> Result<Self, E>
72
where
73
I: IntoIterator<Item = Result<A, E>>,
74
T: PolarsDataType<Array = A>,
75
A: Array,
76
{
77
let chunks: Result<_, _> = iter
78
.into_iter()
79
.map(|x| Ok(Box::new(x?) as Box<dyn Array>))
80
.collect();
81
unsafe { Ok(Self::from_chunks(name, chunks?)) }
82
}
83
84
pub(crate) fn from_chunk_iter_and_field<I>(field: Arc<Field>, chunks: I) -> Self
85
where
86
I: IntoIterator,
87
T: PolarsDataType<Array = <I as IntoIterator>::Item>,
88
<I as IntoIterator>::Item: Array,
89
{
90
assert_eq!(
91
std::mem::discriminant(&T::get_static_dtype()),
92
std::mem::discriminant(&field.dtype)
93
);
94
95
let mut length = 0;
96
let mut null_count = 0;
97
let chunks = chunks
98
.into_iter()
99
.map(|x| {
100
length += x.len();
101
null_count += x.null_count();
102
Box::new(x) as Box<dyn Array>
103
})
104
.collect();
105
106
unsafe { ChunkedArray::new_with_dims(field, chunks, length, null_count) }
107
}
108
109
/// Create a new [`ChunkedArray`] from existing chunks.
110
///
111
/// # Safety
112
/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
113
pub unsafe fn from_chunks(name: PlSmallStr, mut chunks: Vec<ArrayRef>) -> Self {
114
let dtype = match T::get_static_dtype() {
115
dtype @ DataType::List(_) => from_chunks_list_dtype(&mut chunks, dtype),
116
#[cfg(feature = "dtype-array")]
117
dtype @ DataType::Array(_, _) => from_chunks_list_dtype(&mut chunks, dtype),
118
#[cfg(feature = "dtype-struct")]
119
dtype @ DataType::Struct(_) => from_chunks_list_dtype(&mut chunks, dtype),
120
dt => dt,
121
};
122
Self::from_chunks_and_dtype(name, chunks, dtype)
123
}
124
125
/// # Safety
126
/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
127
pub unsafe fn with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
128
ChunkedArray::new_with_compute_len(self.field.clone(), chunks)
129
}
130
131
/// Create a new [`ChunkedArray`] from existing chunks.
132
///
133
/// # Safety
134
///
135
/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
136
pub unsafe fn from_chunks_and_dtype(
137
name: PlSmallStr,
138
chunks: Vec<ArrayRef>,
139
dtype: DataType,
140
) -> Self {
141
// assertions in debug mode
142
// that check if the data types in the arrays are as expected
143
#[cfg(debug_assertions)]
144
{
145
if !chunks.is_empty() && !chunks[0].is_empty() && dtype.is_primitive() {
146
assert_eq!(chunks[0].dtype(), &dtype.to_arrow(CompatLevel::newest()))
147
}
148
}
149
150
Self::from_chunks_and_dtype_unchecked(name, chunks, dtype)
151
}
152
153
/// Create a new [`ChunkedArray`] from existing chunks.
154
///
155
/// # Safety
156
///
157
/// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
158
pub(crate) unsafe fn from_chunks_and_dtype_unchecked(
159
name: PlSmallStr,
160
chunks: Vec<ArrayRef>,
161
dtype: DataType,
162
) -> Self {
163
let field = Arc::new(Field::new(name, dtype));
164
ChunkedArray::new_with_compute_len(field, chunks)
165
}
166
167
pub fn full_null_like(ca: &Self, length: usize) -> Self {
168
let chunks = std::iter::once(T::Array::full_null(
169
length,
170
ca.dtype().to_arrow(CompatLevel::newest()),
171
));
172
Self::from_chunk_iter_like(ca, chunks)
173
}
174
}
175
176
impl<T> ChunkedArray<T>
177
where
178
T: PolarsNumericType,
179
{
180
/// Create a new ChunkedArray by taking ownership of the Vec. This operation is zero copy.
181
pub fn from_vec(name: PlSmallStr, v: Vec<T::Native>) -> Self {
182
Self::with_chunk(name, to_primitive::<T>(v, None))
183
}
184
185
/// Create a new ChunkedArray from a Vec and a validity mask.
186
pub fn from_vec_validity(
187
name: PlSmallStr,
188
values: Vec<T::Native>,
189
buffer: Option<Bitmap>,
190
) -> Self {
191
let arr = to_array::<T>(values, buffer);
192
ChunkedArray::new_with_compute_len(
193
Arc::new(Field::new(name, T::get_static_dtype())),
194
vec![arr],
195
)
196
}
197
198
/// Create a temporary [`ChunkedArray`] from a slice.
199
///
200
/// # Safety
201
/// The lifetime will be bound to the lifetime of the slice.
202
/// This will not be checked by the borrowchecker.
203
pub unsafe fn mmap_slice(name: PlSmallStr, values: &[T::Native]) -> Self {
204
Self::with_chunk(name, arrow::ffi::mmap::slice(values))
205
}
206
}
207
208
impl BooleanChunked {
209
/// Create a temporary [`ChunkedArray`] from a slice.
210
///
211
/// # Safety
212
/// The lifetime will be bound to the lifetime of the slice.
213
/// This will not be checked by the borrowchecker.
214
pub unsafe fn mmap_slice(name: PlSmallStr, values: &[u8], offset: usize, len: usize) -> Self {
215
let arr = arrow::ffi::mmap::bitmap(values, offset, len).unwrap();
216
Self::with_chunk(name, arr)
217
}
218
219
pub fn from_bitmap(name: PlSmallStr, bitmap: Bitmap) -> Self {
220
Self::with_chunk(
221
name,
222
BooleanArray::new(ArrowDataType::Boolean, bitmap, None),
223
)
224
}
225
}
226
227
impl<'a, T> From<&'a ChunkedArray<T>> for Vec<Option<T::Physical<'a>>>
228
where
229
T: PolarsDataType,
230
{
231
fn from(ca: &'a ChunkedArray<T>) -> Self {
232
let mut out = Vec::with_capacity(ca.len());
233
for arr in ca.downcast_iter() {
234
out.extend(arr.iter())
235
}
236
out
237
}
238
}
239
/// Copies every value of a [`StringChunked`] into an owned `String`, keeping missing
/// entries as `None`.
impl From<StringChunked> for Vec<Option<String>> {
    fn from(ca: StringChunked) -> Self {
        let owned = ca.iter().map(|value| value.map(ToString::to_string));
        owned.collect()
    }
}
244
245
/// Collects the optional booleans of every chunk, in chunk order, into one `Vec`.
impl From<BooleanChunked> for Vec<Option<bool>> {
    fn from(ca: BooleanChunked) -> Self {
        // Reserve room for all values up front, then append chunk by chunk.
        let mut values: Self = Vec::with_capacity(ca.len());
        ca.downcast_iter()
            .for_each(|arr| values.extend(arr.iter()));
        values
    }
}
254
255