Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/ffi/mmap.rs
8446 views
1
//! Functionality to mmap in-memory data regions.
2
use std::sync::Arc;
3
4
use polars_buffer::{Buffer, SharedStorage};
5
use polars_error::{PolarsResult, polars_bail};
6
7
use super::{ArrowArray, InternalArrowArray};
8
use crate::array::{BooleanArray, FromFfi, PrimitiveArray};
9
use crate::bitmap::Bitmap;
10
use crate::datatypes::ArrowDataType;
11
use crate::types::NativeType;
12
13
#[allow(dead_code)]
14
struct PrivateData<T> {
15
// the owner of the pointers' regions
16
data: T,
17
buffers_ptr: Box<[*const std::os::raw::c_void]>,
18
children_ptr: Box<[*mut ArrowArray]>,
19
dictionary_ptr: Option<*mut ArrowArray>,
20
}
21
22
pub(crate) unsafe fn create_array<
23
O: Send + 'static,
24
I: Iterator<Item = Option<*const u8>>,
25
II: Iterator<Item = ArrowArray>,
26
>(
27
data: Arc<O>,
28
num_rows: usize,
29
null_count: usize,
30
buffers: I,
31
children: II,
32
dictionary: Option<ArrowArray>,
33
offset: Option<usize>,
34
) -> ArrowArray {
35
let buffers_ptr = buffers
36
.map(|maybe_buffer| match maybe_buffer {
37
Some(b) => b as *const std::os::raw::c_void,
38
None => std::ptr::null(),
39
})
40
.collect::<Box<[_]>>();
41
let n_buffers = buffers_ptr.len() as i64;
42
43
let children_ptr = children
44
.map(|child| Box::into_raw(Box::new(child)))
45
.collect::<Box<_>>();
46
let n_children = children_ptr.len() as i64;
47
48
let dictionary_ptr = dictionary.map(|array| Box::into_raw(Box::new(array)));
49
50
let mut private_data = Box::new(PrivateData::<Arc<O>> {
51
data,
52
buffers_ptr,
53
children_ptr,
54
dictionary_ptr,
55
});
56
57
ArrowArray {
58
length: num_rows as i64,
59
null_count: null_count as i64,
60
offset: offset.unwrap_or(0) as i64, // Unwrap: IPC files are by definition not offset
61
n_buffers,
62
n_children,
63
buffers: private_data.buffers_ptr.as_mut_ptr(),
64
children: private_data.children_ptr.as_mut_ptr(),
65
dictionary: private_data.dictionary_ptr.unwrap_or(std::ptr::null_mut()),
66
release: Some(release::<Arc<O>>),
67
private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
68
}
69
}
70
71
/// callback used to drop [`ArrowArray`] when it is exported specified for [`PrivateData`].
72
unsafe extern "C" fn release<T>(array: *mut ArrowArray) {
73
if array.is_null() {
74
return;
75
}
76
let array = &mut *array;
77
78
// take ownership of `private_data`, therefore dropping it
79
let private = Box::from_raw(array.private_data as *mut PrivateData<T>);
80
for child in private.children_ptr.iter() {
81
let _ = Box::from_raw(*child);
82
}
83
84
if let Some(ptr) = private.dictionary_ptr {
85
let _ = Box::from_raw(ptr);
86
}
87
88
array.release = None;
89
}
90
91
/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
92
/// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
93
///
94
/// This can be useful if you want to apply arrow kernels on slices without incurring
95
/// a memcopy cost.
96
///
97
/// # Safety
98
///
99
/// Using this function is not unsafe, but the returned PrimitiveArray's lifetime is bound to the lifetime
100
/// of the slice. The returned [`PrimitiveArray`] _must not_ outlive the passed slice.
101
pub unsafe fn slice<T: NativeType>(values: &[T]) -> PrimitiveArray<T> {
102
let static_values = std::mem::transmute::<&[T], &'static [T]>(values);
103
let storage = SharedStorage::from_static(static_values);
104
let buffer = Buffer::from_storage(storage);
105
PrimitiveArray::new_unchecked(T::PRIMITIVE.into(), buffer, None)
106
}
107
108
/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
109
/// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
110
///
111
/// This can be useful if you want to apply arrow kernels on slices without incurring
112
/// a memcopy cost.
113
///
114
/// # Safety
115
///
116
/// The caller must ensure the passed `owner` ensures the data remains alive.
117
pub unsafe fn slice_and_owner<T: NativeType, O: Send + 'static>(
118
slice: &[T],
119
owner: O,
120
) -> PrimitiveArray<T> {
121
let num_rows = slice.len();
122
let null_count = 0;
123
let validity = None;
124
125
let data: &[u8] = bytemuck::cast_slice(slice);
126
let ptr = data.as_ptr();
127
let data = Arc::new(owner);
128
129
// SAFETY: the underlying assumption of this function: the array will not be used
130
// beyond the
131
let array = create_array(
132
data,
133
num_rows,
134
null_count,
135
[validity, Some(ptr)].into_iter(),
136
[].into_iter(),
137
None,
138
None,
139
);
140
let array = InternalArrowArray::new(array, T::PRIMITIVE.into());
141
142
// SAFETY: we just created a valid array
143
unsafe { PrimitiveArray::<T>::try_from_ffi(array) }.unwrap()
144
}
145
146
/// Creates a (non-null) [`BooleanArray`] from a slice of bits.
147
/// This does not have memcopy and is the fastest way to create a [`BooleanArray`].
148
///
149
/// This can be useful if you want to apply arrow kernels on slices without
150
/// incurring a memcopy cost.
151
///
152
/// The `offset` indicates where the first bit starts in the first byte.
153
///
154
/// # Safety
155
///
156
/// Using this function is not unsafe, but the returned BooleanArrays's lifetime
157
/// is bound to the lifetime of the slice. The returned [`BooleanArray`] _must
158
/// not_ outlive the passed slice.
159
pub unsafe fn bitmap(data: &[u8], offset: usize, length: usize) -> PolarsResult<BooleanArray> {
160
if offset >= 8 {
161
polars_bail!(InvalidOperation: "offset should be < 8")
162
};
163
if length > data.len() * 8 - offset {
164
polars_bail!(InvalidOperation: "given length is oob")
165
}
166
let static_data = std::mem::transmute::<&[u8], &'static [u8]>(data);
167
let storage = SharedStorage::from_static(static_data);
168
let bitmap = Bitmap::from_inner_unchecked(storage, offset, length, None);
169
Ok(BooleanArray::new(ArrowDataType::Boolean, bitmap, None))
170
}
171
172
/// Creates a (non-null) [`BooleanArray`] from a slice of bits.
173
/// This does not have memcopy and is the fastest way to create a [`BooleanArray`].
174
///
175
/// This can be useful if you want to apply arrow kernels on slices without
176
/// incurring a memcopy cost.
177
///
178
/// The `offset` indicates where the first bit starts in the first byte.
179
///
180
/// # Safety
181
///
182
/// The caller must ensure the passed `owner` ensures the data remains alive.
183
pub unsafe fn bitmap_and_owner<O: Send + 'static>(
184
data: &[u8],
185
offset: usize,
186
length: usize,
187
owner: O,
188
) -> PolarsResult<BooleanArray> {
189
if offset >= 8 {
190
polars_bail!(InvalidOperation: "offset should be < 8")
191
};
192
if length > data.len() * 8 - offset {
193
polars_bail!(InvalidOperation: "given length is oob")
194
}
195
let null_count = 0;
196
let validity = None;
197
198
let ptr = data.as_ptr();
199
let data = Arc::new(owner);
200
201
// SAFETY: the underlying assumption of this function: the array will not be used
202
// beyond the
203
let array = create_array(
204
data,
205
length,
206
null_count,
207
[validity, Some(ptr)].into_iter(),
208
[].into_iter(),
209
None,
210
Some(offset),
211
);
212
let array = InternalArrowArray::new(array, ArrowDataType::Boolean);
213
214
// SAFETY: we just created a valid array
215
Ok(unsafe { BooleanArray::try_from_ffi(array) }.unwrap())
216
}
217
218