Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/ffi/mmap.rs
6939 views
1
//! Functionality to mmap in-memory data regions.
2
use std::sync::Arc;
3
4
use polars_error::{PolarsResult, polars_bail};
5
6
use super::{ArrowArray, InternalArrowArray};
7
use crate::array::{BooleanArray, FromFfi, PrimitiveArray};
8
use crate::bitmap::Bitmap;
9
use crate::buffer::Buffer;
10
use crate::datatypes::ArrowDataType;
11
use crate::storage::SharedStorage;
12
use crate::types::NativeType;
13
14
#[allow(dead_code)]
15
struct PrivateData<T> {
16
// the owner of the pointers' regions
17
data: T,
18
buffers_ptr: Box<[*const std::os::raw::c_void]>,
19
children_ptr: Box<[*mut ArrowArray]>,
20
dictionary_ptr: Option<*mut ArrowArray>,
21
}
22
23
pub(crate) unsafe fn create_array<
24
T,
25
I: Iterator<Item = Option<*const u8>>,
26
II: Iterator<Item = ArrowArray>,
27
>(
28
data: Arc<T>,
29
num_rows: usize,
30
null_count: usize,
31
buffers: I,
32
children: II,
33
dictionary: Option<ArrowArray>,
34
offset: Option<usize>,
35
) -> ArrowArray {
36
let buffers_ptr = buffers
37
.map(|maybe_buffer| match maybe_buffer {
38
Some(b) => b as *const std::os::raw::c_void,
39
None => std::ptr::null(),
40
})
41
.collect::<Box<[_]>>();
42
let n_buffers = buffers_ptr.len() as i64;
43
44
let children_ptr = children
45
.map(|child| Box::into_raw(Box::new(child)))
46
.collect::<Box<_>>();
47
let n_children = children_ptr.len() as i64;
48
49
let dictionary_ptr = dictionary.map(|array| Box::into_raw(Box::new(array)));
50
51
let mut private_data = Box::new(PrivateData::<Arc<T>> {
52
data,
53
buffers_ptr,
54
children_ptr,
55
dictionary_ptr,
56
});
57
58
ArrowArray {
59
length: num_rows as i64,
60
null_count: null_count as i64,
61
offset: offset.unwrap_or(0) as i64, // Unwrap: IPC files are by definition not offset
62
n_buffers,
63
n_children,
64
buffers: private_data.buffers_ptr.as_mut_ptr(),
65
children: private_data.children_ptr.as_mut_ptr(),
66
dictionary: private_data.dictionary_ptr.unwrap_or(std::ptr::null_mut()),
67
release: Some(release::<Arc<T>>),
68
private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
69
}
70
}
71
72
/// callback used to drop [`ArrowArray`] when it is exported specified for [`PrivateData`].
73
unsafe extern "C" fn release<T>(array: *mut ArrowArray) {
74
if array.is_null() {
75
return;
76
}
77
let array = &mut *array;
78
79
// take ownership of `private_data`, therefore dropping it
80
let private = Box::from_raw(array.private_data as *mut PrivateData<T>);
81
for child in private.children_ptr.iter() {
82
let _ = Box::from_raw(*child);
83
}
84
85
if let Some(ptr) = private.dictionary_ptr {
86
let _ = Box::from_raw(ptr);
87
}
88
89
array.release = None;
90
}
91
92
/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
93
/// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
94
///
95
/// This can be useful if you want to apply arrow kernels on slices without incurring
96
/// a memcopy cost.
97
///
98
/// # Safety
99
///
100
/// Using this function is not unsafe, but the returned PrimitiveArray's lifetime is bound to the lifetime
101
/// of the slice. The returned [`PrimitiveArray`] _must not_ outlive the passed slice.
102
pub unsafe fn slice<T: NativeType>(values: &[T]) -> PrimitiveArray<T> {
103
let static_values = std::mem::transmute::<&[T], &'static [T]>(values);
104
let storage = SharedStorage::from_static(static_values);
105
let buffer = Buffer::from_storage(storage);
106
PrimitiveArray::new_unchecked(T::PRIMITIVE.into(), buffer, None)
107
}
108
109
/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
110
/// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
111
///
112
/// This can be useful if you want to apply arrow kernels on slices without incurring
113
/// a memcopy cost.
114
///
115
/// # Safety
116
///
117
/// The caller must ensure the passed `owner` ensures the data remains alive.
118
pub unsafe fn slice_and_owner<T: NativeType, O>(slice: &[T], owner: O) -> PrimitiveArray<T> {
119
let num_rows = slice.len();
120
let null_count = 0;
121
let validity = None;
122
123
let data: &[u8] = bytemuck::cast_slice(slice);
124
let ptr = data.as_ptr();
125
let data = Arc::new(owner);
126
127
// SAFETY: the underlying assumption of this function: the array will not be used
128
// beyond the
129
let array = create_array(
130
data,
131
num_rows,
132
null_count,
133
[validity, Some(ptr)].into_iter(),
134
[].into_iter(),
135
None,
136
None,
137
);
138
let array = InternalArrowArray::new(array, T::PRIMITIVE.into());
139
140
// SAFETY: we just created a valid array
141
unsafe { PrimitiveArray::<T>::try_from_ffi(array) }.unwrap()
142
}
143
144
/// Creates a (non-null) [`BooleanArray`] from a slice of bits.
145
/// This does not have memcopy and is the fastest way to create a [`BooleanArray`].
146
///
147
/// This can be useful if you want to apply arrow kernels on slices without
148
/// incurring a memcopy cost.
149
///
150
/// The `offset` indicates where the first bit starts in the first byte.
151
///
152
/// # Safety
153
///
154
/// Using this function is not unsafe, but the returned BooleanArrays's lifetime
155
/// is bound to the lifetime of the slice. The returned [`BooleanArray`] _must
156
/// not_ outlive the passed slice.
157
pub unsafe fn bitmap(data: &[u8], offset: usize, length: usize) -> PolarsResult<BooleanArray> {
158
if offset >= 8 {
159
polars_bail!(InvalidOperation: "offset should be < 8")
160
};
161
if length > data.len() * 8 - offset {
162
polars_bail!(InvalidOperation: "given length is oob")
163
}
164
let static_data = std::mem::transmute::<&[u8], &'static [u8]>(data);
165
let storage = SharedStorage::from_static(static_data);
166
let bitmap = Bitmap::from_inner_unchecked(storage, offset, length, None);
167
Ok(BooleanArray::new(ArrowDataType::Boolean, bitmap, None))
168
}
169
170
/// Creates a (non-null) [`BooleanArray`] from a slice of bits.
171
/// This does not have memcopy and is the fastest way to create a [`BooleanArray`].
172
///
173
/// This can be useful if you want to apply arrow kernels on slices without
174
/// incurring a memcopy cost.
175
///
176
/// The `offset` indicates where the first bit starts in the first byte.
177
///
178
/// # Safety
179
///
180
/// The caller must ensure the passed `owner` ensures the data remains alive.
181
pub unsafe fn bitmap_and_owner<O>(
182
data: &[u8],
183
offset: usize,
184
length: usize,
185
owner: O,
186
) -> PolarsResult<BooleanArray> {
187
if offset >= 8 {
188
polars_bail!(InvalidOperation: "offset should be < 8")
189
};
190
if length > data.len() * 8 - offset {
191
polars_bail!(InvalidOperation: "given length is oob")
192
}
193
let null_count = 0;
194
let validity = None;
195
196
let ptr = data.as_ptr();
197
let data = Arc::new(owner);
198
199
// SAFETY: the underlying assumption of this function: the array will not be used
200
// beyond the
201
let array = create_array(
202
data,
203
length,
204
null_count,
205
[validity, Some(ptr)].into_iter(),
206
[].into_iter(),
207
None,
208
Some(offset),
209
);
210
let array = InternalArrowArray::new(array, ArrowDataType::Boolean);
211
212
// SAFETY: we just created a valid array
213
Ok(unsafe { BooleanArray::try_from_ffi(array) }.unwrap())
214
}
215
216