Path: blob/main/crates/polars-arrow/src/buffer/immutable.rs
#![allow(unsafe_op_in_unsafe_fn)]
use std::ops::Deref;

use bytemuck::{Pod, Zeroable};
use either::Either;

use super::IntoIter;
use crate::array::{ArrayAccessor, Splitable};
use crate::storage::SharedStorage;

/// [`Buffer`] is a contiguous memory region that can be shared across
/// thread boundaries.
///
/// The easiest way to think about [`Buffer<T>`] is as an `Arc<Vec<T>>` with the
/// following differences:
/// * slicing and cloning are `O(1)`.
/// * it supports externally allocated memory
///
/// The easiest way to create one is to use its implementation of `From<Vec<T>>`.
///
/// # Examples
/// ```
/// use polars_arrow::buffer::Buffer;
///
/// let mut buffer: Buffer<u32> = vec![1, 2, 3].into();
/// assert_eq!(buffer.as_ref(), [1, 2, 3].as_ref());
///
/// // it supports copy-on-write semantics (i.e. back to a `Vec`)
/// let vec: Vec<u32> = buffer.into_mut().right().unwrap();
/// assert_eq!(vec, vec![1, 2, 3]);
///
/// // cloning and slicing is `O(1)` (data is shared)
/// let mut buffer: Buffer<u32> = vec![1, 2, 3].into();
/// let mut sliced = buffer.clone();
/// sliced.slice(1, 1);
/// assert_eq!(sliced.as_ref(), [2].as_ref());
/// // but cloning forbids getting mut since `sliced` and `buffer` now share data
/// assert_eq!(buffer.get_mut_slice(), None);
/// ```
#[derive(Clone)]
pub struct Buffer<T> {
    /// The shared storage backing this buffer.
    storage: SharedStorage<T>,

    /// A pointer into the buffer where our data starts.
    ptr: *const T,

    /// The length of this buffer, in elements.
    length: usize,
}

unsafe impl<T: Send + Sync> Sync for Buffer<T> {}
unsafe impl<T: Send + Sync> Send for Buffer<T> {}

impl<T: PartialEq> PartialEq for Buffer<T> {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.deref() == other.deref()
    }
}

impl<T: Eq> Eq for Buffer<T> {}

impl<T: std::hash::Hash> std::hash::Hash for Buffer<T> {
    #[inline]
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.as_slice().hash(state);
    }
}

impl<T: std::fmt::Debug> std::fmt::Debug for Buffer<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Debug::fmt(&**self, f)
    }
}

impl<T> Default for Buffer<T> {
    #[inline]
    fn default() -> Self {
        Vec::new().into()
    }
}

impl<T> Buffer<T> {
    /// Creates an empty [`Buffer`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Auxiliary method to create a new [`Buffer`] from a [`SharedStorage`].
    pub fn from_storage(storage: SharedStorage<T>) -> Self {
        let ptr = storage.as_ptr();
        let length = storage.len();
        Buffer {
            storage,
            ptr,
            length,
        }
    }

    /// Creates a [`Buffer`] from a `'static` slice.
    pub fn from_static(data: &'static [T]) -> Self {
        Self::from_storage(SharedStorage::from_static(data))
    }

    /// Returns the number of elements in the buffer.
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns whether the buffer is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }

    /// Returns whether the underlying data is sliced.
    /// If sliced, the [`Buffer`] is backed by
    /// more data than the length of `Self`.
    pub fn is_sliced(&self) -> bool {
        self.storage.len() != self.length
    }

    /// Expands this slice to the maximum allowed by the underlying storage.
    /// Only expands towards the end; the offset isn't changed. That is, element
    /// `i` refers to the same element before and after this operation.
    pub fn expand_end_to_storage(self) -> Self {
        unsafe {
            let offset = self.ptr.offset_from(self.storage.as_ptr()) as usize;
            Self {
                ptr: self.ptr,
                length: self.storage.len() - offset,
                storage: self.storage,
            }
        }
    }

    /// Returns the slice of elements stored in this buffer.
    #[inline]
    pub fn as_slice(&self) -> &[T] {
        // SAFETY:
        // invariant of this struct `offset + length <= data.len()`
        debug_assert!(self.offset() + self.length <= self.storage.len());
        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
    }

    /// Returns a reference to the element at `index`, without bounds checking.
    ///
    /// # Safety
    /// `index` must be smaller than `len`
    #[inline]
    pub(super) unsafe fn get_unchecked(&self, index: usize) -> &T {
        // SAFETY:
        // invariant of this function
        debug_assert!(index < self.length);
        unsafe { &*self.ptr.add(index) }
    }

    /// Returns a new [`Buffer`] that is a slice of this buffer starting at `offset`.
    /// Doing so allows the same memory region to be shared between buffers.
    /// # Panics
    /// Panics iff `offset + length` is larger than `len`.
    #[inline]
    pub fn sliced(self, offset: usize, length: usize) -> Self {
        assert!(
            offset + length <= self.len(),
            "the offset of the new Buffer cannot exceed the existing length"
        );
        // SAFETY: we just checked bounds
        unsafe { self.sliced_unchecked(offset, length) }
    }

    /// Slices this buffer in place, starting at `offset`.
    /// # Panics
    /// Panics iff `offset + length` is larger than `len`.
    #[inline]
    pub fn slice(&mut self, offset: usize, length: usize) {
        assert!(
            offset + length <= self.len(),
            "the offset of the new Buffer cannot exceed the existing length"
        );
        // SAFETY: we just checked bounds
        unsafe { self.slice_unchecked(offset, length) }
    }

    /// Returns a new [`Buffer`] that is a slice of this buffer starting at `offset`.
    /// Doing so allows the same memory region to be shared between buffers.
    ///
    /// # Safety
    /// The caller must ensure `offset + length <= self.len()`
    #[inline]
    #[must_use]
    pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
        debug_assert!(offset + length <= self.len());

        self.slice_unchecked(offset, length);
        self
    }

    /// Slices this buffer in place, starting at `offset`.
    ///
    /// # Safety
    /// The caller must ensure `offset + length <= self.len()`
    #[inline]
    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
        self.ptr = self.ptr.add(offset);
        self.length = length;
    }

    /// Returns a pointer to the start of the storage underlying this buffer.
    #[inline]
    pub(crate) fn storage_ptr(&self) -> *const T {
        self.storage.as_ptr()
    }

    /// Returns the start offset of this buffer within the underlying storage.
    #[inline]
    pub fn offset(&self) -> usize {
        unsafe {
            let ret = self.ptr.offset_from(self.storage.as_ptr()) as usize;
            debug_assert!(ret <= self.storage.len());
            ret
        }
    }

    /// Sets the length of this buffer.
    ///
    /// # Safety
    /// The caller must ensure that the buffer was properly initialized up to `len`.
    #[inline]
    pub unsafe fn set_len(&mut self, len: usize) {
        self.length = len;
    }

    /// Converts this [`Buffer`] into its underlying [`Vec`], if possible.
    ///
    /// This operation returns [`Either::Right`] iff this [`Buffer`]:
    /// * is not sliced
    /// * has no alive clones
    /// * has not been imported from the C data interface (FFI)
    #[inline]
    pub fn into_mut(mut self) -> Either<Self, Vec<T>> {
        // We lose information if the data is sliced.
        if self.is_sliced() {
            return Either::Left(self);
        }
        match self.storage.try_into_vec() {
            Ok(v) => Either::Right(v),
            Err(slf) => {
                self.storage = slf;
                Either::Left(self)
            },
        }
    }

    /// Returns a mutable reference to its slice, if possible.
    ///
    /// This operation returns [`Some`] iff this [`Buffer`]:
    /// * has no alive clones
    /// * has not been imported from the C data interface (FFI)
    #[inline]
    pub fn get_mut_slice(&mut self) -> Option<&mut [T]> {
        let offset = self.offset();
        let slice = self.storage.try_as_mut_slice()?;
        Some(unsafe { slice.get_unchecked_mut(offset..offset + self.length) })
    }

    /// Returns the reference count of the underlying storage.
    ///
    /// Since this takes a shared reference to self, beware that others might
    /// increment this after you've checked it's equal to 1.
    pub fn storage_refcount(&self) -> u64 {
        self.storage.refcount()
    }
}

impl<T: Pod> Buffer<T> {
    /// Attempts to reinterpret this `Buffer<T>` as a `Buffer<U>` without copying,
    /// returning `Err` with the original buffer if the underlying storage cannot
    /// be reinterpreted.
    pub fn try_transmute<U: Pod>(mut self) -> Result<Buffer<U>, Self> {
        assert_ne!(size_of::<U>(), 0);
        let ptr = self.ptr as *const U;
        let length = self.length;
        match self.storage.try_transmute() {
            Err(v) => {
                self.storage = v;
                Err(self)
            },
            Ok(storage) => Ok(Buffer {
                storage,
                ptr,
                length: length.checked_mul(size_of::<T>()).expect("overflow") / size_of::<U>(),
            }),
        }
    }
}

impl<T: Clone> Buffer<T> {
    /// Converts this [`Buffer`] into a [`Vec`], cloning the data if the buffer
    /// is sliced or shared.
    pub fn make_mut(self) -> Vec<T> {
        match self.into_mut() {
            Either::Right(v) => v,
            Either::Left(same) => same.as_slice().to_vec(),
        }
    }
}

impl<T: Zeroable + Copy> Buffer<T> {
    /// Creates a new [`Buffer`] with `len` zeroed elements.
    pub fn zeroed(len: usize) -> Self {
        vec![T::zeroed(); len].into()
    }
}

impl<T> From<Vec<T>> for Buffer<T> {
    #[inline]
    fn from(v: Vec<T>) -> Self {
        Self::from_storage(SharedStorage::from_vec(v))
    }
}

impl<T> Deref for Buffer<T> {
    type Target = [T];

    #[inline(always)]
    fn deref(&self) -> &[T] {
        self.as_slice()
    }
}

impl<T> AsRef<[T]> for Buffer<T> {
    #[inline(always)]
    fn as_ref(&self) -> &[T] {
        self.as_slice()
    }
}

impl<T> FromIterator<T> for Buffer<T> {
    #[inline]
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
        Vec::from_iter(iter).into()
    }
}

impl<T: Copy> IntoIterator for Buffer<T> {
    type Item = T;

    type IntoIter = IntoIter<T>;

    fn into_iter(self) -> Self::IntoIter {
        IntoIter::new(self)
    }
}

unsafe impl<'a, T: 'a> ArrayAccessor<'a> for Buffer<T> {
    type Item = &'a T;

    unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
        unsafe { &*self.ptr.add(index) }
    }

    fn len(&self) -> usize {
        Buffer::len(self)
    }
}

impl<T> Splitable for Buffer<T> {
    #[inline(always)]
    fn check_bound(&self, offset: usize) -> bool {
        offset <= self.len()
    }

    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        let storage = &self.storage;

        (
            Self {
                storage: storage.clone(),
                ptr: self.ptr,
                length: offset,
            },
            Self {
                storage: storage.clone(),
                ptr: self.ptr.wrapping_add(offset),
                length: self.length - offset,
            },
        )
    }
}
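Below is a brief, self-contained sketch (not part of the file above) of how the public API composes: `sliced` shares storage in O(1), `offset`/`is_sliced` describe the view into the backing storage, `expand_end_to_storage` grows that view back to the end of the storage, and `into_mut` recovers the `Vec` only once the buffer is unsliced and uniquely owned. It assumes the `polars_arrow` crate path and the `either` crate accessors already used in the doc example; the variable names and values are illustrative.

```rust
use either::Either;
use polars_arrow::buffer::Buffer;

fn main() {
    // Build a buffer from a Vec; the allocation is moved into shared storage, not copied.
    let buffer: Buffer<u32> = vec![10, 20, 30, 40].into();

    // `sliced` is O(1): the clone shares the same storage, only the pointer/length change.
    let sliced = buffer.clone().sliced(1, 2);
    assert_eq!(sliced.as_ref(), [20, 30].as_ref());
    assert!(sliced.is_sliced());
    assert_eq!(sliced.offset(), 1);

    // `expand_end_to_storage` keeps the offset but extends the view to the end
    // of the backing storage.
    let expanded = sliced.expand_end_to_storage();
    assert_eq!(expanded.as_ref(), [20, 30, 40].as_ref());

    // While a clone is alive the storage is shared, so the Vec cannot be recovered...
    assert!(matches!(buffer.clone().into_mut(), Either::Left(_)));

    // ...but once `buffer` is the sole, unsliced owner, `into_mut` returns the Vec.
    drop(expanded);
    let vec: Vec<u32> = buffer.into_mut().right().unwrap();
    assert_eq!(vec, vec![10, 20, 30, 40]);
}
```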