#![allow(unsafe_op_in_unsafe_fn)]
use std::hint::unreachable_unchecked;
use std::ops::Deref;
use polars_error::{PolarsError, PolarsResult, polars_bail, polars_err};
use crate::array::Splitable;
use crate::buffer::Buffer;
pub use crate::types::Offset;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Offsets<O: Offset>(Vec<O>);
impl<O: Offset> Default for Offsets<O> {
#[inline]
fn default() -> Self {
Self::new()
}
}
impl<O: Offset> Deref for Offsets<O> {
type Target = [O];
fn deref(&self) -> &Self::Target {
self.as_slice()
}
}
impl<O: Offset> TryFrom<Vec<O>> for Offsets<O> {
type Error = PolarsError;
#[inline]
fn try_from(offsets: Vec<O>) -> Result<Self, Self::Error> {
try_check_offsets(&offsets)?;
Ok(Self(offsets))
}
}
impl<O: Offset> TryFrom<Buffer<O>> for OffsetsBuffer<O> {
type Error = PolarsError;
#[inline]
fn try_from(offsets: Buffer<O>) -> Result<Self, Self::Error> {
try_check_offsets(&offsets)?;
Ok(Self(offsets))
}
}
impl<O: Offset> TryFrom<Vec<O>> for OffsetsBuffer<O> {
type Error = PolarsError;
#[inline]
fn try_from(offsets: Vec<O>) -> Result<Self, Self::Error> {
try_check_offsets(&offsets)?;
Ok(Self(offsets.into()))
}
}
impl<O: Offset> From<Offsets<O>> for OffsetsBuffer<O> {
#[inline]
fn from(offsets: Offsets<O>) -> Self {
Self(offsets.0.into())
}
}
impl<O: Offset> Offsets<O> {
#[inline]
pub fn new() -> Self {
Self(vec![O::zero()])
}
#[inline]
pub fn new_zeroed(length: usize) -> Self {
Self(vec![O::zero(); length + 1])
}
#[inline]
pub fn try_from_iter<I: IntoIterator<Item = usize>>(iter: I) -> PolarsResult<Self> {
let iterator = iter.into_iter();
let (lower, _) = iterator.size_hint();
let mut offsets = Self::with_capacity(lower);
for item in iterator {
offsets.try_push(item)?
}
Ok(offsets)
}
pub fn with_capacity(capacity: usize) -> Self {
let mut offsets = Vec::with_capacity(capacity + 1);
offsets.push(O::zero());
Self(offsets)
}
pub fn capacity(&self) -> usize {
self.0.capacity() - 1
}
pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional);
}
pub fn shrink_to_fit(&mut self) {
self.0.shrink_to_fit();
}
#[inline]
pub fn try_push(&mut self, length: usize) -> PolarsResult<()> {
if O::IS_LARGE {
let length = O::from_as_usize(length);
let old_length = self.last();
let new_length = *old_length + length;
self.0.push(new_length);
Ok(())
} else {
let length =
O::from_usize(length).ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
let old_length = self.last();
let new_length = old_length
.checked_add(&length)
.ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
self.0.push(new_length);
Ok(())
}
}
#[inline]
pub unsafe fn new_unchecked(offsets: Vec<O>) -> Self {
#[cfg(debug_assertions)]
{
let mut prev_offset = O::default();
let mut is_monotonely_increasing = true;
for offset in &offsets {
is_monotonely_increasing &= *offset >= prev_offset;
prev_offset = *offset;
}
assert!(
is_monotonely_increasing,
"Unsafe precondition violated. Invariant of offsets broken."
);
}
Self(offsets)
}
#[inline]
pub fn last(&self) -> &O {
match self.0.last() {
Some(element) => element,
None => unsafe { unreachable_unchecked() },
}
}
#[inline]
pub fn length_at(&self, index: usize) -> usize {
let (start, end) = self.start_end(index);
end - start
}
#[inline]
pub fn start_end(&self, index: usize) -> (usize, usize) {
assert!(index < self.len_proxy());
unsafe { self.start_end_unchecked(index) }
}
#[inline]
pub unsafe fn start_end_unchecked(&self, index: usize) -> (usize, usize) {
let start = self.0.get_unchecked(index).to_usize();
let end = self.0.get_unchecked(index + 1).to_usize();
(start, end)
}
#[inline]
pub fn len_proxy(&self) -> usize {
self.0.len() - 1
}
#[inline]
pub fn as_slice(&self) -> &[O] {
self.0.as_slice()
}
#[inline]
pub fn pop(&mut self) -> Option<O> {
if self.len_proxy() == 0 {
None
} else {
self.0.pop()
}
}
#[inline]
pub fn extend_constant(&mut self, additional: usize) {
let offset = *self.last();
if additional == 1 {
self.0.push(offset)
} else {
self.0.resize(self.0.len() + additional, offset)
}
}
#[inline]
pub fn try_from_lengths<I: Iterator<Item = usize>>(lengths: I) -> PolarsResult<Self> {
let mut self_ = Self::with_capacity(lengths.size_hint().0);
self_.try_extend_from_lengths(lengths)?;
Ok(self_)
}
#[inline]
pub fn try_extend_from_lengths<I: Iterator<Item = usize>>(
&mut self,
lengths: I,
) -> PolarsResult<()> {
let mut total_length = 0;
let mut offset = *self.last();
let original_offset = offset.to_usize();
let lengths = lengths.map(|length| {
total_length += length;
O::from_as_usize(length)
});
let offsets = lengths.map(|length| {
offset += length;
offset
});
self.0.extend(offsets);
let last_offset = original_offset
.checked_add(total_length)
.ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
O::from_usize(last_offset).ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
Ok(())
}
pub fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
let mut length = *self.last();
let other_length = *other.last();
length
.checked_add(&other_length)
.ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
let lengths = other.as_slice().windows(2).map(|w| w[1] - w[0]);
let offsets = lengths.map(|new_length| {
length += new_length;
length
});
self.0.extend(offsets);
Ok(())
}
pub fn try_extend_from_slice(
&mut self,
other: &OffsetsBuffer<O>,
start: usize,
length: usize,
) -> PolarsResult<()> {
if length == 0 {
return Ok(());
}
let other = &other.0[start..start + length + 1];
let other_length = other.last().expect("Length to be non-zero");
let mut length = *self.last();
length
.checked_add(other_length)
.ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
let lengths = other.windows(2).map(|w| w[1] - w[0]);
let offsets = lengths.map(|new_length| {
length += new_length;
length
});
self.0.extend(offsets);
Ok(())
}
#[inline]
pub fn into_inner(self) -> Vec<O> {
self.0
}
}
fn try_check_offsets<O: Offset>(offsets: &[O]) -> PolarsResult<()> {
match offsets.first() {
None => polars_bail!(ComputeError: "offsets must have at least one element"),
Some(first) => {
if *first < O::zero() {
polars_bail!(ComputeError: "offsets must be larger than 0")
}
let mut previous = *first;
let mut any_invalid = false;
for offset in offsets {
if previous > *offset {
any_invalid = true
}
previous = *offset;
}
if any_invalid {
polars_bail!(ComputeError: "offsets must be monotonically increasing")
} else {
Ok(())
}
},
}
}
#[derive(Clone, PartialEq, Debug)]
pub struct OffsetsBuffer<O: Offset>(Buffer<O>);
impl<O: Offset> Default for OffsetsBuffer<O> {
#[inline]
fn default() -> Self {
Self(vec![O::zero()].into())
}
}
impl<O: Offset> OffsetsBuffer<O> {
#[inline]
pub unsafe fn new_unchecked(offsets: Buffer<O>) -> Self {
Self(offsets)
}
#[inline]
pub fn new() -> Self {
Self(vec![O::zero()].into())
}
#[inline]
pub fn one_with_length(length: O) -> Self {
Self(vec![O::zero(), length].into())
}
#[inline]
pub fn into_mut(self) -> either::Either<Self, Offsets<O>> {
self.0
.into_mut()
.map_right(|offsets| unsafe { Offsets::new_unchecked(offsets) })
.map_left(Self)
}
#[inline]
pub fn buffer(&self) -> &Buffer<O> {
&self.0
}
#[inline]
pub fn len_proxy(&self) -> usize {
self.0.len() - 1
}
#[inline]
pub fn len(&self) -> usize {
self.0.len()
}
#[inline]
pub fn as_slice(&self) -> &[O] {
self.0.as_slice()
}
#[inline]
pub fn range(&self) -> O {
*self.last() - *self.first()
}
#[inline]
pub fn first(&self) -> &O {
match self.0.first() {
Some(element) => element,
None => unsafe { unreachable_unchecked() },
}
}
#[inline]
pub fn last(&self) -> &O {
match self.0.last() {
Some(element) => element,
None => unsafe { unreachable_unchecked() },
}
}
#[inline]
pub fn length_at(&self, index: usize) -> usize {
let (start, end) = self.start_end(index);
end - start
}
#[inline]
pub fn start_end(&self, index: usize) -> (usize, usize) {
assert!(index < self.len_proxy());
unsafe { self.start_end_unchecked(index) }
}
#[inline]
pub unsafe fn start_end_unchecked(&self, index: usize) -> (usize, usize) {
let start = self.0.get_unchecked(index).to_usize();
let end = self.0.get_unchecked(index + 1).to_usize();
(start, end)
}
#[inline]
pub fn slice(&mut self, offset: usize, length: usize) {
assert!(length > 0);
self.0.slice(offset, length);
}
#[inline]
pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
self.0.slice_unchecked(offset, length);
}
#[inline]
pub fn lengths(&self) -> impl ExactSizeIterator<Item = usize> + '_ {
self.0.windows(2).map(|w| (w[1] - w[0]).to_usize())
}
#[inline]
pub fn offset_and_length_iter(&self) -> impl ExactSizeIterator<Item = (usize, usize)> + '_ {
self.windows(2).map(|x| {
let [l, r] = x else { unreachable!() };
let l = l.to_usize();
let r = r.to_usize();
(l, r - l)
})
}
pub fn leaf_ranges_iter(
offsets: &[Self],
) -> impl Iterator<Item = core::ops::Range<usize>> + '_ {
let others = &offsets[1..];
offsets[0].windows(2).map(move |x| {
let [l, r] = x else { unreachable!() };
let mut l = l.to_usize();
let mut r = r.to_usize();
for o in others {
let slc = o.as_slice();
l = slc[l].to_usize();
r = slc[r].to_usize();
}
l..r
})
}
pub fn leaf_full_start_end(offsets: &[Self]) -> core::ops::Range<usize> {
let mut l = offsets[0].first().to_usize();
let mut r = offsets[0].last().to_usize();
for o in &offsets[1..] {
let slc = o.as_slice();
l = slc[l].to_usize();
r = slc[r].to_usize();
}
l..r
}
#[inline]
pub fn into_inner(self) -> Buffer<O> {
self.0
}
#[inline]
pub fn delta(&self, start: usize, end: usize) -> usize {
assert!(start <= end);
(self.0[end + 1] - self.0[start]).to_usize()
}
}
impl From<&OffsetsBuffer<i32>> for OffsetsBuffer<i64> {
fn from(offsets: &OffsetsBuffer<i32>) -> Self {
Self(
offsets
.buffer()
.iter()
.map(|x| *x as i64)
.collect::<Vec<_>>()
.into(),
)
}
}
impl TryFrom<&OffsetsBuffer<i64>> for OffsetsBuffer<i32> {
type Error = PolarsError;
fn try_from(offsets: &OffsetsBuffer<i64>) -> Result<Self, Self::Error> {
i32::try_from(*offsets.last()).map_err(|_| polars_err!(ComputeError: "overflow"))?;
Ok(Self(
offsets
.buffer()
.iter()
.map(|x| *x as i32)
.collect::<Vec<_>>()
.into(),
))
}
}
impl From<Offsets<i32>> for Offsets<i64> {
fn from(offsets: Offsets<i32>) -> Self {
Self(
offsets
.as_slice()
.iter()
.map(|x| *x as i64)
.collect::<Vec<_>>(),
)
}
}
impl TryFrom<Offsets<i64>> for Offsets<i32> {
type Error = PolarsError;
fn try_from(offsets: Offsets<i64>) -> Result<Self, Self::Error> {
i32::try_from(*offsets.last()).map_err(|_| polars_err!(ComputeError: "overflow"))?;
Ok(Self(
offsets
.as_slice()
.iter()
.map(|x| *x as i32)
.collect::<Vec<_>>(),
))
}
}
impl<O: Offset> std::ops::Deref for OffsetsBuffer<O> {
type Target = [O];
#[inline]
fn deref(&self) -> &[O] {
self.0.as_slice()
}
}
impl<O: Offset> Splitable for OffsetsBuffer<O> {
fn check_bound(&self, offset: usize) -> bool {
offset <= self.len_proxy()
}
unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
let mut lhs = self.0.clone();
let mut rhs = self.0.clone();
lhs.slice(0, offset + 1);
rhs.slice(offset, self.0.len() - offset);
(Self(lhs), Self(rhs))
}
}