Path: blob/main/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs
7887 views
//! This module implements the `DELTA_BINARY_PACKED` encoding.1//!2//! For performance reasons this is done without iterators. Instead, we have `gather_n` functions3//! and a `DeltaGatherer` trait. These allow efficient decoding and mapping of the decoded values.4//!5//! Full information on the delta encoding can be found on the Apache Parquet Format repository.6//!7//! <https://github.com/apache/parquet-format/blob/e517ac4dbe08d518eb5c2e58576d4c711973db94/Encodings.md#delta-encoding-delta_binary_packed--5>8//!9//! Delta encoding compresses sequential integer values by encoding the first value and the10//! differences between consequentive values. This variant encodes the data into `Block`s and11//! `MiniBlock`s.12//!13//! - A `Block` contains a minimum delta, bitwidths and one or more miniblocks.14//! - A `MiniBlock` contains many deltas that are encoded in [`bitpacked`] encoding.15//!16//! The decoder keeps track of the last value and calculates a new value with the following17//! function.18//!19//! ```text20//! NextValue(Delta) = {21//! Value = Decoder.LastValue + Delta + Block.MinDelta22//! Decoder.LastValue = Value23//! return Value24//! }25//! ```26//!27//! Note that all these additions need to be wrapping.2829use super::super::{bitpacked, uleb128, zigzag_leb128};30use super::lin_natural_sum;31use crate::parquet::encoding::bitpacked::{Unpackable, Unpacked};32use crate::parquet::error::{ParquetError, ParquetResult};3334const MAX_BITWIDTH: u8 = 64;3536/// Decoder of parquets' `DELTA_BINARY_PACKED`.37#[derive(Debug)]38pub struct Decoder<'a> {39num_miniblocks_per_block: usize,40values_per_block: usize,4142values_remaining: usize,4344last_value: i64,4546values: &'a [u8],4748block: Block<'a>,49}5051#[derive(Debug)]52struct Block<'a> {53min_delta: i64,5455/// Bytes that give the `num_bits` for the [`bitpacked::Decoder`].56///57/// Invariant: `bitwidth[i] <= MAX_BITWIDTH` for all `i`58bitwidths: &'a [u8],59values_remaining: usize,60miniblock: MiniBlock<'a>,61}6263#[derive(Debug)]64struct MiniBlock<'a> {65decoder: bitpacked::Decoder<'a, u64>,66buffered: <u64 as Unpackable>::Unpacked,67unpacked_start: usize,68unpacked_end: usize,69}7071pub(crate) struct SumGatherer(pub(crate) usize);7273pub trait DeltaGatherer {74type Target: std::fmt::Debug;7576fn target_len(&self, target: &Self::Target) -> usize;77fn target_reserve(&self, target: &mut Self::Target, n: usize);7879/// Gather one element with value `v` into `target`.80fn gather_one(&mut self, target: &mut Self::Target, v: i64) -> ParquetResult<()>;8182/// Gather `num_repeats` elements into `target`.83///84/// The first value is `v` and the `n`-th value is `v + (n-1)*delta`.85fn gather_constant(86&mut self,87target: &mut Self::Target,88v: i64,89delta: i64,90num_repeats: usize,91) -> ParquetResult<()> {92for i in 0..num_repeats {93self.gather_one(target, v + (i as i64) * delta)?;94}95Ok(())96}97/// Gather a `slice` of elements into `target`.98fn gather_slice(&mut self, target: &mut Self::Target, slice: &[i64]) -> ParquetResult<()> {99for &v in slice {100self.gather_one(target, v)?;101}102Ok(())103}104/// Gather a `chunk` of elements into `target`.105fn gather_chunk(&mut self, target: &mut Self::Target, chunk: &[i64; 64]) -> ParquetResult<()> {106self.gather_slice(target, chunk)107}108}109110impl DeltaGatherer for SumGatherer {111type Target = usize;112113fn target_len(&self, _target: &Self::Target) -> usize {114self.0115}116fn target_reserve(&self, _target: &mut Self::Target, _n: usize) {}117118fn gather_one(&mut self, target: &mut Self::Target, v: i64) -> ParquetResult<()> {119if v < 0 {120return Err(ParquetError::oos(format!(121"Invalid delta encoding length {v}"122)));123}124125*target += v as usize;126self.0 += 1;127Ok(())128}129fn gather_constant(130&mut self,131target: &mut Self::Target,132v: i64,133delta: i64,134num_repeats: usize,135) -> ParquetResult<()> {136if v < 0 || (delta < 0 && num_repeats > 0 && (num_repeats - 1) as i64 * delta + v < 0) {137return Err(ParquetError::oos("Invalid delta encoding length"));138}139140*target += lin_natural_sum(v, delta, num_repeats) as usize;141142Ok(())143}144fn gather_slice(&mut self, target: &mut Self::Target, slice: &[i64]) -> ParquetResult<()> {145let min = slice.iter().copied().min().unwrap_or_default();146if min < 0 {147return Err(ParquetError::oos(format!(148"Invalid delta encoding length {min}"149)));150}151152*target += slice.iter().copied().map(|v| v as usize).sum::<usize>();153self.0 += slice.len();154Ok(())155}156fn gather_chunk(&mut self, target: &mut Self::Target, chunk: &[i64; 64]) -> ParquetResult<()> {157let min = chunk.iter().copied().min().unwrap_or_default();158if min < 0 {159return Err(ParquetError::oos(format!(160"Invalid delta encoding length {min}"161)));162}163*target += chunk.iter().copied().map(|v| v as usize).sum::<usize>();164self.0 += chunk.len();165Ok(())166}167}168169/// Gather the rest of the [`bitpacked::Decoder`] into `target`170fn gather_bitpacked<G: DeltaGatherer>(171target: &mut G::Target,172min_delta: i64,173last_value: &mut i64,174mut decoder: bitpacked::Decoder<u64>,175gatherer: &mut G,176) -> ParquetResult<()> {177let mut chunked = decoder.chunked();178for mut chunk in chunked.by_ref() {179for value in &mut chunk {180*last_value = last_value181.wrapping_add(*value as i64)182.wrapping_add(min_delta);183*value = *last_value as u64;184}185186let chunk = bytemuck::cast_ref(&chunk);187gatherer.gather_chunk(target, chunk)?;188}189190if let Some((mut chunk, length)) = chunked.remainder() {191let slice = &mut chunk[..length];192193for value in slice.iter_mut() {194*last_value = last_value195.wrapping_add(*value as i64)196.wrapping_add(min_delta);197*value = *last_value as u64;198}199200let slice = bytemuck::cast_slice(slice);201gatherer.gather_slice(target, slice)?;202}203204Ok(())205}206207/// Gather an entire [`MiniBlock`] into `target`208fn gather_miniblock<G: DeltaGatherer>(209target: &mut G::Target,210min_delta: i64,211bitwidth: u8,212values: &[u8],213values_per_miniblock: usize,214last_value: &mut i64,215gatherer: &mut G,216) -> ParquetResult<()> {217let bitwidth = bitwidth as usize;218219if bitwidth == 0 {220let v = last_value.wrapping_add(min_delta);221gatherer.gather_constant(target, v, min_delta, values_per_miniblock)?;222*last_value = last_value.wrapping_add(min_delta * values_per_miniblock as i64);223return Ok(());224}225226debug_assert!(bitwidth <= 64);227debug_assert_eq!((bitwidth * values_per_miniblock).div_ceil(8), values.len());228229let start_length = gatherer.target_len(target);230gather_bitpacked(231target,232min_delta,233last_value,234bitpacked::Decoder::new(values, bitwidth, values_per_miniblock),235gatherer,236)?;237let target_length = gatherer.target_len(target);238239debug_assert_eq!(target_length - start_length, values_per_miniblock);240241Ok(())242}243244/// Gather an entire [`Block`] into `target`245fn gather_block<'a, G: DeltaGatherer>(246target: &mut G::Target,247num_miniblocks: usize,248values_per_miniblock: usize,249mut values: &'a [u8],250last_value: &mut i64,251gatherer: &mut G,252) -> ParquetResult<&'a [u8]> {253let (min_delta, consumed) = zigzag_leb128::decode(values);254values = &values[consumed..];255let bitwidths;256(bitwidths, values) = values257.split_at_checked(num_miniblocks)258.ok_or_else(|| ParquetError::oos("Not enough bitwidths available in delta encoding"))?;259260gatherer.target_reserve(target, num_miniblocks * values_per_miniblock);261for &bitwidth in bitwidths {262let miniblock;263(miniblock, values) = values264.split_at_checked((bitwidth as usize * values_per_miniblock).div_ceil(8))265.ok_or_else(|| ParquetError::oos("Not enough bytes for miniblock in delta encoding"))?;266gather_miniblock(267target,268min_delta,269bitwidth,270miniblock,271values_per_miniblock,272last_value,273gatherer,274)?;275}276277Ok(values)278}279280impl<'a> Decoder<'a> {281pub fn try_new(mut values: &'a [u8]) -> ParquetResult<(Self, &'a [u8])> {282let header_err = || ParquetError::oos("Insufficient bytes for Delta encoding header");283284// header:285// <block size in values> <number of miniblocks in a block> <total value count> <first value>286287let (values_per_block, consumed) = uleb128::decode(values);288let values_per_block = values_per_block as usize;289values = values.get(consumed..).ok_or_else(header_err)?;290291assert_eq!(values_per_block % 128, 0);292293let (num_miniblocks_per_block, consumed) = uleb128::decode(values);294let num_miniblocks_per_block = num_miniblocks_per_block as usize;295values = values.get(consumed..).ok_or_else(header_err)?;296297let (total_count, consumed) = uleb128::decode(values);298let total_count = total_count as usize;299values = values.get(consumed..).ok_or_else(header_err)?;300301let (first_value, consumed) = zigzag_leb128::decode(values);302values = values.get(consumed..).ok_or_else(header_err)?;303304assert_eq!(values_per_block % num_miniblocks_per_block, 0);305assert_eq!((values_per_block / num_miniblocks_per_block) % 32, 0);306307let values_per_miniblock = values_per_block / num_miniblocks_per_block;308assert_eq!(values_per_miniblock % 8, 0);309310// We skip over all the values to determine where the slice stops.311//312// This also has the added benefit of error checking in advance, thus we can unwrap in313// other places.314315let mut rem = values;316if total_count > 1 {317let mut num_values_left = total_count - 1;318while num_values_left > 0 {319// If the number of values is does not need all the miniblocks anymore, we need to320// ignore the later miniblocks and regard them as having bitwidth = 0.321//322// Quoted from the specification:323//324// > If, in the last block, less than <number of miniblocks in a block> miniblocks325// > are needed to store the values, the bytes storing the bit widths of the326// > unneeded miniblocks are still present, their value should be zero, but readers327// > must accept arbitrary values as well. There are no additional padding bytes for328// > the miniblock bodies though, as if their bit widths were 0 (regardless of the329// > actual byte values). The reader knows when to stop reading by keeping track of330// > the number of values read.331let num_remaining_mini_blocks = usize::min(332num_miniblocks_per_block,333num_values_left.div_ceil(values_per_miniblock),334);335336// block:337// <min delta> <list of bitwidths of miniblocks> <miniblocks>338339let (_, consumed) = zigzag_leb128::decode(rem);340rem = rem.get(consumed..).ok_or_else(|| {341ParquetError::oos("No min-delta value in delta encoding miniblock")342})?;343344if rem.len() < num_miniblocks_per_block {345return Err(ParquetError::oos(346"Not enough bitwidths available in delta encoding",347));348}349if let Some(err_bitwidth) = rem350.get(..num_remaining_mini_blocks)351.expect("num_remaining_mini_blocks <= num_miniblocks_per_block")352.iter()353.copied()354.find(|&bitwidth| bitwidth > MAX_BITWIDTH)355{356return Err(ParquetError::oos(format!(357"Delta encoding miniblock with bitwidth {err_bitwidth} higher than maximum {MAX_BITWIDTH} bits",358)));359}360361let num_bitpacking_bytes = rem[..num_remaining_mini_blocks]362.iter()363.copied()364.map(|bitwidth| (bitwidth as usize * values_per_miniblock).div_ceil(8))365.sum::<usize>();366367rem = rem368.get(num_miniblocks_per_block + num_bitpacking_bytes..)369.ok_or_else(|| {370ParquetError::oos(371"Not enough bytes for all bitpacked values in delta encoding",372)373})?;374375num_values_left = num_values_left.saturating_sub(values_per_block);376}377}378379let values = &values[..values.len() - rem.len()];380381let decoder = Self {382num_miniblocks_per_block,383values_per_block,384values_remaining: total_count.saturating_sub(1),385last_value: first_value,386values,387388block: Block {389// @NOTE:390// We add one delta=0 into the buffered block which allows us to391// prepend like the `first_value` is just any normal value.392//393// This is a bit of a hack, but makes the rest of the logic394// **A LOT** simpler.395values_remaining: usize::from(total_count > 0),396min_delta: 0,397bitwidths: &[],398miniblock: MiniBlock {399decoder: bitpacked::Decoder::try_new_allow_zero(&[], 0, 1)?,400buffered: <u64 as Unpackable>::Unpacked::zero(),401unpacked_start: 0,402unpacked_end: 0,403},404},405};406407Ok((decoder, rem))408}409410/// Consume a new [`Block`] from `self.values`.411fn consume_block(&mut self) {412// @NOTE: All the panics here should be prevented in the `Decoder::try_new`.413414debug_assert!(!self.values.is_empty());415416let values_per_miniblock = self.values_per_miniblock();417418let length = usize::min(self.values_remaining, self.values_per_block);419let actual_num_miniblocks = usize::min(420self.num_miniblocks_per_block,421length.div_ceil(values_per_miniblock),422);423424debug_assert!(actual_num_miniblocks > 0);425426// <min delta> <list of bitwidths of miniblocks> <miniblocks>427428let (min_delta, consumed) = zigzag_leb128::decode(self.values);429430self.values = &self.values[consumed..];431let (bitwidths, remainder) = self.values.split_at(self.num_miniblocks_per_block);432433let first_bitwidth = bitwidths[0];434let bitwidths = &bitwidths[1..actual_num_miniblocks];435debug_assert!(first_bitwidth <= MAX_BITWIDTH);436let first_bitwidth = first_bitwidth as usize;437438let values_in_first_miniblock = usize::min(length, values_per_miniblock);439let num_allocated_bytes = (first_bitwidth * values_per_miniblock).div_ceil(8);440let num_actual_bytes = (first_bitwidth * values_in_first_miniblock).div_ceil(8);441let (bytes, remainder) = remainder.split_at(num_allocated_bytes);442let bytes = &bytes[..num_actual_bytes];443444let decoder =445bitpacked::Decoder::new_allow_zero(bytes, first_bitwidth, values_in_first_miniblock);446447self.block = Block {448min_delta,449bitwidths,450values_remaining: length,451miniblock: MiniBlock {452decoder,453// We can leave this as it should not be read before it is updated454buffered: self.block.miniblock.buffered,455unpacked_start: 0,456unpacked_end: 0,457},458};459460self.values_remaining -= length;461self.values = remainder;462}463464/// Gather `n` elements from the current [`MiniBlock`] to `target`465fn gather_miniblock_n_into<G: DeltaGatherer>(466&mut self,467target: &mut G::Target,468mut n: usize,469gatherer: &mut G,470) -> ParquetResult<()> {471debug_assert!(n > 0);472debug_assert!(self.miniblock_len() >= n);473474// If the `num_bits == 0`, the delta is constant and equal to `min_delta`. The475// `bitpacked::Decoder` basically only keeps track of the length.476if self.block.miniblock.decoder.num_bits() == 0 {477let num_repeats = usize::min(self.miniblock_len(), n);478let v = self.last_value.wrapping_add(self.block.min_delta);479gatherer.gather_constant(target, v, self.block.min_delta, num_repeats)?;480self.last_value = self481.last_value482.wrapping_add(self.block.min_delta * num_repeats as i64);483self.block.miniblock.decoder.length -= num_repeats;484return Ok(());485}486487if self.block.miniblock.unpacked_start < self.block.miniblock.unpacked_end {488let length = usize::min(489n,490self.block.miniblock.unpacked_end - self.block.miniblock.unpacked_start,491);492self.block.miniblock.buffered493[self.block.miniblock.unpacked_start..self.block.miniblock.unpacked_start + length]494.iter_mut()495.for_each(|v| {496self.last_value = self497.last_value498.wrapping_add(*v as i64)499.wrapping_add(self.block.min_delta);500*v = self.last_value as u64;501});502gatherer.gather_slice(503target,504bytemuck::cast_slice(505&self.block.miniblock.buffered[self.block.miniblock.unpacked_start506..self.block.miniblock.unpacked_start + length],507),508)?;509n -= length;510self.block.miniblock.unpacked_start += length;511}512513if n == 0 {514return Ok(());515}516517const ITEMS_PER_PACK: usize = <<u64 as Unpackable>::Unpacked as Unpacked<u64>>::LENGTH;518for _ in 0..n / ITEMS_PER_PACK {519let mut chunk = self.block.miniblock.decoder.chunked().next().unwrap();520chunk.iter_mut().for_each(|v| {521self.last_value = self522.last_value523.wrapping_add(*v as i64)524.wrapping_add(self.block.min_delta);525*v = self.last_value as u64;526});527gatherer.gather_chunk(target, bytemuck::cast_ref(&chunk))?;528n -= ITEMS_PER_PACK;529}530531if n == 0 {532return Ok(());533}534535let Some((chunk, len)) = self.block.miniblock.decoder.chunked().next_inexact() else {536debug_assert_eq!(n, 0);537self.block.miniblock.buffered = <u64 as Unpackable>::Unpacked::zero();538self.block.miniblock.unpacked_start = 0;539self.block.miniblock.unpacked_end = 0;540return Ok(());541};542543self.block.miniblock.buffered = chunk;544self.block.miniblock.unpacked_start = 0;545self.block.miniblock.unpacked_end = len;546547if n > 0 {548let length = usize::min(n, self.block.miniblock.unpacked_end);549self.block.miniblock.buffered[..length]550.iter_mut()551.for_each(|v| {552self.last_value = self553.last_value554.wrapping_add(*v as i64)555.wrapping_add(self.block.min_delta);556*v = self.last_value as u64;557});558gatherer.gather_slice(559target,560bytemuck::cast_slice(&self.block.miniblock.buffered[..length]),561)?;562self.block.miniblock.unpacked_start = length;563}564565Ok(())566}567568/// Gather `n` elements from the current [`Block`] to `target`569fn gather_block_n_into<G: DeltaGatherer>(570&mut self,571target: &mut G::Target,572n: usize,573gatherer: &mut G,574) -> ParquetResult<()> {575let values_per_miniblock = self.values_per_miniblock();576577debug_assert!(n <= self.values_per_block);578debug_assert!(self.values_per_block >= values_per_miniblock);579debug_assert_eq!(self.values_per_block % values_per_miniblock, 0);580581let mut n = usize::min(self.block.values_remaining, n);582583if n == 0 {584return Ok(());585}586587let miniblock_len = self.miniblock_len();588if n < miniblock_len {589self.gather_miniblock_n_into(target, n, gatherer)?;590debug_assert_eq!(self.miniblock_len(), miniblock_len - n);591self.block.values_remaining -= n;592return Ok(());593}594595if miniblock_len > 0 {596self.gather_miniblock_n_into(target, miniblock_len, gatherer)?;597n -= miniblock_len;598self.block.values_remaining -= miniblock_len;599}600601while n >= values_per_miniblock {602let bitwidth = self.block.bitwidths[0];603self.block.bitwidths = &self.block.bitwidths[1..];604605let miniblock;606(miniblock, self.values) = self607.values608.split_at((bitwidth as usize * values_per_miniblock).div_ceil(8));609gather_miniblock(610target,611self.block.min_delta,612bitwidth,613miniblock,614values_per_miniblock,615&mut self.last_value,616gatherer,617)?;618n -= values_per_miniblock;619self.block.values_remaining -= values_per_miniblock;620}621622if n == 0 {623return Ok(());624}625626if !self.block.bitwidths.is_empty() {627let bitwidth = self.block.bitwidths[0];628self.block.bitwidths = &self.block.bitwidths[1..];629630if bitwidth > MAX_BITWIDTH {631return Err(ParquetError::oos(format!(632"Delta encoding bitwidth '{bitwidth}' is larger than maximum {MAX_BITWIDTH})"633)));634}635636let length = usize::min(values_per_miniblock, self.block.values_remaining);637638let num_allocated_bytes = (bitwidth as usize * values_per_miniblock).div_ceil(8);639let num_actual_bytes = (bitwidth as usize * length).div_ceil(8);640641let miniblock;642(miniblock, self.values) =643self.values644.split_at_checked(num_allocated_bytes)645.ok_or(ParquetError::oos(646"Not enough space for delta encoded miniblock",647))?;648649let miniblock = &miniblock[..num_actual_bytes];650651let decoder =652bitpacked::Decoder::try_new_allow_zero(miniblock, bitwidth as usize, length)?;653self.block.miniblock = MiniBlock {654decoder,655buffered: self.block.miniblock.buffered,656unpacked_start: 0,657unpacked_end: 0,658};659660if n > 0 {661self.gather_miniblock_n_into(target, n, gatherer)?;662self.block.values_remaining -= n;663}664}665666Ok(())667}668669/// Gather `n` elements to `target`670pub fn gather_n_into<G: DeltaGatherer>(671&mut self,672target: &mut G::Target,673mut n: usize,674gatherer: &mut G,675) -> ParquetResult<()> {676n = usize::min(n, self.len());677678if n == 0 {679return Ok(());680}681682let values_per_miniblock = self.values_per_block / self.num_miniblocks_per_block;683684let start_num_values_remaining = self.block.values_remaining;685if n <= self.block.values_remaining {686self.gather_block_n_into(target, n, gatherer)?;687debug_assert_eq!(self.block.values_remaining, start_num_values_remaining - n);688return Ok(());689}690691n -= self.block.values_remaining;692self.gather_block_n_into(target, self.block.values_remaining, gatherer)?;693debug_assert_eq!(self.block.values_remaining, 0);694695while usize::min(n, self.values_remaining) >= self.values_per_block {696self.values = gather_block(697target,698self.num_miniblocks_per_block,699values_per_miniblock,700self.values,701&mut self.last_value,702gatherer,703)?;704n -= self.values_per_block;705self.values_remaining -= self.values_per_block;706}707708if n == 0 {709return Ok(());710}711712self.consume_block();713self.gather_block_n_into(target, n, gatherer)?;714715Ok(())716}717718pub(crate) fn collect_n<E: std::fmt::Debug + Extend<i64>>(719&mut self,720e: &mut E,721n: usize,722) -> ParquetResult<()> {723struct ExtendGatherer<'a, E: std::fmt::Debug + Extend<i64>>(724std::marker::PhantomData<&'a E>,725);726727impl<'a, E: std::fmt::Debug + Extend<i64>> DeltaGatherer for ExtendGatherer<'a, E> {728type Target = (usize, &'a mut E);729730fn target_len(&self, target: &Self::Target) -> usize {731target.0732}733734fn target_reserve(&self, _target: &mut Self::Target, _n: usize) {}735736fn gather_one(&mut self, target: &mut Self::Target, v: i64) -> ParquetResult<()> {737target.1.extend(Some(v));738target.0 += 1;739Ok(())740}741}742743let mut gatherer = ExtendGatherer(std::marker::PhantomData);744let mut target = (0, e);745746self.gather_n_into(&mut target, n, &mut gatherer)747}748749pub(crate) fn collect<E: std::fmt::Debug + Extend<i64> + Default>(750mut self,751) -> ParquetResult<E> {752let mut e = E::default();753self.collect_n(&mut e, self.len())?;754Ok(e)755}756757pub fn len(&self) -> usize {758self.values_remaining + self.block.values_remaining759}760761fn values_per_miniblock(&self) -> usize {762debug_assert_eq!(self.values_per_block % self.num_miniblocks_per_block, 0);763self.values_per_block / self.num_miniblocks_per_block764}765766fn miniblock_len(&self) -> usize {767self.block.miniblock.unpacked_end - self.block.miniblock.unpacked_start768+ self.block.miniblock.decoder.len()769}770}771772#[cfg(test)]773mod tests {774use super::*;775776#[test]777fn single_value() {778// Generated by parquet-rs779//780// header: [128, 1, 4, 1, 2]781// block size: 128, 1782// mini-blocks: 4783// elements: 1784// first_value: 2 <=z> 1785let data = &[128, 1, 4, 1, 2];786787let (decoder, rem) = Decoder::try_new(data).unwrap();788let r = decoder.collect::<Vec<_>>().unwrap();789790assert_eq!(&r[..], &[1]);791assert_eq!(data.len() - rem.len(), 5);792}793794#[test]795fn test_from_spec() {796let expected = (1..=5).collect::<Vec<_>>();797// VALIDATED FROM SPARK==3.1.1798// header: [128, 1, 4, 5, 2]799// block size: 128, 1800// mini-blocks: 4801// elements: 5802// first_value: 2 <=z> 1803// block1: [2, 0, 0, 0, 0]804// min_delta: 2 <=z> 1805// bit_width: 0806let data = &[128, 1, 4, 5, 2, 2, 0, 0, 0, 0];807808let (decoder, rem) = Decoder::try_new(data).unwrap();809let r = decoder.collect::<Vec<_>>().unwrap();810811assert_eq!(expected, r);812813assert_eq!(data.len() - rem.len(), 10);814}815816#[test]817fn case2() {818let expected = vec![1, 2, 3, 4, 5, 1];819// VALIDATED FROM SPARK==3.1.1820// header: [128, 1, 4, 6, 2]821// block size: 128, 1 <=u> 128822// mini-blocks: 4 <=u> 4823// elements: 6 <=u> 6824// first_value: 2 <=z> 1825// block1: [7, 3, 0, 0, 0]826// min_delta: 7 <=z> -4827// bit_widths: [3, 0, 0, 0]828// values: [829// 0b01101101830// 0b00001011831// ...832// ] <=b> [3, 3, 3, 3, 0]833let data = &[834128, 1, 4, 6, 2, 7, 3, 0, 0, 0, 0b01101101, 0b00001011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,835// these should not be consumed8361, 2, 3,837];838839let (decoder, rem) = Decoder::try_new(data).unwrap();840let r = decoder.collect::<Vec<_>>().unwrap();841842assert_eq!(expected, r);843assert_eq!(rem, &[1, 2, 3]);844}845846#[test]847fn multiple_miniblocks() {848#[rustfmt::skip]849let data = &[850// Header: [128, 1, 4, 65, 100]851128, 1, // block size <=u> 1288524, // number of mini-blocks <=u> 485365, // number of elements <=u> 65854100, // first_value <=z> 50855856// Block 1 header: [7, 3, 4, 0, 0]8577, // min_delta <=z> -48583, 4, 255, 0, // bit_widths (255 should not be used as only two miniblocks are needed)859860// 32 3-bit values of 0 for mini-block 1 (12 bytes)8610, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,862863// 32 4-bit values of 8 for mini-block 2 (16 bytes)8640x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88,8650x88, 0x88,866867// these should not be consumed8681, 2, 3,869];870871#[rustfmt::skip]872let expected = [873// First value87450,875876// Mini-block 1: 32 deltas of -487746, 42, 38, 34, 30, 26, 22, 18, 14, 10, 6, 2, -2, -6, -10, -14, -18, -22, -26, -30, -34,878-38, -42, -46, -50, -54, -58, -62, -66, -70, -74, -78,879880// Mini-block 2: 32 deltas of 4881-74, -70, -66, -62, -58, -54, -50, -46, -42, -38, -34, -30, -26, -22, -18, -14, -10, -6,882-2, 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50,883];884885let (decoder, rem) = Decoder::try_new(data).unwrap();886let r = decoder.collect::<Vec<_>>().unwrap();887888assert_eq!(&expected[..], &r[..]);889assert_eq!(data.len() - rem.len(), data.len() - 3);890assert_eq!(rem.len(), 3);891}892}893894895