// Path: blob/main/crates/polars-parquet/src/parquet/encoding/bitpacked/mod.rs
// 7887 views
macro_rules! seq_macro {1($i:ident in 1..15 $block:block) => {2seq_macro!($i in [31, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,4] $block)5};6($i:ident in 0..16 $block:block) => {7seq_macro!($i in [80, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,9] $block)10};11($i:ident in 0..=16 $block:block) => {12seq_macro!($i in [130, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,1416,15] $block)16};17($i:ident in 1..31 $block:block) => {18seq_macro!($i in [191, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,2016, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,21] $block)22};23($i:ident in 0..32 $block:block) => {24seq_macro!($i in [250, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,2616, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,27] $block)28};29($i:ident in 0..=32 $block:block) => {30seq_macro!($i in [310, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,3216, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,3332,34] $block)35};36($i:ident in 1..63 $block:block) => {37seq_macro!($i in [381, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,3916, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,4032, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,4148, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,42] $block)43};44($i:ident in 0..64 $block:block) => {45seq_macro!($i in [460, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,4716, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,4832, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,4948, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,50] $block)51};52($i:ident in 0..=64 $block:block) => {53seq_macro!($i in [540, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,5516, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,5632, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,5748, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,5864,59] $block)60};61($i:ident in [$($value:literal),+ $(,)?] 
$block:block) => {62$({63#[allow(non_upper_case_globals)]64const $i: usize = $value;65{ $block }66})+67};68}6970mod decode;71mod encode;72mod pack;73mod unpack;7475pub use decode::{ChunkedDecoder, Decoder};76pub use encode::{encode, encode_pack};7778/// A byte slice (e.g. `[u8; 8]`) denoting types that represent complete packs.79pub trait Packed:80Copy81+ Sized82+ AsRef<[u8]>83+ AsMut<[u8]>84+ std::ops::IndexMut<usize, Output = u8>85+ for<'a> TryFrom<&'a [u8]>86{87const LENGTH: usize;88fn zero() -> Self;89}9091impl Packed for [u8; 8] {92const LENGTH: usize = 8;93#[inline]94fn zero() -> Self {95[0; 8]96}97}9899impl Packed for [u8; 16 * 2] {100const LENGTH: usize = 16 * 2;101#[inline]102fn zero() -> Self {103[0; 16 * 2]104}105}106107impl Packed for [u8; 32 * 4] {108const LENGTH: usize = 32 * 4;109#[inline]110fn zero() -> Self {111[0; 32 * 4]112}113}114115impl Packed for [u8; 64 * 8] {116const LENGTH: usize = 64 * 8;117#[inline]118fn zero() -> Self {119[0; 64 * 8]120}121}122123/// A byte slice of [`Unpackable`] denoting complete unpacked arrays.124pub trait Unpacked<T>:125Copy126+ Sized127+ AsRef<[T]>128+ AsMut<[T]>129+ std::ops::Index<usize, Output = T>130+ std::ops::IndexMut<usize, Output = T>131+ for<'a> TryFrom<&'a [T], Error = std::array::TryFromSliceError>132{133const LENGTH: usize;134fn zero() -> Self;135}136137impl Unpacked<u8> for [u8; 8] {138const LENGTH: usize = 8;139#[inline]140fn zero() -> Self {141[0; 8]142}143}144145impl Unpacked<u16> for [u16; 16] {146const LENGTH: usize = 16;147#[inline]148fn zero() -> Self {149[0; 16]150}151}152153impl Unpacked<u32> for [u32; 32] {154const LENGTH: usize = 32;155#[inline]156fn zero() -> Self {157[0; 32]158}159}160161impl Unpacked<u64> for [u64; 64] {162const LENGTH: usize = 64;163#[inline]164fn zero() -> Self {165[0; 64]166}167}168169/// A type representing a type that can be bitpacked and unpacked by this crate.170pub trait Unpackable: Copy + Sized + Default {171type Packed: Packed;172type Unpacked: 
Unpacked<Self>;173174fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked);175fn pack(unpacked: &Self::Unpacked, num_bits: usize, packed: &mut [u8]);176}177178impl Unpackable for u16 {179type Packed = [u8; 16 * 2];180type Unpacked = [u16; 16];181182#[inline]183fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) {184unpack::unpack16(packed, unpacked, num_bits)185}186187#[inline]188fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) {189pack::pack16(packed, unpacked, num_bits)190}191}192193impl Unpackable for u32 {194type Packed = [u8; 32 * 4];195type Unpacked = [u32; 32];196197#[inline]198fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) {199unpack::unpack32(packed, unpacked, num_bits)200}201202#[inline]203fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) {204pack::pack32(packed, unpacked, num_bits)205}206}207208impl Unpackable for u64 {209type Packed = [u8; 64 * 8];210type Unpacked = [u64; 64];211212#[inline]213fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) {214unpack::unpack64(packed, unpacked, num_bits)215}216217#[inline]218fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) {219pack::pack64(packed, unpacked, num_bits)220}221}222223#[cfg(test)]224mod tests {225use super::*;226227pub fn case1() -> (usize, Vec<u32>, Vec<u8>) {228let num_bits = 3;229let compressed = vec![2300b10001000u8,2310b11000110,2320b11111010,2330b10001000u8,2340b11000110,2350b11111010,2360b10001000u8,2370b11000110,2380b11111010,2390b10001000u8,2400b11000110,2410b11111010,2420b10001000u8,2430b11000110,2440b11111010,245];246let decompressed = vec![2470, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4,2485, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,249];250(num_bits, decompressed, compressed)251}252253#[test]254fn encode_large() {255let (num_bits, unpacked, expected) = case1();256let mut packed = vec![0u8; 4 * 
32];257258encode(&unpacked, num_bits, &mut packed);259assert_eq!(&packed[..15], expected);260}261262#[test]263fn test_encode() {264let num_bits = 3;265let unpacked = vec![0, 1, 2, 3, 4, 5, 6, 7];266267let mut packed = vec![0u8; 4 * 32];268269encode::<u32>(&unpacked, num_bits, &mut packed);270271let expected = vec![0b10001000u8, 0b11000110, 0b11111010];272273assert_eq!(&packed[..3], expected);274}275}276277278