Path: blob/main/crates/polars-parquet/src/parquet/encoding/bitpacked/pack.rs
7887 views
#![allow(unsafe_op_in_unsafe_fn)]1/// Macro that generates a packing function taking the number of bits as a const generic2macro_rules! pack_impl {3($t:ty, $bytes:literal, $bits:tt, $bits_minus_one:tt) => {4// Adapted from https://github.com/quickwit-oss/bitpacking5pub unsafe fn pack<const NUM_BITS: usize>(input: &[$t; $bits], output: &mut [u8]) {6if NUM_BITS == 0 {7for out in output {8*out = 0;9}10return;11}12assert!(NUM_BITS <= $bits);13assert!(output.len() >= NUM_BITS * $bytes);1415let input_ptr = input.as_ptr();16let mut output_ptr = output.as_mut_ptr() as *mut $t;17let mut out_register: $t = read_unaligned(input_ptr);1819if $bits == NUM_BITS {20write_unaligned(output_ptr, out_register);21output_ptr = output_ptr.offset(1);22}2324// Using microbenchmark (79d1fff), unrolling this loop is over 10x25// faster than not (>20x faster than old algorithm)26seq_macro!(i in 1..$bits_minus_one {27let bits_filled: usize = i * NUM_BITS;28let inner_cursor: usize = bits_filled % $bits;29let remaining: usize = $bits - inner_cursor;3031let offset_ptr = input_ptr.add(i);32let in_register: $t = read_unaligned(offset_ptr);3334out_register =35if inner_cursor > 0 {36out_register | (in_register << inner_cursor)37} else {38in_register39};4041if remaining <= NUM_BITS {42write_unaligned(output_ptr, out_register);43output_ptr = output_ptr.offset(1);44if 0 < remaining && remaining < NUM_BITS {45out_register = in_register >> remaining46}47}48});4950let in_register: $t = read_unaligned(input_ptr.add($bits - 1));51out_register = if $bits - NUM_BITS > 0 {52out_register | (in_register << ($bits - NUM_BITS))53} else {54in_register55};56write_unaligned(output_ptr, out_register)57}58};59}6061/// Macro that generates pack functions that accept num_bits as a parameter62macro_rules! pack {63($name:ident, $t:ty, $bytes:literal, $bits:tt, $bits_minus_one:tt) => {64mod $name {65use std::ptr::{read_unaligned, write_unaligned};66pack_impl!($t, $bytes, $bits, $bits_minus_one);67}6869/// Pack unpacked `input` into `output` with a bit width of `num_bits`70pub fn $name(input: &[$t; $bits], output: &mut [u8], num_bits: usize) {71// This will get optimised into a jump table72seq_macro!(i in 0..=$bits {73if i == num_bits {74unsafe {75return $name::pack::<i>(input, output);76}77}78});79unreachable!("invalid num_bits {}", num_bits);80}81};82}8384pack!(pack16, u16, 2, 16, 15);85pack!(pack32, u32, 4, 32, 31);86pack!(pack64, u64, 8, 64, 63);8788#[cfg(test)]89mod tests {90use rand::distr::{Distribution, Uniform};9192use super::super::unpack::*;93use super::*;9495#[test]96fn test_u32() {97let input = [980u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0u32, 1, 2, 3, 4, 5, 6, 7, 8,999, 10, 11, 12, 13, 14, 15,100];101for num_bits in 4..32 {102let mut output = [0u8; 32 * 4];103pack32(&input, &mut output, num_bits);104let mut other = [0u32; 32];105unpack32(&output, &mut other, num_bits);106assert_eq!(other, input);107}108}109110#[test]111fn test_u32_random() {112let mut rng = rand::rng();113let mut random_array = [0u32; 32];114let between = Uniform::new(0, 131_072).unwrap();115for num_bits in 17..=32 {116for i in &mut random_array {117*i = between.sample(&mut rng);118}119let mut output = [0u8; 32 * 4];120pack32(&random_array, &mut output, num_bits);121let mut other = [0u32; 32];122unpack32(&output, &mut other, num_bits);123assert_eq!(other, random_array);124}125}126127#[test]128fn test_u64_random() {129let mut rng = rand::rng();130let mut random_array = [0u64; 64];131let between = Uniform::new(0, 131_072).unwrap();132for num_bits in 17..=64 {133for i in &mut random_array {134*i = between.sample(&mut rng);135}136let mut output = [0u8; 64 * 8];137pack64(&random_array, &mut output, num_bits);138let mut other = [0u64; 64];139unpack64(&output, &mut other, num_bits);140assert_eq!(other, random_array);141}142}143}144145146