Path: blob/main/crates/polars-io/src/utils/compression.rs
6939 views
use std::io::Read;12use polars_core::prelude::*;3use polars_error::{feature_gated, to_compute_err};45/// Represents the compression algorithms that we have decoders for6pub enum SupportedCompression {7GZIP,8ZLIB,9ZSTD,10}1112impl SupportedCompression {13/// If the given byte slice starts with the "magic" bytes for a supported compression family, return14/// that family, for unsupported/uncompressed slices, return None.15/// Based on <https://en.wikipedia.org/wiki/List_of_file_signatures>.16pub fn check(bytes: &[u8]) -> Option<Self> {17if bytes.len() < 4 {18// not enough bytes to perform prefix checks19return None;20}21match bytes[..4] {22[0x1f, 0x8b, _, _] => Some(Self::GZIP),23// Different zlib compression levels without preset dictionary.24[0x78, 0x01, _, _] => Some(Self::ZLIB),25[0x78, 0x5e, _, _] => Some(Self::ZLIB),26[0x78, 0x9c, _, _] => Some(Self::ZLIB),27[0x78, 0xda, _, _] => Some(Self::ZLIB),28[0x28, 0xb5, 0x2f, 0xfd] => Some(Self::ZSTD),29_ => None,30}31}32}3334/// Decompress `bytes` if compression is detected, otherwise simply return it.35/// An `out` vec must be given for ownership of the decompressed data.36#[allow(clippy::ptr_arg)]37pub fn maybe_decompress_bytes<'a>(bytes: &'a [u8], out: &'a mut Vec<u8>) -> PolarsResult<&'a [u8]> {38assert!(out.is_empty());3940if let Some(algo) = SupportedCompression::check(bytes) {41feature_gated!("decompress", {42match algo {43SupportedCompression::GZIP => {44flate2::read::MultiGzDecoder::new(bytes)45.read_to_end(out)46.map_err(to_compute_err)?;47},48SupportedCompression::ZLIB => {49flate2::read::ZlibDecoder::new(bytes)50.read_to_end(out)51.map_err(to_compute_err)?;52},53SupportedCompression::ZSTD => {54zstd::Decoder::with_buffer(bytes)?.read_to_end(out)?;55},56}5758Ok(out)59})60} else {61Ok(bytes)62}63}646566