Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-io/src/utils/compression.rs
8424 views
1
use std::cmp;
2
use std::io::{BufRead, Cursor, Read, Write};
3
4
use polars_buffer::Buffer;
5
use polars_core::prelude::*;
6
use polars_error::{feature_gated, to_compute_err};
7
8
use crate::utils::file::{Writeable, WriteableTrait};
9
#[cfg(feature = "async")]
10
use crate::utils::stream_buf_reader::ReaderSource;
11
use crate::utils::sync_on_close::SyncOnCloseType;
12
13
/// Represents the compression algorithms that we have decoders for
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum SupportedCompression {
    GZIP,
    ZLIB,
    ZSTD,
}

impl SupportedCompression {
    /// If the given byte slice starts with the "magic" bytes for a supported compression family, return
    /// that family, for unsupported/uncompressed slices, return None.
    /// Based on <https://en.wikipedia.org/wiki/List_of_file_signatures>.
    pub fn check(bytes: &[u8]) -> Option<Self> {
        // We need up to 4 bytes to recognize the zstd frame magic; shorter
        // inputs cannot be classified, so treat them as uncompressed.
        let [b0, b1, b2, b3, ..] = bytes else {
            return None;
        };

        match (*b0, *b1) {
            // gzip member header (also covers concatenated multi-member streams).
            (0x1f, 0x8b) => Some(Self::GZIP),
            // zlib: CMF 0x78 followed by any of the FLG bytes that pass the
            // header checksum without a preset dictionary (the different
            // compression levels).
            (0x78, 0x01 | 0x5e | 0x9c | 0xda) => Some(Self::ZLIB),
            // zstd frame magic number 0xFD2FB528 (little endian).
            (0x28, 0xb5) if (*b2, *b3) == (0x2f, 0xfd) => Some(Self::ZSTD),
            _ => None,
        }
    }
}
42
43
/// Decompress `bytes` if compression is detected, otherwise simply return it.
44
/// An `out` vec must be given for ownership of the decompressed data.
45
#[allow(clippy::ptr_arg)]
46
#[deprecated(note = "may cause OOM, use CompressedReader instead")]
47
pub fn maybe_decompress_bytes<'a>(bytes: &'a [u8], out: &'a mut Vec<u8>) -> PolarsResult<&'a [u8]> {
48
assert!(out.is_empty());
49
50
let Some(algo) = SupportedCompression::check(bytes) else {
51
return Ok(bytes);
52
};
53
54
feature_gated!("decompress", {
55
match algo {
56
SupportedCompression::GZIP => {
57
flate2::read::MultiGzDecoder::new(bytes)
58
.read_to_end(out)
59
.map_err(to_compute_err)?;
60
},
61
SupportedCompression::ZLIB => {
62
flate2::read::ZlibDecoder::new(bytes)
63
.read_to_end(out)
64
.map_err(to_compute_err)?;
65
},
66
SupportedCompression::ZSTD => {
67
zstd::Decoder::with_buffer(bytes)?.read_to_end(out)?;
68
},
69
}
70
71
Ok(out)
72
})
73
}
74
75
/// Reader that implements a streaming read trait for uncompressed, gzip, zlib and zstd
/// compression.
///
/// This allows handling decompression transparently in a streaming fashion.
pub enum CompressedReader {
    /// Plain in-memory bytes; reads are served zero-copy by slicing `slice`.
    Uncompressed {
        // The full uncompressed input.
        slice: Buffer<u8>,
        // Number of bytes already handed out to the consumer.
        offset: usize,
    },
    /// Gzip stream (`MultiGzDecoder` also handles concatenated gzip members).
    #[cfg(feature = "decompress")]
    Gzip(flate2::bufread::MultiGzDecoder<Cursor<Buffer<u8>>>),
    /// Zlib (deflate with zlib header/trailer) stream.
    #[cfg(feature = "decompress")]
    Zlib(flate2::bufread::ZlibDecoder<Cursor<Buffer<u8>>>),
    /// Zstandard stream.
    #[cfg(feature = "decompress")]
    Zstd(zstd::Decoder<'static, Cursor<Buffer<u8>>>),
}
91
92
impl CompressedReader {
    /// Wraps `slice` in a reader, auto-detecting the compression format from the
    /// slice's leading magic bytes (see [`SupportedCompression::check`]).
    ///
    /// # Errors
    /// Returns an error if the zstd decoder cannot be initialized.
    ///
    /// # Panics
    /// Panics if a compressed input is detected but the 'decompress' feature is
    /// disabled.
    pub fn try_new(slice: Buffer<u8>) -> PolarsResult<Self> {
        let algo = SupportedCompression::check(&slice);

        Ok(match algo {
            None => CompressedReader::Uncompressed { slice, offset: 0 },
            #[cfg(feature = "decompress")]
            Some(SupportedCompression::GZIP) => {
                CompressedReader::Gzip(flate2::bufread::MultiGzDecoder::new(Cursor::new(slice)))
            },
            #[cfg(feature = "decompress")]
            Some(SupportedCompression::ZLIB) => {
                CompressedReader::Zlib(flate2::bufread::ZlibDecoder::new(Cursor::new(slice)))
            },
            #[cfg(feature = "decompress")]
            Some(SupportedCompression::ZSTD) => {
                CompressedReader::Zstd(zstd::Decoder::with_buffer(Cursor::new(slice))?)
            },
            #[cfg(not(feature = "decompress"))]
            _ => panic!("activate 'decompress' feature"),
        })
    }

    /// Whether the underlying input was detected as compressed.
    pub fn is_compressed(&self) -> bool {
        !matches!(&self, CompressedReader::Uncompressed { .. })
    }

    /// Recommended `read_size` for the first call to [`Self::read_next_slice`].
    pub const fn initial_read_size() -> usize {
        // We don't want to read too much at the beginning to keep decompression to a minimum if for
        // example only the schema is needed or a slice op is used. Keep in sync with
        // `ideal_read_size` (32 KiB * 16 == 512 KiB).
        32 * 1024
    }

    /// Recommended steady-state `read_size` for [`Self::read_next_slice`].
    pub const fn ideal_read_size() -> usize {
        // Somewhat conservative guess for L2 size, which performs the best on most machines and is
        // nearly always core exclusive. The loss of going larger and accidentally hitting L3 is not
        // recouped by amortizing the block processing cost even further.
        //
        // It's possible that callers use or need a larger `read_size` if for example a single row
        // doesn't fit in the 512KB.
        512 * 1024
    }

    /// If possible returns the total number of bytes that will be produced by reading from the
    /// start to finish.
    ///
    /// Only exact for the uncompressed case; for compressed inputs it is the
    /// compressed length scaled by an assumed compression ratio.
    pub fn total_len_estimate(&self) -> usize {
        const ESTIMATED_DEFLATE_RATIO: usize = 3;
        const ESTIMATED_ZSTD_RATIO: usize = 5;

        match self {
            CompressedReader::Uncompressed { slice, .. } => slice.len(),
            // `get_ref().get_ref()` unwraps decoder -> Cursor -> Buffer to reach
            // the compressed input length.
            #[cfg(feature = "decompress")]
            CompressedReader::Gzip(reader) => {
                reader.get_ref().get_ref().len() * ESTIMATED_DEFLATE_RATIO
            },
            #[cfg(feature = "decompress")]
            CompressedReader::Zlib(reader) => {
                reader.get_ref().get_ref().len() * ESTIMATED_DEFLATE_RATIO
            },
            #[cfg(feature = "decompress")]
            CompressedReader::Zstd(reader) => {
                reader.get_ref().get_ref().len() * ESTIMATED_ZSTD_RATIO
            },
        }
    }

    /// Reads exactly `read_size` bytes if possible from the internal readers and creates a new
    /// [`Buffer`] with the content `concat(prev_leftover, new_bytes)`.
    ///
    /// Returns the new slice and the number of bytes read, which will be 0 when eof is reached and
    /// this function is called again.
    ///
    /// If the underlying reader is uncompressed the operation is a cheap zero-copy
    /// [`Buffer::sliced`] operation.
    ///
    /// By handling slice concatenation at this level we can implement zero-copy reading *and* make
    /// the interface easier to use.
    ///
    /// It's a logic bug if `prev_leftover` is neither empty nor the last slice returned by this
    /// function.
    pub fn read_next_slice(
        &mut self,
        prev_leftover: &Buffer<u8>,
        read_size: usize,
    ) -> std::io::Result<(Buffer<u8>, usize)> {
        // Assuming that callers of this function correctly handle re-trying, by continuously growing
        // prev_leftover if it doesn't contain a single row, this abstraction supports arbitrarily
        // sized rows.
        let prev_len = prev_leftover.len();

        // Only the compressed paths need an owned buffer; the uncompressed path
        // returns a zero-copy sub-slice instead, leaving `buf` untouched.
        let mut buf = Vec::new();
        if self.is_compressed() {
            // Reserve for leftover + new bytes, but cap at twice the estimated
            // total output to guard against a huge `read_size`.
            let reserve_size = cmp::min(
                prev_len.saturating_add(read_size),
                self.total_len_estimate().saturating_mul(2),
            );
            buf.reserve_exact(reserve_size);
            buf.extend_from_slice(prev_leftover);
        }

        // Shared tail for the compressed branches: trim `buf` to the bytes
        // actually present and hand ownership to a new `Buffer`.
        let new_slice_from_read =
            |bytes_read: usize, mut buf: Vec<u8>| -> std::io::Result<(Buffer<u8>, usize)> {
                buf.truncate(prev_len + bytes_read);
                Ok((Buffer::from_vec(buf), bytes_read))
            };

        match self {
            CompressedReader::Uncompressed { slice, offset, .. } => {
                let bytes_read = cmp::min(read_size, slice.len() - *offset);
                // Per the doc comment, `prev_leftover` is the tail of the last
                // returned slice, i.e. exactly the bytes directly before
                // `offset`; re-slicing from `offset - prev_len` re-includes it
                // without any copying.
                let new_slice = slice
                    .clone()
                    .sliced(*offset - prev_len..*offset + bytes_read);
                *offset += bytes_read;
                Ok((new_slice, bytes_read))
            },
            // `take` limits each decoder to at most `read_size` new bytes per call.
            #[cfg(feature = "decompress")]
            CompressedReader::Gzip(decoder) => {
                new_slice_from_read(decoder.take(read_size as u64).read_to_end(&mut buf)?, buf)
            },
            #[cfg(feature = "decompress")]
            CompressedReader::Zlib(decoder) => {
                new_slice_from_read(decoder.take(read_size as u64).read_to_end(&mut buf)?, buf)
            },
            #[cfg(feature = "decompress")]
            CompressedReader::Zstd(decoder) => {
                new_slice_from_read(decoder.take(read_size as u64).read_to_end(&mut buf)?, buf)
            },
        }
    }
}
223
224
/// This implementation is meant for compatibility. Use [`Self::read_next_slice`] for best
225
/// performance.
226
impl Read for CompressedReader {
227
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
228
match self {
229
CompressedReader::Uncompressed { slice, offset, .. } => {
230
let bytes_read = cmp::min(buf.len(), slice.len() - *offset);
231
buf[..bytes_read].copy_from_slice(&slice[*offset..(*offset + bytes_read)]);
232
*offset += bytes_read;
233
Ok(bytes_read)
234
},
235
#[cfg(feature = "decompress")]
236
CompressedReader::Gzip(decoder) => decoder.read(buf),
237
#[cfg(feature = "decompress")]
238
CompressedReader::Zlib(decoder) => decoder.read(buf),
239
#[cfg(feature = "decompress")]
240
CompressedReader::Zstd(decoder) => decoder.read(buf),
241
}
242
}
243
}
244
245
/// A byte source that abstracts over in-memory buffers and streaming
/// readers, with optional transparent decompression and buffering.
///
/// Implements `BufRead`, allowing uniform access regardless of whether
/// the underlying data is an in-memory slice, a raw stream, or a
/// compressed stream (gzip/zlib/zstd).
///
/// This is the generic successor to [`CompressedReader`], which only
/// supports in-memory (`Buffer<u8>`) sources.
#[cfg(feature = "async")]
pub enum ByteSourceReader<R: BufRead> {
    /// In-memory uncompressed bytes; reads are served zero-copy by slicing.
    UncompressedMemory {
        // The full uncompressed input.
        slice: Buffer<u8>,
        // Number of bytes already handed out to the consumer.
        offset: usize,
    },
    /// Uncompressed streaming source; bytes are copied out on demand.
    UncompressedStream(R),
    /// Gzip stream (`MultiGzDecoder` also handles concatenated gzip members).
    #[cfg(feature = "decompress")]
    Gzip(flate2::bufread::MultiGzDecoder<R>),
    /// Zlib (deflate with zlib header/trailer) stream.
    #[cfg(feature = "decompress")]
    Zlib(flate2::bufread::ZlibDecoder<R>),
    /// Zstandard stream.
    #[cfg(feature = "decompress")]
    Zstd(zstd::Decoder<'static, R>),
}
268
269
#[cfg(feature = "async")]
impl<R: BufRead> ByteSourceReader<R> {
    /// Wraps `reader`, decoding with the given compression algorithm if one is
    /// provided. Unlike [`CompressedReader::try_new`], the compression must be
    /// detected by the caller (the stream cannot be peeked here).
    ///
    /// # Errors
    /// Returns an error if the zstd decoder cannot be initialized.
    ///
    /// # Panics
    /// Panics if `compression` is `Some` but the 'decompress' feature is
    /// disabled.
    pub fn try_new(reader: R, compression: Option<SupportedCompression>) -> PolarsResult<Self> {
        Ok(match compression {
            None => Self::UncompressedStream(reader),
            #[cfg(feature = "decompress")]
            Some(SupportedCompression::GZIP) => {
                Self::Gzip(flate2::bufread::MultiGzDecoder::new(reader))
            },
            #[cfg(feature = "decompress")]
            Some(SupportedCompression::ZLIB) => {
                Self::Zlib(flate2::bufread::ZlibDecoder::new(reader))
            },
            #[cfg(feature = "decompress")]
            Some(SupportedCompression::ZSTD) => Self::Zstd(zstd::Decoder::with_buffer(reader)?),
            #[cfg(not(feature = "decompress"))]
            _ => panic!("activate 'decompress' feature"),
        })
    }

    /// Whether reads pass through a decompressor.
    pub fn is_compressed(&self) -> bool {
        !matches!(
            &self,
            Self::UncompressedMemory { .. } | Self::UncompressedStream(_)
        )
    }

    /// Recommended `read_size` for the first call to [`Self::read_next_slice`].
    pub const fn initial_read_size() -> usize {
        // We don't want to read too much at the beginning to keep decompression to a minimum if for
        // example only the schema is needed or a slice op is used. Keep in sync with
        // `ideal_read_size` (32 KiB * 16 == 512 KiB).
        32 * 1024
    }

    /// Recommended steady-state `read_size` for [`Self::read_next_slice`].
    pub const fn ideal_read_size() -> usize {
        // Somewhat conservative guess for L2 size, which performs the best on most machines and is
        // nearly always core exclusive. The loss of going larger and accidentally hitting L3 is not
        // recouped by amortizing the block processing cost even further.
        //
        // It's possible that callers use or need a larger `read_size` if for example a single row
        // doesn't fit in the 512KB.
        512 * 1024
    }

    /// Reads exactly `read_size` bytes if possible from the internal readers and creates a new
    /// [`Buffer`] with the content `concat(prev_leftover, new_bytes)`.
    ///
    /// Returns the new slice and the number of bytes read, which will be 0 when eof is reached and
    /// this function is called again.
    ///
    /// If the underlying reader is uncompressed the operation is a cheap zero-copy
    /// [`Buffer::sliced`] operation.
    ///
    /// By handling slice concatenation at this level we can implement zero-copy reading *and* make
    /// the interface easier to use.
    ///
    /// It's a logic bug if `prev_leftover` is neither empty nor the last slice returned by this
    /// function.
    pub fn read_next_slice(
        &mut self,
        prev_leftover: &Buffer<u8>,
        read_size: usize,
        uncompressed_size_hint: Option<usize>,
    ) -> std::io::Result<(Buffer<u8>, usize)> {
        // Assuming that callers of this function correctly handle re-trying, by continuously growing
        // prev_leftover if it doesn't contain a single row, this abstraction supports arbitrarily
        // sized rows.
        let prev_len = prev_leftover.len();

        let reader: &mut dyn Read = match self {
            // Zero-copy fast-path — no allocation required
            Self::UncompressedMemory { slice, offset } => {
                let bytes_read = cmp::min(read_size, slice.len() - *offset);
                // Per the doc comment, `prev_leftover` is exactly the bytes
                // directly before `offset`, so re-slicing from
                // `offset - prev_len` re-includes it without copying.
                let new_slice = slice
                    .clone()
                    .sliced(*offset - prev_len..*offset + bytes_read);
                *offset += bytes_read;
                return Ok((new_slice, bytes_read));
            },
            // All remaining variants are streaming readers; unify them behind
            // `dyn Read` so the copy path below is written only once.
            Self::UncompressedStream(reader) => reader,
            #[cfg(feature = "decompress")]
            Self::Gzip(reader) => reader,
            #[cfg(feature = "decompress")]
            Self::Zlib(reader) => reader,
            #[cfg(feature = "decompress")]
            Self::Zstd(reader) => reader,
        };

        let mut buf = Vec::new();

        // Cap the reserve_size, for the scenario where read_size == usize::MAX
        let max_reserve_size = uncompressed_size_hint.unwrap_or(4 * 1024 * 1024);
        let reserve_size = cmp::min(prev_len.saturating_add(read_size), max_reserve_size);
        buf.reserve_exact(reserve_size);
        buf.extend_from_slice(prev_leftover);

        // `take` limits the reader to at most `read_size` new bytes this call.
        let bytes_read = reader.take(read_size as u64).read_to_end(&mut buf)?;
        buf.truncate(prev_len + bytes_read);
        Ok((Buffer::from_vec(buf), bytes_read))
    }
}
370
371
#[cfg(feature = "async")]
impl ByteSourceReader<ReaderSource> {
    /// Builds a reader over an in-memory buffer.
    ///
    /// Uncompressed buffers take the zero-copy `UncompressedMemory` path;
    /// compressed ones are routed through [`Self::try_new`] with the buffer
    /// wrapped as a memory-backed stream.
    pub fn from_memory(
        slice: Buffer<u8>,
        compression: Option<SupportedCompression>,
    ) -> PolarsResult<Self> {
        if compression.is_none() {
            return Ok(Self::UncompressedMemory { slice, offset: 0 });
        }

        Self::try_new(ReaderSource::Memory(Cursor::new(slice)), compression)
    }
}
383
384
/// Constructor for `WriteableTrait` compressed encoders.
pub enum CompressedWriter {
    /// Gzip encoder. The `Option` is taken on `close()` so the encoder can be
    /// finished by value; it is `None` only after the writer has been closed.
    #[cfg(feature = "decompress")]
    Gzip(Option<flate2::write::GzEncoder<Writeable>>),
    /// Zstandard encoder; same `Option`-taken-on-close scheme as `Gzip`.
    #[cfg(feature = "decompress")]
    Zstd(Option<zstd::Encoder<'static, Writeable>>),
}
391
392
impl CompressedWriter {
393
pub fn gzip(writer: Writeable, level: Option<u32>) -> Self {
394
feature_gated!("decompress", {
395
Self::Gzip(Some(flate2::write::GzEncoder::new(
396
writer,
397
level.map(flate2::Compression::new).unwrap_or_default(),
398
)))
399
})
400
}
401
402
pub fn zstd(writer: Writeable, level: Option<u32>) -> std::io::Result<Self> {
403
feature_gated!("decompress", {
404
zstd::Encoder::new(writer, level.unwrap_or(3) as i32)
405
.map(Some)
406
.map(Self::Zstd)
407
})
408
}
409
}
410
411
impl Write for CompressedWriter {
412
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
413
feature_gated!("decompress", {
414
match self {
415
Self::Gzip(encoder) => encoder.as_mut().unwrap().write(buf),
416
Self::Zstd(encoder) => encoder.as_mut().unwrap().write(buf),
417
}
418
})
419
}
420
421
fn flush(&mut self) -> std::io::Result<()> {
422
feature_gated!("decompress", {
423
match self {
424
Self::Gzip(encoder) => encoder.as_mut().unwrap().flush(),
425
Self::Zstd(encoder) => encoder.as_mut().unwrap().flush(),
426
}
427
})
428
}
429
}
430
431
impl WriteableTrait for CompressedWriter {
432
fn close(&mut self) -> std::io::Result<()> {
433
feature_gated!("decompress", {
434
let writer = match self {
435
Self::Gzip(encoder) => encoder.take().unwrap().finish()?,
436
Self::Zstd(encoder) => encoder.take().unwrap().finish()?,
437
};
438
439
writer.close(SyncOnCloseType::All)
440
})
441
}
442
443
fn sync_all(&self) -> std::io::Result<()> {
444
feature_gated!("decompress", {
445
match self {
446
Self::Gzip(encoder) => encoder.as_ref().unwrap().get_ref().sync_all(),
447
Self::Zstd(encoder) => encoder.as_ref().unwrap().get_ref().sync_all(),
448
}
449
})
450
}
451
452
fn sync_data(&self) -> std::io::Result<()> {
453
feature_gated!("decompress", {
454
match self {
455
Self::Gzip(encoder) => encoder.as_ref().unwrap().get_ref().sync_data(),
456
Self::Zstd(encoder) => encoder.as_ref().unwrap().get_ref().sync_data(),
457
}
458
})
459
}
460
}
461
462