// Source: GitHub repository pola-rs/polars (branch `main`)
// Path: crates/polars-io/src/utils/compression.rs
use std::io::Read;
2
3
use polars_core::prelude::*;
4
use polars_error::{feature_gated, to_compute_err};
5
6
/// Represents the compression algorithms that we have decoders for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SupportedCompression {
    GZIP,
    ZLIB,
    ZSTD,
}

impl SupportedCompression {
    /// Identifies the compression family of `bytes` from its leading "magic" bytes.
    ///
    /// Returns `Some(family)` when the slice starts with a known signature and `None` for
    /// unsupported or uncompressed data. At least four bytes are required for any match, so
    /// shorter slices always yield `None`.
    ///
    /// Based on <https://en.wikipedia.org/wiki/List_of_file_signatures>.
    pub fn check(bytes: &[u8]) -> Option<Self> {
        // Every pattern names four leading bytes followed by a rest pattern, so a slice with
        // fewer than four bytes cannot match any arm and falls through to `None`.
        match bytes {
            [0x1f, 0x8b, _, _, ..] => Some(Self::GZIP),
            // Different zlib compression levels without preset dictionary.
            [0x78, 0x01 | 0x5e | 0x9c | 0xda, _, _, ..] => Some(Self::ZLIB),
            [0x28, 0xb5, 0x2f, 0xfd, ..] => Some(Self::ZSTD),
            _ => None,
        }
    }
}
34
35
/// Decompress `bytes` if compression is detected, otherwise simply return it.
36
/// An `out` vec must be given for ownership of the decompressed data.
37
#[allow(clippy::ptr_arg)]
38
pub fn maybe_decompress_bytes<'a>(bytes: &'a [u8], out: &'a mut Vec<u8>) -> PolarsResult<&'a [u8]> {
39
assert!(out.is_empty());
40
41
if let Some(algo) = SupportedCompression::check(bytes) {
42
feature_gated!("decompress", {
43
match algo {
44
SupportedCompression::GZIP => {
45
flate2::read::MultiGzDecoder::new(bytes)
46
.read_to_end(out)
47
.map_err(to_compute_err)?;
48
},
49
SupportedCompression::ZLIB => {
50
flate2::read::ZlibDecoder::new(bytes)
51
.read_to_end(out)
52
.map_err(to_compute_err)?;
53
},
54
SupportedCompression::ZSTD => {
55
zstd::Decoder::with_buffer(bytes)?.read_to_end(out)?;
56
},
57
}
58
59
Ok(out)
60
})
61
} else {
62
Ok(bytes)
63
}
64
}
65
66