// Source: GitHub repository pola-rs/polars
// Path: blob/main/crates/polars-io/src/mmap.rs
use std::fs::File;
use std::io::{BufReader, Cursor, Read, Seek};

use polars_core::config::verbose;
use polars_utils::file::ClosableFile;
use polars_utils::mmap::MemSlice;

/// Trait used to get hold of the file handle or of the underlying bytes
/// without performing a `Read`.
///
/// Both accessors return `None` by default; an implementor overrides the one
/// (or both) that its backing storage can actually provide.
pub trait MmapBytesReader: Read + Seek + Send + Sync {
    /// Returns the [`File`] backing this reader, or `None` if there is none.
    fn to_file(&self) -> Option<&File> {
        None
    }

    /// Returns the bytes backing this reader, or `None` if they are not
    /// directly accessible.
    fn to_bytes(&self) -> Option<&[u8]> {
        None
    }
}
impl MmapBytesReader for File {
21
fn to_file(&self) -> Option<&File> {
22
Some(self)
23
}
24
}
25
26
impl MmapBytesReader for ClosableFile {
27
fn to_file(&self) -> Option<&File> {
28
Some(self.as_ref())
29
}
30
}
31
32
impl MmapBytesReader for BufReader<File> {
33
fn to_file(&self) -> Option<&File> {
34
Some(self.get_ref())
35
}
36
}
37
38
impl MmapBytesReader for BufReader<&File> {
39
fn to_file(&self) -> Option<&File> {
40
Some(self.get_ref())
41
}
42
}
43
44
impl<T> MmapBytesReader for Cursor<T>
45
where
46
T: AsRef<[u8]> + Send + Sync,
47
{
48
fn to_bytes(&self) -> Option<&[u8]> {
49
Some(self.get_ref().as_ref())
50
}
51
}
52
53
impl<T: MmapBytesReader + ?Sized> MmapBytesReader for Box<T> {
54
fn to_file(&self) -> Option<&File> {
55
T::to_file(self)
56
}
57
58
fn to_bytes(&self) -> Option<&[u8]> {
59
T::to_bytes(self)
60
}
61
}
62
63
impl<T: MmapBytesReader> MmapBytesReader for &mut T {
64
fn to_file(&self) -> Option<&File> {
65
T::to_file(self)
66
}
67
68
fn to_bytes(&self) -> Option<&[u8]> {
69
T::to_bytes(self)
70
}
71
}
72
73
// Handle various forms of input bytes
74
pub enum ReaderBytes<'a> {
75
Borrowed(&'a [u8]),
76
Owned(MemSlice),
77
}
78
79
impl std::ops::Deref for ReaderBytes<'_> {
80
type Target = [u8];
81
fn deref(&self) -> &[u8] {
82
match self {
83
Self::Borrowed(ref_bytes) => ref_bytes,
84
Self::Owned(vec) => vec,
85
}
86
}
87
}
88
89
/// There are some places that perform manual lifetime management after transmuting `ReaderBytes`
90
/// to have a `'static` inner lifetime. The advantage to doing this is that it lets you construct a
91
/// `MemSlice` from the `ReaderBytes` in a zero-copy manner regardless of the underlying enum
92
/// variant.
93
impl ReaderBytes<'static> {
94
/// Construct a `MemSlice` in a zero-copy manner from the underlying bytes, with the assumption
95
/// that the underlying bytes have a `'static` lifetime.
96
pub fn to_memslice(&self) -> MemSlice {
97
match self {
98
ReaderBytes::Borrowed(v) => MemSlice::from_static(v),
99
ReaderBytes::Owned(v) => v.clone(),
100
}
101
}
102
}
103
104
impl<'a, T: 'a + MmapBytesReader> From<&'a mut T> for ReaderBytes<'a> {
105
fn from(m: &'a mut T) -> Self {
106
match m.to_bytes() {
107
// , but somehow bchk doesn't see that lifetime is 'a.
108
Some(s) => {
109
let s = unsafe { std::mem::transmute::<&[u8], &'a [u8]>(s) };
110
ReaderBytes::Borrowed(s)
111
},
112
None => {
113
if let Some(f) = m.to_file() {
114
ReaderBytes::Owned(MemSlice::from_file(f).unwrap())
115
} else {
116
if verbose() {
117
eprintln!("could not memory map file; read to buffer.")
118
}
119
let mut buf = vec![];
120
m.read_to_end(&mut buf).expect("could not read");
121
ReaderBytes::Owned(MemSlice::from_vec(buf))
122
}
123
},
124
}
125
}
126
}
127
128