Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-io/src/utils/slice.rs
6939 views
1
/// Given a `slice` that is relative to the start of a list of files, calculate the slice to apply
2
/// at a file with a row offset of `current_row_offset`.
3
pub fn split_slice_at_file(
4
current_row_offset_ref: &mut usize,
5
n_rows_this_file: usize,
6
global_slice_start: usize,
7
global_slice_end: usize,
8
) -> (usize, usize) {
9
let current_row_offset = *current_row_offset_ref;
10
*current_row_offset_ref += n_rows_this_file;
11
match SplitSlicePosition::split_slice_at_file(
12
current_row_offset,
13
n_rows_this_file,
14
global_slice_start..global_slice_end,
15
) {
16
SplitSlicePosition::Overlapping(offset, len) => (offset, len),
17
SplitSlicePosition::Before | SplitSlicePosition::After => (0, 0),
18
}
19
}
20
21
/// Where a single file lies relative to a row slice that spans a list of files.
#[derive(Debug)]
pub enum SplitSlicePosition {
    /// The file ends at or before the start of the slice.
    Before,
    /// The file intersects the slice. The fields are `(offset, len)`: the number of leading rows
    /// of this file to skip, and the number of rows to take after that.
    Overlapping(usize, usize),
    /// The file starts at or after the end of the slice.
    After,
}

impl SplitSlicePosition {
    /// Classifies the file covering rows
    /// `current_row_offset..current_row_offset + n_rows_this_file` against `global_slice`.
    ///
    /// * `current_row_offset` - row offset at which this file starts.
    /// * `n_rows_this_file` - number of rows in this file.
    /// * `global_slice` - half-open row range relative to the start of the first file.
    ///
    /// An inverted range (`start > end`) selects no rows: any file that is neither `Before` nor
    /// `After` it yields `Overlapping(_, 0)`, the same as for an empty range.
    pub fn split_slice_at_file(
        current_row_offset: usize,
        n_rows_this_file: usize,
        global_slice: std::ops::Range<usize>,
    ) -> Self {
        // e.g.
        // slice: (start: 1, end: 2)
        // files:
        //   0: (1 row): current_offset: 0, next_file_offset: 1
        //   1: (1 row): current_offset: 1, next_file_offset: 2
        //   2: (1 row): current_offset: 2, next_file_offset: 3
        // in this example we want to include only file 1.

        let next_row_offset = current_row_offset + n_rows_this_file;

        if next_row_offset <= global_slice.start {
            Self::Before
        } else if current_row_offset >= global_slice.end {
            Self::After
        } else {
            // Rows of this file that precede the slice start.
            let n_rows_to_skip = global_slice.start.saturating_sub(current_row_offset);
            // Rows of this file that lie past the slice end.
            let n_excess_rows = next_row_offset.saturating_sub(global_slice.end);

            // Saturate instead of subtracting directly: with an inverted range the skipped and
            // excess rows together can exceed the file length, which would otherwise underflow
            // (panic in debug builds, wrap to a huge length in release builds).
            let len = n_rows_this_file
                .saturating_sub(n_rows_to_skip)
                .saturating_sub(n_excess_rows);

            Self::Overlapping(n_rows_to_skip, len)
        }
    }
}
59
60