// Source: GitHub repository pola-rs/polars
// Path: blob/main/crates/polars-row/src/widths.rs
/// Container of byte-widths for (partial) rows.
///
/// The `RowWidths` keeps track of the sum of all widths and allows to efficiently deal with a
/// constant row-width (i.e. with primitive types).
#[derive(Debug, Clone)]
pub(crate) enum RowWidths {
    /// All `num_rows` rows share the same `width`, so no per-row storage is needed.
    Constant { num_rows: usize, width: usize },
    /// One entry in `widths` per row; `sum` is the running total of all entries.
    // @TODO: Maybe turn this into a Box<[usize]>
    Variable { widths: Vec<usize>, sum: usize },
}

impl Default for RowWidths {
13
fn default() -> Self {
14
Self::Constant {
15
num_rows: 0,
16
width: 0,
17
}
18
}
19
}
20
21
impl RowWidths {
22
pub fn new(num_rows: usize) -> Self {
23
Self::Constant { num_rows, width: 0 }
24
}
25
26
/// Push a constant width into the widths
27
pub fn push_constant(&mut self, constant: usize) {
28
match self {
29
Self::Constant { width, .. } => *width += constant,
30
Self::Variable { widths, sum } => {
31
widths.iter_mut().for_each(|w| *w += constant);
32
*sum += constant * widths.len();
33
},
34
}
35
}
36
/// Push an another [`RowWidths`] into the widths
37
pub fn push(&mut self, other: &Self) {
38
debug_assert_eq!(self.num_rows(), other.num_rows());
39
40
match (std::mem::take(self), other) {
41
(mut slf, RowWidths::Constant { width, num_rows: _ }) => {
42
slf.push_constant(*width);
43
*self = slf;
44
},
45
(RowWidths::Constant { num_rows, width }, RowWidths::Variable { widths, sum }) => {
46
*self = RowWidths::Variable {
47
widths: widths.iter().map(|w| *w + width).collect(),
48
sum: num_rows * width + sum,
49
};
50
},
51
(
52
RowWidths::Variable { mut widths, sum },
53
RowWidths::Variable {
54
widths: other_widths,
55
sum: other_sum,
56
},
57
) => {
58
widths
59
.iter_mut()
60
.zip(other_widths.iter())
61
.for_each(|(l, r)| *l += *r);
62
*self = RowWidths::Variable {
63
widths,
64
sum: sum + other_sum,
65
};
66
},
67
}
68
}
69
70
/// Create a [`RowWidths`] with the chunked sum with a certain `chunk_size`.
71
pub fn collapse_chunks(&self, chunk_size: usize, output_num_rows: usize) -> RowWidths {
72
if chunk_size == 0 {
73
assert_eq!(self.num_rows(), 0);
74
return RowWidths::new(output_num_rows);
75
}
76
77
assert_eq!(self.num_rows() % chunk_size, 0);
78
assert_eq!(self.num_rows() / chunk_size, output_num_rows);
79
match self {
80
Self::Constant { num_rows, width } => Self::Constant {
81
num_rows: num_rows / chunk_size,
82
width: width * chunk_size,
83
},
84
Self::Variable { widths, sum } => Self::Variable {
85
widths: widths
86
.chunks_exact(chunk_size)
87
.map(|chunk| chunk.iter().copied().sum())
88
.collect(),
89
sum: *sum,
90
},
91
}
92
}
93
94
pub fn extend_with_offsets(&self, out: &mut Vec<usize>) {
95
match self {
96
RowWidths::Constant { num_rows, width } => {
97
out.extend((0..*num_rows).map(|i| i * width));
98
},
99
RowWidths::Variable { widths, sum: _ } => {
100
let mut next = 0;
101
out.extend(widths.iter().map(|w| {
102
let current = next;
103
next += w;
104
current
105
}));
106
},
107
}
108
}
109
110
pub fn num_rows(&self) -> usize {
111
match self {
112
Self::Constant { num_rows, .. } => *num_rows,
113
Self::Variable { widths, .. } => widths.len(),
114
}
115
}
116
117
pub fn push_iter(&mut self, mut iter: impl ExactSizeIterator<Item = usize>) {
118
assert_eq!(self.num_rows(), iter.len());
119
120
match self {
121
RowWidths::Constant { num_rows, width } => {
122
let Some(constant) = iter.by_ref().next() else {
123
return;
124
};
125
126
// If the iterator turns out to be constant anyway. We would like to keep that
127
// benefit.
128
match iter.by_ref().enumerate().find(|(_, v)| *v != constant) {
129
None => *width += constant,
130
Some((i, v)) => {
131
let mut push_sum = (i + 1) * constant + v;
132
let mut slf = Vec::with_capacity(*num_rows);
133
134
slf.resize(i + 1, *width + constant);
135
slf.push(*width + v);
136
137
slf.extend(iter.map(|v| {
138
push_sum += v;
139
v + *width
140
}));
141
142
*self = Self::Variable {
143
widths: slf,
144
sum: *num_rows * *width + push_sum,
145
};
146
},
147
}
148
},
149
RowWidths::Variable { widths, sum } => {
150
let mut push_sum = 0;
151
iter.zip(widths).for_each(|(v, w)| {
152
push_sum += v;
153
*w += v;
154
});
155
*sum += push_sum;
156
},
157
}
158
}
159
160
pub fn get(&self, index: usize) -> usize {
161
assert!(index < self.num_rows());
162
match self {
163
Self::Constant { width, .. } => *width,
164
Self::Variable { widths, .. } => widths[index],
165
}
166
}
167
168
pub fn sum(&self) -> usize {
169
match self {
170
Self::Constant { num_rows, width } => *num_rows * *width,
171
Self::Variable { sum, .. } => *sum,
172
}
173
}
174
}
175
176