Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/frame/horizontal.rs
6940 views
1
use polars_error::{PolarsResult, polars_err};
2
3
use super::Column;
4
use crate::datatypes::AnyValue;
5
use crate::frame::DataFrame;
6
7
impl DataFrame {
8
/// Add columns horizontally.
9
///
10
/// # Safety
11
/// The caller must ensure:
12
/// - the length of all [`Column`] is equal to the height of this [`DataFrame`]
13
/// - the columns names are unique
14
///
15
/// Note: If `self` is empty, `self.height` will always be overridden by the height of the first
16
/// column in `columns`.
17
///
18
/// Note that on a debug build this will panic on duplicates / height mismatch.
19
pub unsafe fn hstack_mut_unchecked(&mut self, columns: &[Column]) -> &mut Self {
20
self.clear_schema();
21
self.columns.extend_from_slice(columns);
22
23
if cfg!(debug_assertions) {
24
if let err @ Err(_) = DataFrame::validate_columns_slice(&self.columns) {
25
// Reset DataFrame state to before extend.
26
self.columns.truncate(self.columns.len() - columns.len());
27
err.unwrap();
28
}
29
}
30
31
if let Some(c) = self.columns.first() {
32
unsafe { self.set_height(c.len()) };
33
}
34
35
self
36
}
37
38
/// Add multiple [`Column`] to a [`DataFrame`].
39
/// Errors if the resulting DataFrame columns have duplicate names or unequal heights.
40
///
41
/// Note: If `self` is empty, `self.height` will always be overridden by the height of the first
42
/// column in `columns`.
43
///
44
/// # Example
45
///
46
/// ```rust
47
/// # use polars_core::prelude::*;
48
/// fn stack(df: &mut DataFrame, columns: &[Column]) {
49
/// df.hstack_mut(columns);
50
/// }
51
/// ```
52
pub fn hstack_mut(&mut self, columns: &[Column]) -> PolarsResult<&mut Self> {
53
self.clear_schema();
54
self.columns.extend_from_slice(columns);
55
56
if let err @ Err(_) = DataFrame::validate_columns_slice(&self.columns) {
57
// Reset DataFrame state to before extend.
58
self.columns.truncate(self.columns.len() - columns.len());
59
err?;
60
}
61
62
if let Some(c) = self.columns.first() {
63
unsafe { self.set_height(c.len()) };
64
}
65
66
Ok(self)
67
}
68
}
69
70
/// Concat [`DataFrame`]s horizontally.
71
/// Concat horizontally and extend with null values if lengths don't match
72
pub fn concat_df_horizontal(dfs: &[DataFrame], check_duplicates: bool) -> PolarsResult<DataFrame> {
73
let output_height = dfs
74
.iter()
75
.map(|df| df.height())
76
.max()
77
.ok_or_else(|| polars_err!(ComputeError: "cannot concat empty dataframes"))?;
78
79
let owned_df;
80
81
let mut out_width = 0;
82
83
let all_equal_height = dfs.iter().all(|df| {
84
out_width += df.width();
85
df.height() == output_height
86
});
87
88
// if not all equal length, extend the DataFrame with nulls
89
let dfs = if !all_equal_height {
90
out_width = 0;
91
92
owned_df = dfs
93
.iter()
94
.cloned()
95
.map(|mut df| {
96
out_width += df.width();
97
98
if df.height() != output_height {
99
let diff = output_height - df.height();
100
101
// SAFETY: We extend each column with nulls to the point of being of length
102
// `output_height`. Then, we set the height of the resulting dataframe.
103
unsafe { df.get_columns_mut() }.iter_mut().for_each(|c| {
104
*c = c.extend_constant(AnyValue::Null, diff).unwrap();
105
});
106
df.clear_schema();
107
unsafe {
108
df.set_height(output_height);
109
}
110
}
111
df
112
})
113
.collect::<Vec<_>>();
114
owned_df.as_slice()
115
} else {
116
dfs
117
};
118
119
let mut acc_cols = Vec::with_capacity(out_width);
120
121
for df in dfs {
122
acc_cols.extend(df.get_columns().iter().cloned());
123
}
124
125
if check_duplicates {
126
DataFrame::validate_columns_slice(&acc_cols)?;
127
}
128
129
let df = unsafe { DataFrame::new_no_checks_height_from_first(acc_cols) };
130
131
Ok(df)
132
}
133
134
#[cfg(test)]
mod tests {
    use polars_error::PolarsError;

    use crate::frame::DataFrame;
    use crate::prelude::{Column, DataType};

    /// Stacking columns of different lengths onto an empty frame must be rejected with a
    /// shape mismatch, and the frame must be left without columns.
    #[test]
    fn test_hstack_mut_empty_frame_height_validation() {
        // Two null columns whose lengths (1 vs 3) disagree.
        let mismatched = [
            Column::full_null("a".into(), 1, &DataType::Null),
            Column::full_null("b".into(), 3, &DataType::Null),
        ];
        let mut df = DataFrame::empty();

        let result = df.hstack_mut(&mismatched);
        assert!(
            matches!(result, Err(PolarsError::ShapeMismatch(_))),
            "expected shape mismatch error"
        );

        // Ensure the DataFrame is not mutated in the error case.
        assert_eq!(df.width(), 0);
    }
}
157
158