Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/frame/unpivot.rs
8468 views
1
use arrow::array::{MutableArray, MutablePlString};
2
use arrow::compute::concatenate::concatenate_unchecked;
3
use polars_core::datatypes::{DataType, PlSmallStr};
4
use polars_core::frame::DataFrame;
5
use polars_core::frame::column::Column;
6
use polars_core::prelude::{IntoVec, Series, UnpivotArgsIR};
7
use polars_core::utils::merge_dtypes_many;
8
use polars_error::{PolarsResult, polars_err};
9
10
use crate::frame::IntoDf;
11
12
pub trait UnpivotDF: IntoDf {
13
/// Unpivot a `DataFrame` from wide to long format.
14
///
15
/// # Example
16
///
17
/// # Arguments
18
///
19
/// * `on` - String slice that represent the columns to use as value variables.
20
///
21
/// * `index` - String slice that represent the columns to use as id variables.
22
///
23
/// If `on` is empty no columns will be used. If set to `None` all columns that are not in
24
/// `index` will be used.
25
///
26
/// ```ignore
27
/// # use polars_core::prelude::*;
28
/// let df = df!("A" => &["a", "b", "a"],
29
/// "B" => &[1, 3, 5],
30
/// "C" => &[10, 11, 12],
31
/// "D" => &[2, 4, 6]
32
/// )?;
33
///
34
/// let unpivoted = df.unpivot(&["A", "B"], &["C", "D"])?;
35
/// println!("{:?}", df);
36
/// println!("{:?}", unpivoted);
37
/// # Ok::<(), PolarsError>(())
38
/// ```
39
/// Outputs:
40
/// ```text
41
/// +-----+-----+-----+-----+
42
/// | A | B | C | D |
43
/// | --- | --- | --- | --- |
44
/// | str | i32 | i32 | i32 |
45
/// +=====+=====+=====+=====+
46
/// | "a" | 1 | 10 | 2 |
47
/// +-----+-----+-----+-----+
48
/// | "b" | 3 | 11 | 4 |
49
/// +-----+-----+-----+-----+
50
/// | "a" | 5 | 12 | 6 |
51
/// +-----+-----+-----+-----+
52
///
53
/// +-----+-----+----------+-------+
54
/// | A | B | variable | value |
55
/// | --- | --- | --- | --- |
56
/// | str | i32 | str | i32 |
57
/// +=====+=====+==========+=======+
58
/// | "a" | 1 | "C" | 10 |
59
/// +-----+-----+----------+-------+
60
/// | "b" | 3 | "C" | 11 |
61
/// +-----+-----+----------+-------+
62
/// | "a" | 5 | "C" | 12 |
63
/// +-----+-----+----------+-------+
64
/// | "a" | 1 | "D" | 2 |
65
/// +-----+-----+----------+-------+
66
/// | "b" | 3 | "D" | 4 |
67
/// +-----+-----+----------+-------+
68
/// | "a" | 5 | "D" | 6 |
69
/// +-----+-----+----------+-------+
70
///
71
/// The resulting row order is unspecified.
72
/// ```
73
fn unpivot<I, J>(&self, on: Option<I>, index: J) -> PolarsResult<DataFrame>
74
where
75
I: IntoVec<PlSmallStr>,
76
J: IntoVec<PlSmallStr>,
77
{
78
self.unpivot2(UnpivotArgsIR::new(
79
self.to_df().get_column_names_owned(),
80
on.map(|on| on.into_vec()),
81
index.into_vec(),
82
None,
83
None,
84
))
85
}
86
87
/// Similar to unpivot, but without generics. This may be easier if you want to pass
88
/// an empty `index` or empty `on`.
89
fn unpivot2(&self, args: UnpivotArgsIR) -> PolarsResult<DataFrame> {
90
let UnpivotArgsIR {
91
on,
92
index,
93
variable_name,
94
value_name,
95
} = args;
96
97
let self_ = self.to_df();
98
99
let variable_col_empty = Column::new_empty(variable_name.clone(), &DataType::String);
100
let value_col_empty = Column::new_empty(value_name.clone(), &DataType::Null);
101
102
if self_.width() == 0 {
103
return Ok(unsafe {
104
DataFrame::new_unchecked(0, vec![variable_col_empty, value_col_empty])
105
});
106
}
107
108
// If the parameter `on` is empty or there are no columns available to use as value vars. we
109
// want to produce an empty DataFrame but with the standard unpivot schema.
110
if on.is_empty() {
111
let mut out = self_.select(index)?.clear().into_columns();
112
113
out.push(variable_col_empty);
114
out.push(value_col_empty);
115
116
return Ok(unsafe { DataFrame::new_unchecked(0, out) });
117
}
118
119
let len = self_.height();
120
121
// Values will all be placed in single column, so we must find their supertype
122
let schema = self_.schema();
123
let dtypes = on
124
.iter()
125
.map(|v| schema.get(v).ok_or_else(|| polars_err!(col_not_found = v)))
126
.collect::<PolarsResult<Vec<_>>>()?;
127
128
let st = merge_dtypes_many(dtypes.iter())?;
129
130
// The column name of the variable that is unpivoted
131
let mut variable_col = MutablePlString::with_capacity(len * on.len() + 1);
132
// prepare ids
133
let ids_ = unsafe { self_.select_unchecked(index.as_slice())? };
134
let mut ids = ids_.clone();
135
if ids.width() > 0 {
136
for _ in 0..on.len() - 1 {
137
ids.vstack_mut_unchecked(&ids_);
138
}
139
} else {
140
unsafe { ids.set_height(0) };
141
}
142
ids.rechunk_mut_par();
143
drop(ids_);
144
145
let mut values = Vec::with_capacity(on.len());
146
let columns = self_.columns();
147
148
for value_column_name in &on {
149
variable_col.extend_constant(len, Some(value_column_name.as_str()));
150
// ensure we go via the schema so we are O(1)
151
// self.column() is linear
152
// together with this loop that would make it O^2 over `on`
153
let (pos, _name, _dtype) = schema.try_get_full(value_column_name)?;
154
let col = &columns[pos];
155
let value_col = col.cast(&st).map_err(
156
|_| polars_err!(InvalidOperation: "'unpivot' not supported for dtype: {}\n\nConsider casting to String.", col.dtype()),
157
)?;
158
values.extend_from_slice(value_col.as_materialized_series().chunks())
159
}
160
let values_arr = concatenate_unchecked(&values)?;
161
// SAFETY:
162
// The given dtype is correct
163
let values_col =
164
unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) }
165
.into();
166
167
let variable_arr = variable_col.as_box();
168
// SAFETY:
169
// The given dtype is correct
170
let variable_col = unsafe {
171
Series::from_chunks_and_dtype_unchecked(
172
variable_name,
173
vec![variable_arr],
174
&DataType::String,
175
)
176
}
177
.into();
178
179
ids.hstack_mut(&[variable_col, values_col])?;
180
181
Ok(ids)
182
}
183
}
184
185
// Empty impl: `DataFrame` uses the trait's default `unpivot` / `unpivot2` bodies as-is.
impl UnpivotDF for DataFrame {}
186
187
#[cfg(test)]
mod test {
    use polars_core::df;

    use super::*;

    /// Unpivots `C` and `D` with `A` and `B` as identifiers, then checks the
    /// output column names and the contents of the `value` column.
    #[test]
    fn test_unpivot() -> PolarsResult<()> {
        let frame = df!(
            "A" => &["a", "b", "a"],
            "B" => &[1, 3, 5],
            "C" => &[10, 11, 12],
            "D" => &[2, 4, 6]
        )
        .unwrap();

        // Both `on` and `index` are given explicitly.
        let long = frame.unpivot(Some(["C", "D"]), ["A", "B"])?;

        // Identifier columns come first, followed by the variable and value columns.
        let names = long.get_column_names();
        assert_eq!(names, &["A", "B", "variable", "value"]);

        // All `C` values precede all `D` values in the `value` column.
        let values = Vec::from(long.column("value")?.i32()?);
        assert_eq!(
            values,
            &[Some(10), Some(11), Some(12), Some(2), Some(4), Some(6)]
        );

        Ok(())
    }
}
216
217