Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/frame/unpivot.rs
7884 views
1
use arrow::array::{MutableArray, MutablePlString};
2
use arrow::compute::concatenate::concatenate_unchecked;
3
use polars_core::datatypes::{DataType, PlSmallStr};
4
use polars_core::frame::DataFrame;
5
use polars_core::frame::column::Column;
6
use polars_core::prelude::{IntoVec, Series, UnpivotArgsIR};
7
use polars_core::utils::merge_dtypes_many;
8
use polars_error::{PolarsResult, polars_err};
9
10
use crate::frame::IntoDf;
11
12
pub trait UnpivotDF: IntoDf {
13
/// Unpivot a `DataFrame` from wide to long format.
14
///
15
/// # Example
16
///
17
/// # Arguments
18
///
19
/// * `on` - String slice that represent the columns to use as value variables.
20
///
21
/// * `index` - String slice that represent the columns to use as id variables.
22
///
23
/// If `on` is empty no columns will be used. If set to `None` all columns that are not in
24
/// `index` will be used.
25
///
26
/// ```ignore
27
/// # use polars_core::prelude::*;
28
/// let df = df!("A" => &["a", "b", "a"],
29
/// "B" => &[1, 3, 5],
30
/// "C" => &[10, 11, 12],
31
/// "D" => &[2, 4, 6]
32
/// )?;
33
///
34
/// let unpivoted = df.unpivot(&["A", "B"], &["C", "D"])?;
35
/// println!("{:?}", df);
36
/// println!("{:?}", unpivoted);
37
/// # Ok::<(), PolarsError>(())
38
/// ```
39
/// Outputs:
40
/// ```text
41
/// +-----+-----+-----+-----+
42
/// | A | B | C | D |
43
/// | --- | --- | --- | --- |
44
/// | str | i32 | i32 | i32 |
45
/// +=====+=====+=====+=====+
46
/// | "a" | 1 | 10 | 2 |
47
/// +-----+-----+-----+-----+
48
/// | "b" | 3 | 11 | 4 |
49
/// +-----+-----+-----+-----+
50
/// | "a" | 5 | 12 | 6 |
51
/// +-----+-----+-----+-----+
52
///
53
/// +-----+-----+----------+-------+
54
/// | A | B | variable | value |
55
/// | --- | --- | --- | --- |
56
/// | str | i32 | str | i32 |
57
/// +=====+=====+==========+=======+
58
/// | "a" | 1 | "C" | 10 |
59
/// +-----+-----+----------+-------+
60
/// | "b" | 3 | "C" | 11 |
61
/// +-----+-----+----------+-------+
62
/// | "a" | 5 | "C" | 12 |
63
/// +-----+-----+----------+-------+
64
/// | "a" | 1 | "D" | 2 |
65
/// +-----+-----+----------+-------+
66
/// | "b" | 3 | "D" | 4 |
67
/// +-----+-----+----------+-------+
68
/// | "a" | 5 | "D" | 6 |
69
/// +-----+-----+----------+-------+
70
///
71
/// The resulting row order is unspecified.
72
/// ```
73
fn unpivot<I, J>(&self, on: Option<I>, index: J) -> PolarsResult<DataFrame>
74
where
75
I: IntoVec<PlSmallStr>,
76
J: IntoVec<PlSmallStr>,
77
{
78
self.unpivot2(UnpivotArgsIR::new(
79
self.to_df().get_column_names_owned(),
80
on.map(|on| on.into_vec()),
81
index.into_vec(),
82
None,
83
None,
84
))
85
}
86
87
/// Similar to unpivot, but without generics. This may be easier if you want to pass
88
/// an empty `index` or empty `on`.
89
fn unpivot2(&self, args: UnpivotArgsIR) -> PolarsResult<DataFrame> {
90
let UnpivotArgsIR {
91
on,
92
index,
93
variable_name,
94
value_name,
95
} = args;
96
97
let self_ = self.to_df();
98
99
let variable_col_empty = Column::new_empty(variable_name.clone(), &DataType::String);
100
let value_col_empty = Column::new_empty(value_name.clone(), &DataType::Null);
101
102
if self_.get_columns().is_empty() {
103
return DataFrame::new(vec![variable_col_empty, value_col_empty]);
104
}
105
106
// If the parameter `on` is empty or there are no columns available to use as value vars. we
107
// want to produce an empty DataFrame but with the standard unpivot schema.
108
if on.is_empty() {
109
let mut out = self_.select(index)?.clear().take_columns();
110
111
out.push(variable_col_empty);
112
out.push(value_col_empty);
113
114
return Ok(unsafe { DataFrame::new_no_checks(0, out) });
115
}
116
117
let len = self_.height();
118
119
// Values will all be placed in single column, so we must find their supertype
120
let schema = self_.schema();
121
let dtypes = on
122
.iter()
123
.map(|v| schema.get(v).ok_or_else(|| polars_err!(col_not_found = v)))
124
.collect::<PolarsResult<Vec<_>>>()?;
125
126
let st = merge_dtypes_many(dtypes.iter())?;
127
128
// The column name of the variable that is unpivoted
129
let mut variable_col = MutablePlString::with_capacity(len * on.len() + 1);
130
// prepare ids
131
let ids_ = self_.select_with_schema_unchecked(index, schema)?;
132
let mut ids = ids_.clone();
133
if ids.width() > 0 {
134
for _ in 0..on.len() - 1 {
135
ids.vstack_mut_unchecked(&ids_)
136
}
137
}
138
ids.as_single_chunk_par();
139
drop(ids_);
140
141
let mut values = Vec::with_capacity(on.len());
142
let columns = self_.get_columns();
143
144
for value_column_name in &on {
145
variable_col.extend_constant(len, Some(value_column_name.as_str()));
146
// ensure we go via the schema so we are O(1)
147
// self.column() is linear
148
// together with this loop that would make it O^2 over `on`
149
let (pos, _name, _dtype) = schema.try_get_full(value_column_name)?;
150
let col = &columns[pos];
151
let value_col = col.cast(&st).map_err(
152
|_| polars_err!(InvalidOperation: "'unpivot' not supported for dtype: {}\n\nConsider casting to String.", col.dtype()),
153
)?;
154
values.extend_from_slice(value_col.as_materialized_series().chunks())
155
}
156
let values_arr = concatenate_unchecked(&values)?;
157
// SAFETY:
158
// The given dtype is correct
159
let values_col =
160
unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) }
161
.into();
162
163
let variable_arr = variable_col.as_box();
164
// SAFETY:
165
// The given dtype is correct
166
let variable_col = unsafe {
167
Series::from_chunks_and_dtype_unchecked(
168
variable_name,
169
vec![variable_arr],
170
&DataType::String,
171
)
172
}
173
.into();
174
175
ids.hstack_mut(&[variable_col, values_col])?;
176
177
Ok(ids)
178
}
179
}
180
181
// The impl body is empty: `DataFrame` uses the default `unpivot` / `unpivot2` method bodies
// defined on the `UnpivotDF` trait.
impl UnpivotDF for DataFrame {}
182
183
#[cfg(test)]
mod test {
    use polars_core::df;

    use super::*;

    /// Unpivots columns `C` and `D` against the `A`/`B` index and checks both the
    /// resulting column layout and the stacked values.
    #[test]
    fn test_unpivot() -> PolarsResult<()> {
        let wide = df!(
            "A" => &["a", "b", "a"],
            "B" => &[1, 3, 5],
            "C" => &[10, 11, 12],
            "D" => &[2, 4, 6]
        )
        .unwrap();

        // Specify on and index
        let long = wide.unpivot(Some(["C", "D"]), ["A", "B"])?;

        // Index columns come first, then the variable/value pair.
        let column_names = long.get_column_names();
        assert_eq!(column_names, &["A", "B", "variable", "value"]);

        // All `C` values are followed by all `D` values.
        let value_column = long.column("value")?;
        let stacked_values = Vec::from(value_column.i32()?);
        assert_eq!(
            stacked_values,
            &[Some(10), Some(11), Some(12), Some(2), Some(4), Some(6)]
        );

        Ok(())
    }
}
212
213