Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/series/ops/to_dummies.rs
6939 views
1
use polars_utils::format_pl_smallstr;
2
3
use super::*;
4
5
#[cfg(feature = "dtype-u8")]
6
type DummyType = u8;
7
#[cfg(feature = "dtype-u8")]
8
type DummyCa = UInt8Chunked;
9
10
#[cfg(not(feature = "dtype-u8"))]
11
type DummyType = i32;
12
#[cfg(not(feature = "dtype-u8"))]
13
type DummyCa = Int32Chunked;
14
15
pub trait ToDummies {
16
fn to_dummies(
17
&self,
18
separator: Option<&str>,
19
drop_first: bool,
20
drop_nulls: bool,
21
) -> PolarsResult<DataFrame>;
22
}
23
24
impl ToDummies for Series {
25
fn to_dummies(
26
&self,
27
separator: Option<&str>,
28
drop_first: bool,
29
drop_nulls: bool,
30
) -> PolarsResult<DataFrame> {
31
let sep = separator.unwrap_or("_");
32
let col_name = self.name();
33
let groups = self.group_tuples(true, drop_first)?;
34
35
// SAFETY: groups are in bounds
36
let columns = unsafe { self.agg_first(&groups) };
37
let columns = columns.iter().zip(groups.iter()).skip(drop_first as usize);
38
let columns = columns
39
.filter_map(|(av, group)| {
40
// strings are formatted with extra \" \" in polars, so we
41
// extract the string
42
let name = if let Some(s) = av.get_str() {
43
format_pl_smallstr!("{col_name}{sep}{s}")
44
} else {
45
// other types don't have this formatting issue
46
format_pl_smallstr!("{col_name}{sep}{av}")
47
};
48
49
if av.is_null() && drop_nulls {
50
return None;
51
}
52
53
let ca = match group {
54
GroupsIndicator::Idx((_, group)) => dummies_helper_idx(group, self.len(), name),
55
GroupsIndicator::Slice([offset, len]) => {
56
dummies_helper_slice(offset, len, self.len(), name)
57
},
58
};
59
Some(ca.into_column())
60
})
61
.collect::<Vec<_>>();
62
63
DataFrame::new(sort_columns(columns))
64
}
65
}
66
67
fn dummies_helper_idx(groups: &[IdxSize], len: usize, name: PlSmallStr) -> DummyCa {
68
let mut av = vec![0 as DummyType; len];
69
70
for &idx in groups {
71
let elem = unsafe { av.get_unchecked_mut(idx as usize) };
72
*elem = 1;
73
}
74
75
ChunkedArray::from_vec(name, av)
76
}
77
78
fn dummies_helper_slice(
79
group_offset: IdxSize,
80
group_len: IdxSize,
81
len: usize,
82
name: PlSmallStr,
83
) -> DummyCa {
84
let mut av = vec![0 as DummyType; len];
85
86
for idx in group_offset..(group_offset + group_len) {
87
let elem = unsafe { av.get_unchecked_mut(idx as usize) };
88
*elem = 1;
89
}
90
91
ChunkedArray::from_vec(name, av)
92
}
93
94
/// Returns `columns` ordered by column name.
///
/// The sort is stable, so columns with equal names keep their relative order.
fn sort_columns(mut columns: Vec<Column>) -> Vec<Column> {
    // String comparison is a total order, so use `cmp` directly instead of
    // `partial_cmp(..).unwrap()`.
    columns.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
    columns
}
98
99