Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/series/ops/to_dummies.rs
8480 views
1
use polars_utils::format_pl_smallstr;
2
3
use super::*;
4
5
#[cfg(feature = "dtype-u8")]
6
type DummyType = u8;
7
#[cfg(feature = "dtype-u8")]
8
type DummyCa = UInt8Chunked;
9
10
#[cfg(not(feature = "dtype-u8"))]
11
type DummyType = i32;
12
#[cfg(not(feature = "dtype-u8"))]
13
type DummyCa = Int32Chunked;
14
15
pub trait ToDummies {
16
fn to_dummies(
17
&self,
18
separator: Option<&str>,
19
drop_first: bool,
20
drop_nulls: bool,
21
) -> PolarsResult<DataFrame>;
22
}
23
24
impl ToDummies for Series {
25
fn to_dummies(
26
&self,
27
separator: Option<&str>,
28
drop_first: bool,
29
drop_nulls: bool,
30
) -> PolarsResult<DataFrame> {
31
let sep = separator.unwrap_or("_");
32
let col_name = self.name();
33
34
// We only need to maintain order if we need to drop the first non-null item.
35
let maintain_order = drop_first;
36
let groups = self.group_tuples(true, maintain_order)?;
37
38
// SAFETY: groups are in bounds.
39
let columns = unsafe { self.agg_first(&groups) };
40
let columns = columns.iter().zip(groups.iter());
41
let mut seen_first = false;
42
let columns = columns
43
.filter_map(|(av, group)| {
44
if av.is_null() && drop_nulls {
45
return None;
46
} else if !seen_first && !av.is_null() && drop_first {
47
// The position of the first non-null item could be either 0 or 1.
48
seen_first = true;
49
return None;
50
}
51
// strings are formatted with extra \" \" in polars, so we
52
// extract the string
53
let name = if let Some(s) = av.get_str() {
54
format_pl_smallstr!("{col_name}{sep}{s}")
55
} else {
56
// other types don't have this formatting issue
57
format_pl_smallstr!("{col_name}{sep}{av}")
58
};
59
60
let ca = match group {
61
GroupsIndicator::Idx((_, group)) => dummies_helper_idx(group, self.len(), name),
62
GroupsIndicator::Slice([offset, len]) => {
63
dummies_helper_slice(offset, len, self.len(), name)
64
},
65
};
66
Some(ca.into_column())
67
})
68
.collect::<Vec<_>>();
69
70
DataFrame::new_infer_height(sort_columns(columns))
71
}
72
}
73
74
fn dummies_helper_idx(groups: &[IdxSize], len: usize, name: PlSmallStr) -> DummyCa {
75
let mut av = vec![0 as DummyType; len];
76
77
for &idx in groups {
78
let elem = unsafe { av.get_unchecked_mut(idx as usize) };
79
*elem = 1;
80
}
81
82
ChunkedArray::from_vec(name, av)
83
}
84
85
fn dummies_helper_slice(
86
group_offset: IdxSize,
87
group_len: IdxSize,
88
len: usize,
89
name: PlSmallStr,
90
) -> DummyCa {
91
let mut av = vec![0 as DummyType; len];
92
93
for idx in group_offset..(group_offset + group_len) {
94
let elem = unsafe { av.get_unchecked_mut(idx as usize) };
95
*elem = 1;
96
}
97
98
ChunkedArray::from_vec(name, av)
99
}
100
101
/// Returns `columns` sorted by column name in ascending order.
///
/// The sort is stable, so columns with equal names keep their relative order.
fn sort_columns(mut columns: Vec<Column>) -> Vec<Column> {
    // Names are totally ordered, so `cmp` gives the same result as
    // `partial_cmp(..).unwrap()` without a panic path.
    columns.sort_by(|a, b| a.name().cmp(b.name()));
    columns
}
105
106