Path: blob/main/crates/polars-ops/src/series/ops/to_dummies.rs
6939 views
use polars_utils::format_pl_smallstr;12use super::*;34#[cfg(feature = "dtype-u8")]5type DummyType = u8;6#[cfg(feature = "dtype-u8")]7type DummyCa = UInt8Chunked;89#[cfg(not(feature = "dtype-u8"))]10type DummyType = i32;11#[cfg(not(feature = "dtype-u8"))]12type DummyCa = Int32Chunked;1314pub trait ToDummies {15fn to_dummies(16&self,17separator: Option<&str>,18drop_first: bool,19drop_nulls: bool,20) -> PolarsResult<DataFrame>;21}2223impl ToDummies for Series {24fn to_dummies(25&self,26separator: Option<&str>,27drop_first: bool,28drop_nulls: bool,29) -> PolarsResult<DataFrame> {30let sep = separator.unwrap_or("_");31let col_name = self.name();32let groups = self.group_tuples(true, drop_first)?;3334// SAFETY: groups are in bounds35let columns = unsafe { self.agg_first(&groups) };36let columns = columns.iter().zip(groups.iter()).skip(drop_first as usize);37let columns = columns38.filter_map(|(av, group)| {39// strings are formatted with extra \" \" in polars, so we40// extract the string41let name = if let Some(s) = av.get_str() {42format_pl_smallstr!("{col_name}{sep}{s}")43} else {44// other types don't have this formatting issue45format_pl_smallstr!("{col_name}{sep}{av}")46};4748if av.is_null() && drop_nulls {49return None;50}5152let ca = match group {53GroupsIndicator::Idx((_, group)) => dummies_helper_idx(group, self.len(), name),54GroupsIndicator::Slice([offset, len]) => {55dummies_helper_slice(offset, len, self.len(), name)56},57};58Some(ca.into_column())59})60.collect::<Vec<_>>();6162DataFrame::new(sort_columns(columns))63}64}6566fn dummies_helper_idx(groups: &[IdxSize], len: usize, name: PlSmallStr) -> DummyCa {67let mut av = vec![0 as DummyType; len];6869for &idx in groups {70let elem = unsafe { av.get_unchecked_mut(idx as usize) };71*elem = 1;72}7374ChunkedArray::from_vec(name, av)75}7677fn dummies_helper_slice(78group_offset: IdxSize,79group_len: IdxSize,80len: usize,81name: PlSmallStr,82) -> DummyCa {83let mut av = vec![0 as DummyType; len];8485for idx in group_offset..(group_offset + group_len) {86let elem = unsafe { av.get_unchecked_mut(idx as usize) };87*elem = 1;88}8990ChunkedArray::from_vec(name, av)91}9293fn sort_columns(mut columns: Vec<Column>) -> Vec<Column> {94columns.sort_by(|a, b| a.name().partial_cmp(b.name()).unwrap());95columns96}979899