Path: blob/main/crates/polars-ops/src/series/ops/to_dummies.rs
8480 views
use polars_utils::format_pl_smallstr;12use super::*;34#[cfg(feature = "dtype-u8")]5type DummyType = u8;6#[cfg(feature = "dtype-u8")]7type DummyCa = UInt8Chunked;89#[cfg(not(feature = "dtype-u8"))]10type DummyType = i32;11#[cfg(not(feature = "dtype-u8"))]12type DummyCa = Int32Chunked;1314pub trait ToDummies {15fn to_dummies(16&self,17separator: Option<&str>,18drop_first: bool,19drop_nulls: bool,20) -> PolarsResult<DataFrame>;21}2223impl ToDummies for Series {24fn to_dummies(25&self,26separator: Option<&str>,27drop_first: bool,28drop_nulls: bool,29) -> PolarsResult<DataFrame> {30let sep = separator.unwrap_or("_");31let col_name = self.name();3233// We only need to maintain order if we need to drop the first non-null item.34let maintain_order = drop_first;35let groups = self.group_tuples(true, maintain_order)?;3637// SAFETY: groups are in bounds.38let columns = unsafe { self.agg_first(&groups) };39let columns = columns.iter().zip(groups.iter());40let mut seen_first = false;41let columns = columns42.filter_map(|(av, group)| {43if av.is_null() && drop_nulls {44return None;45} else if !seen_first && !av.is_null() && drop_first {46// The position of the first non-null item could be either 0 or 1.47seen_first = true;48return None;49}50// strings are formatted with extra \" \" in polars, so we51// extract the string52let name = if let Some(s) = av.get_str() {53format_pl_smallstr!("{col_name}{sep}{s}")54} else {55// other types don't have this formatting issue56format_pl_smallstr!("{col_name}{sep}{av}")57};5859let ca = match group {60GroupsIndicator::Idx((_, group)) => dummies_helper_idx(group, self.len(), name),61GroupsIndicator::Slice([offset, len]) => {62dummies_helper_slice(offset, len, self.len(), name)63},64};65Some(ca.into_column())66})67.collect::<Vec<_>>();6869DataFrame::new_infer_height(sort_columns(columns))70}71}7273fn dummies_helper_idx(groups: &[IdxSize], len: usize, name: PlSmallStr) -> DummyCa {74let mut av = vec![0 as DummyType; len];7576for &idx in groups {77let elem = unsafe { av.get_unchecked_mut(idx as usize) };78*elem = 1;79}8081ChunkedArray::from_vec(name, av)82}8384fn dummies_helper_slice(85group_offset: IdxSize,86group_len: IdxSize,87len: usize,88name: PlSmallStr,89) -> DummyCa {90let mut av = vec![0 as DummyType; len];9192for idx in group_offset..(group_offset + group_len) {93let elem = unsafe { av.get_unchecked_mut(idx as usize) };94*elem = 1;95}9697ChunkedArray::from_vec(name, av)98}99100fn sort_columns(mut columns: Vec<Column>) -> Vec<Column> {101columns.sort_by(|a, b| a.name().partial_cmp(b.name()).unwrap());102columns103}104105106