use core::fmt::Debug;
use core::hash::{Hash, Hasher};
use indexmap::map::MutableKeys;
use polars_error::{PolarsError, PolarsResult, polars_bail, polars_ensure, polars_err};
use polars_utils::aliases::{InitHashMaps, PlIndexMap};
use polars_utils::pl_str::PlSmallStr;
// A collection of named fields plus a caller-defined `Metadata` value.
//
// The fields live in a `PlIndexMap`, so each entry can be looked up by name as well as
// by positional index.
//
// NOTE: plain `//` comments are used on purpose; `///` doc comments may be picked up by
// the optional `schemars::JsonSchema` derive.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct Schema<Field, Metadata> {
// Name -> field entries, addressable by name and by position.
fields: PlIndexMap<PlSmallStr, Field>,
// Extra data carried alongside the fields; it takes part in `Hash`/`PartialEq`.
metadata: Metadata,
}
impl<Field, Metadata: Default> Default for Schema<Field, Metadata> {
    /// Builds a schema with no fields and `Metadata::default()` as metadata.
    fn default() -> Self {
        let fields = PlIndexMap::default();
        let metadata = Metadata::default();
        Self { fields, metadata }
    }
}
// Marker impl: equality (see the `PartialEq` impl) is a full equivalence relation whenever
// both `Field` and `Metadata` are `Eq`.
impl<Field: Eq, Metadata: Eq> Eq for Schema<Field, Metadata> {}
impl<Field, Metadata: Default> Schema<Field, Metadata> {
/// Creates an empty schema whose field map is pre-allocated for `capacity` entries.
///
/// The metadata is initialised with `Metadata::default()`.
pub fn with_capacity(capacity: usize) -> Self {
    Self {
        fields: PlIndexMap::with_capacity(capacity),
        metadata: Metadata::default(),
    }
}
/// Builds a schema from `(name, field)` pairs, rejecting repeated names.
///
/// # Errors
/// Returns a `Duplicate` error naming the first name that occurs more than once.
pub fn from_iter_check_duplicates<I, F>(iter: I) -> PolarsResult<Self>
where
    I: IntoIterator<Item = F>,
    F: Into<(PlSmallStr, Field)>,
{
    let duplicate_err =
        |name: &str| polars_err!(Duplicate: "duplicate name when building schema '{}'", &name);
    // Wrap every item in `Ok` so the fallible constructor can be reused as-is.
    let infallible = iter.into_iter().map(PolarsResult::Ok);
    Self::try_from_iter_check_duplicates(infallible, duplicate_err)
}
pub fn try_from_iter_check_duplicates<I, F, E>(iter: I, err_func: E) -> PolarsResult<Self>
where
I: IntoIterator<Item = PolarsResult<F>>,
F: Into<(PlSmallStr, Field)>,
E: Fn(&str) -> PolarsError,
{
let iter = iter.into_iter();
let mut slf = Self::with_capacity(iter.size_hint().1.unwrap_or(0));
for v in iter {
let (name, d) = v?.into();
if slf.contains(&name) {
return Err(err_func(&name));
}
slf.fields.insert(name, d);
}
Ok(slf)
}
}
impl<Field, Metadata> Schema<Field, Metadata> {
/// Reserves room in the underlying field map for `additional` more fields.
pub fn reserve(&mut self, additional: usize) {
self.fields.reserve(additional);
}
/// Returns the number of fields in the schema.
#[inline]
pub fn len(&self) -> usize {
self.fields.len()
}
/// Returns `true` when the schema holds no fields.
#[inline]
pub fn is_empty(&self) -> bool {
self.fields.is_empty()
}
/// Returns a shared reference to the schema's metadata.
pub fn metadata(&self) -> &Metadata {
&self.metadata
}
/// Returns a mutable reference to the schema's metadata.
pub fn metadata_mut(&mut self) -> &mut Metadata {
&mut self.metadata
}
/// Renames the field `old` to `new`, keeping it at its current position.
///
/// Returns the previous name, or `None` (leaving the schema untouched) when `old` is not
/// present.
///
/// If `new` already names another field, that field's value is overwritten with the
/// renamed field's value and the schema shrinks by one entry; the relative order of the
/// remaining fields is not guaranteed in that case.
pub fn rename(&mut self, old: &str, new: PlSmallStr) -> Option<PlSmallStr> {
    // Remove `old`; the last entry is moved into the vacated slot at `old_index`.
    let (old_index, old_name, dtype) = self.fields.swap_remove_full(old)?;
    // Normally this appends `new` at the end. If `new` already existed it is updated in
    // place instead and nothing is appended.
    let (new_index, _) = self.fields.insert_full(new, dtype);
    // Swap the renamed entry back into the original position. When `old` was the last
    // entry and `new` already existed, `old_index` now lies past the end of the map and
    // swapping would panic, so the swap is skipped in that case.
    if old_index < self.fields.len() {
        self.fields.swap_indices(old_index, new_index);
    }
    Some(old_name)
}
/// Inserts `value` under `key`, returning the value previously stored under that name
/// (if any).
pub fn insert(&mut self, key: PlSmallStr, value: Field) -> Option<Field> {
self.fields.insert(key, value)
}
/// Inserts `dtype` under `name` and moves that entry to position `index`.
///
/// If `name` is already present its value is replaced and the old value is returned as
/// `Some(_)`; otherwise `None` is returned.
///
/// # Errors
/// Returns an `OutOfBounds` error when `index` is greater than the current length.
pub fn insert_at_index(
    &mut self,
    mut index: usize,
    name: PlSmallStr,
    dtype: Field,
) -> PolarsResult<Option<Field>> {
    polars_ensure!(
        index <= self.len(),
        OutOfBounds:
        "index {} is out of bounds for schema with length {} (the max index allowed is self.len())",
        index,
        self.len()
    );

    let (current_index, replaced) = self.fields.insert_full(name, dtype);

    // When an existing entry was replaced the length did not grow, so an `index` equal to
    // the length must be pulled back to the last valid position.
    let past_the_end = index == self.len();
    if past_the_end && replaced.is_some() {
        index -= 1;
    }

    self.fields.move_index(current_index, index);
    Ok(replaced)
}
/// Returns a reference to the field called `name`, or `None` if there is no such field.
pub fn get(&self, name: &str) -> Option<&Field> {
self.fields.get(name)
}
/// Returns a mutable reference to the field called `name`, or `None` if there is no such
/// field.
pub fn get_mut(&mut self, name: &str) -> Option<&mut Field> {
self.fields.get_mut(name)
}
/// Returns a reference to the field called `name`.
///
/// # Errors
/// Returns `SchemaFieldNotFound` (carrying `name`) when the field does not exist.
pub fn try_get(&self, name: &str) -> PolarsResult<&Field> {
    match self.get(name) {
        Some(field) => Ok(field),
        None => Err(polars_err!(SchemaFieldNotFound: "{name}")),
    }
}
/// Returns a mutable reference to the field called `name`.
///
/// # Errors
/// Returns `SchemaFieldNotFound` (carrying `name`) when the field does not exist.
pub fn try_get_mut(&mut self, name: &str) -> PolarsResult<&mut Field> {
    match self.fields.get_mut(name) {
        Some(field) => Ok(field),
        None => Err(polars_err!(SchemaFieldNotFound: "{name}")),
    }
}
/// Looks up `name` and returns `(index, stored name, field)`, or `None` if absent.
pub fn get_full(&self, name: &str) -> Option<(usize, &PlSmallStr, &Field)> {
self.fields.get_full(name)
}
/// Looks up `name` and returns `(index, stored name, field)`.
///
/// # Errors
/// Returns `SchemaFieldNotFound` (carrying `name`) when the field does not exist.
pub fn try_get_full(&self, name: &str) -> PolarsResult<(usize, &PlSmallStr, &Field)> {
    match self.fields.get_full(name) {
        Some(found) => Ok(found),
        None => Err(polars_err!(SchemaFieldNotFound: "{name}")),
    }
}
/// Returns the `(name, field)` pair stored at position `index`, or `None` when `index` is
/// out of bounds.
pub fn get_at_index(&self, index: usize) -> Option<(&PlSmallStr, &Field)> {
self.fields.get_index(index)
}
/// Returns the `(name, field)` pair stored at position `index`.
///
/// # Errors
/// Returns a `ComputeError` when `index` is out of bounds.
pub fn try_get_at_index(&self, index: usize) -> PolarsResult<(&PlSmallStr, &Field)> {
    match self.fields.get_index(index) {
        Some(entry) => Ok(entry),
        None => Err(
            polars_err!(ComputeError: "index {index} out of bounds with 'schema' of len: {}", self.len()),
        ),
    }
}
/// Returns mutable references to both the name and the field stored at position `index`,
/// or `None` when `index` is out of bounds.
///
/// NOTE(review): this hands out `&mut PlSmallStr` via `MutableKeys::get_index_mut2`;
/// presumably changing the name through it does not update the map's lookup table, so
/// callers should not alter the key's hash/equality — confirm against the indexmap docs.
pub fn get_at_index_mut(&mut self, index: usize) -> Option<(&mut PlSmallStr, &mut Field)> {
self.fields.get_index_mut2(index)
}
/// Removes the field called `name` and returns it, or `None` if it was not present.
///
/// Implemented with `swap_remove`: per the indexmap documentation the last entry is moved
/// into the vacated slot, so the positional order of the remaining fields may change.
pub fn remove(&mut self, name: &str) -> Option<Field> {
self.fields.swap_remove(name)
}
/// Removes the field called `name` and returns it, or `None` if it was not present.
///
/// Implemented with `shift_remove`: per the indexmap documentation the following entries
/// are shifted down, so the relative order of the remaining fields is kept.
pub fn shift_remove(&mut self, name: &str) -> Option<Field> {
self.fields.shift_remove(name)
}
/// Removes the entry at position `index` and returns its `(name, field)` pair, or `None`
/// when `index` is out of bounds.
///
/// Implemented with `shift_remove_index`: per the indexmap documentation the following
/// entries are shifted down, so the relative order of the remaining fields is kept.
pub fn shift_remove_index(&mut self, index: usize) -> Option<(PlSmallStr, Field)> {
self.fields.shift_remove_index(index)
}
/// Returns `true` when a field called `name` exists in the schema.
pub fn contains(&self, name: &str) -> bool {
    self.fields.contains_key(name)
}
/// Replaces the value of the existing field `name` with `dtype`.
///
/// Returns the previous value, or `None` (changing nothing) when `name` is not present.
pub fn set_dtype(&mut self, name: &str, dtype: Field) -> Option<Field> {
    self.fields
        .get_mut(name)
        .map(|slot| std::mem::replace(slot, dtype))
}
/// Replaces the value of the field at position `index` with `dtype`.
///
/// Returns the previous value, or `None` (changing nothing) when `index` is out of bounds.
pub fn set_dtype_at_index(&mut self, index: usize, dtype: Field) -> Option<Field> {
    self.fields
        .get_index_mut(index)
        .map(|(_, slot)| std::mem::replace(slot, dtype))
}
/// Inserts `dtype` under `name`, returning the value previously stored under that name
/// (if any). Behaves exactly like [`Self::insert`].
pub fn with_column(&mut self, name: PlSmallStr, dtype: Field) -> Option<Field> {
    self.insert(name, dtype)
}
pub fn try_insert(&mut self, name: PlSmallStr, value: Field) -> PolarsResult<()> {
if self.fields.contains_key(&name) {
polars_bail!(Duplicate: "column '{}' is duplicate", name)
}
self.fields.insert(name, value);
Ok(())
}
/// Appends every `(name, field)` pair from `columns` to the schema, in order.
///
/// # Errors
/// Stops at, and returns, the first `Duplicate` error raised by [`Self::try_insert`];
/// pairs inserted before that point remain in the schema.
pub fn hstack_mut(
    &mut self,
    columns: impl IntoIterator<Item = impl Into<(PlSmallStr, Field)>>,
) -> PolarsResult<()> {
    columns.into_iter().try_for_each(|column| {
        let (name, field) = column.into();
        self.try_insert(name, field)
    })
}
/// Consuming variant of [`Self::hstack_mut`]: appends `columns` and returns the schema.
///
/// # Errors
/// Returns the first `Duplicate` error raised while appending.
pub fn hstack(
    mut self,
    columns: impl IntoIterator<Item = impl Into<(PlSmallStr, Field)>>,
) -> PolarsResult<Self> {
    self.hstack_mut(columns).map(|()| self)
}
/// Reorders the fields in place according to the key that `sort_key` computes from each
/// `(name, field)` pair.
pub fn sort_by_key<T, F>(&mut self, sort_key: F)
where
T: Ord,
F: FnMut(&PlSmallStr, &Field) -> T,
{
self.fields.sort_by_key(sort_key);
}
pub fn merge(&mut self, other: Self) {
self.fields.extend(other.fields)
}
/// Iterates over `(name, field)` pairs in positional order.
pub fn iter(&self) -> impl ExactSizeIterator<Item = (&PlSmallStr, &Field)> + '_ {
self.fields.iter()
}
/// Iterates over `(name, field)` pairs in positional order, with mutable access to each
/// field (names stay read-only).
pub fn iter_mut(&mut self) -> impl ExactSizeIterator<Item = (&PlSmallStr, &mut Field)> + '_ {
self.fields.iter_mut()
}
/// Iterates over the field names in positional order.
pub fn iter_names(&self) -> impl '_ + ExactSizeIterator<Item = &PlSmallStr> {
    self.fields.keys()
}
/// Iterates over owned clones of the field names in positional order.
pub fn iter_names_cloned(&self) -> impl '_ + ExactSizeIterator<Item = PlSmallStr> {
    self.fields.keys().cloned()
}
/// Iterates over the field values in positional order.
pub fn iter_values(&self) -> impl '_ + ExactSizeIterator<Item = &Field> {
    self.fields.values()
}
/// Consumes the schema and iterates over the owned field values in positional order.
pub fn into_iter_values(self) -> impl ExactSizeIterator<Item = Field> {
self.fields.into_values()
}
/// Iterates over mutable references to the field values in positional order.
pub fn iter_values_mut(&mut self) -> impl '_ + ExactSizeIterator<Item = &mut Field> {
    self.fields.values_mut()
}
/// Returns the positional index of the field called `name`, or `None` if absent.
pub fn index_of(&self, name: &str) -> Option<usize> {
self.fields.get_index_of(name)
}
pub fn try_index_of(&self, name: &str) -> PolarsResult<usize> {
let Some(i) = self.fields.get_index_of(name) else {
polars_bail!(
ColumnNotFound:
"unable to find column {:?}; valid columns: {:?}",
name, self.iter_names().collect::<Vec<_>>(),
)
};
Ok(i)
}
/// Collects the fields that exist in only one of the two schemas.
///
/// Entries of `self` whose name is missing from `other` are appended to `self_extra`, and
/// entries of `other` whose name is missing from `self` are appended to `other_extra`.
/// Each appended element is `(index in its own schema, (name, field))`. Only names are
/// compared; field values are not inspected.
pub fn field_compare<'a, 'b>(
    &'a self,
    other: &'b Self,
    self_extra: &mut Vec<(usize, (&'a PlSmallStr, &'a Field))>,
    other_extra: &mut Vec<(usize, (&'b PlSmallStr, &'b Field))>,
) {
    for (index, (name, field)) in self.fields.iter().enumerate() {
        if !other.fields.contains_key(name.as_str()) {
            self_extra.push((index, (name, field)));
        }
    }
    for (index, (name, field)) in other.fields.iter().enumerate() {
        if !self.fields.contains_key(name.as_str()) {
            other_extra.push((index, (name, field)));
        }
    }
}
}
impl<Field, Metadata> Schema<Field, Metadata>
where
Field: Clone,
Metadata: Clone,
{
pub fn new_inserting_at_index(
&self,
index: usize,
name: PlSmallStr,
field: Field,
) -> PolarsResult<Self> {
polars_ensure!(
index <= self.len(),
OutOfBounds:
"index {} is out of bounds for schema with length {} (the max index allowed is self.len())",
index,
self.len()
);
let mut new = Self {
fields: Default::default(),
metadata: self.metadata().clone(),
};
let mut iter = self.fields.iter().filter_map(|(fld_name, dtype)| {
(fld_name != &name).then_some((fld_name.clone(), dtype.clone()))
});
new.fields.extend(iter.by_ref().take(index));
new.fields.insert(name.clone(), field);
new.fields.extend(iter);
Ok(new)
}
/// Clones all fields of `other` into `self`.
///
/// Only the fields are merged; the metadata of `self` is left untouched.
pub fn merge_from_ref(&mut self, other: &Self) {
    let cloned = other
        .fields
        .iter()
        .map(|(name, field)| (name.clone(), field.clone()));
    self.fields.extend(cloned)
}
pub fn try_project<I>(&self, columns: I) -> PolarsResult<Self>
where
I: IntoIterator,
I::Item: AsRef<str>,
{
let fields = columns
.into_iter()
.map(|c| {
let name = c.as_ref();
let (_, name, dtype) = self
.fields
.get_full(name)
.ok_or_else(|| polars_err!(col_not_found = name))?;
PolarsResult::Ok((name.clone(), dtype.clone()))
})
.collect::<PolarsResult<PlIndexMap<PlSmallStr, _>>>()?;
Ok(Self {
fields,
metadata: self.metadata().clone(),
})
}
pub fn try_project_indices(&self, indices: &[usize]) -> PolarsResult<Self> {
let fields = indices
.iter()
.map(|&i| {
let Some((k, v)) = self.fields.get_index(i) else {
polars_bail!(
SchemaFieldNotFound:
"projection index {} is out of bounds for schema of length {}",
i, self.fields.len()
);
};
Ok((k.clone(), v.clone()))
})
.collect::<PolarsResult<PlIndexMap<_, _>>>()?;
Ok(Self {
fields,
metadata: self.metadata().clone(),
})
}
/// Consumes the schema and keeps only the fields for which `predicate` returns `true`.
///
/// `predicate` receives each field's position in the original schema together with a
/// reference to the field. The metadata is carried over unchanged.
pub fn filter<F: Fn(usize, &Field) -> bool>(self, predicate: F) -> Self {
    // `self` is owned, so the metadata can be moved out instead of cloned.
    let Self { fields, metadata } = self;
    let fields = fields
        .into_iter()
        .enumerate()
        .filter_map(|(index, (name, field))| predicate(index, &field).then_some((name, field)))
        .collect();
    Self { fields, metadata }
}
}
impl<Field: Hash, Metadata: Hash> Hash for Schema<Field, Metadata> {
    /// Hashes the schema through `SchemaHashEqWrap`, i.e. the fields as an ordered slice
    /// followed by the metadata.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let view = SchemaHashEqWrap::from(self);
        view.hash(state)
    }
}
impl<Field: PartialEq, Metadata: PartialEq> PartialEq for Schema<Field, Metadata> {
    /// Compares two schemas through `SchemaHashEqWrap`, i.e. the fields as ordered slices
    /// plus the metadata, so the order of the fields matters.
    fn eq(&self, other: &Self) -> bool {
        let lhs = SchemaHashEqWrap::from(self);
        let rhs = SchemaHashEqWrap::from(other);
        lhs == rhs
    }
}
/// Borrowed view of a [`Schema`] used to implement `Hash` and `PartialEq`.
///
/// The fields are exposed as an `indexmap` slice, so the derived impls treat them as an
/// ordered sequence of `(name, field)` entries.
#[derive(Hash, PartialEq)]
struct SchemaHashEqWrap<'a, Field, Metadata> {
/// The schema's entries viewed as an ordered slice.
fields: &'a indexmap::map::Slice<PlSmallStr, Field>,
/// The schema's metadata.
metadata: &'a Metadata,
}
impl<'a, Field, Metadata> From<&'a Schema<Field, Metadata>>
for SchemaHashEqWrap<'a, Field, Metadata>
{
fn from(value: &'a Schema<Field, Metadata>) -> Self {
let Schema { fields, metadata } = value;
Self {
fields: fields.as_slice(),
metadata,
}
}
}
impl<Field, Metadata: Default> From<PlIndexMap<PlSmallStr, Field>> for Schema<Field, Metadata> {
    /// Wraps an existing field map, pairing it with `Metadata::default()`.
    fn from(fields: PlIndexMap<PlSmallStr, Field>) -> Self {
        let metadata = Metadata::default();
        Self { fields, metadata }
    }
}
impl<F, Field, Metadata: Default> FromIterator<F> for Schema<Field, Metadata>
where
    F: Into<(PlSmallStr, Field)>,
{
    /// Collects `(name, field)` pairs into a schema with `Metadata::default()`.
    ///
    /// Duplicate names are not rejected here; use `from_iter_check_duplicates` for that.
    fn from_iter<I: IntoIterator<Item = F>>(iter: I) -> Self {
        let pairs = iter.into_iter().map(|item| item.into());
        Self {
            fields: PlIndexMap::from_iter(pairs),
            metadata: Metadata::default(),
        }
    }
}
impl<F, Field, Metadata> Extend<F> for Schema<Field, Metadata>
where
    F: Into<(PlSmallStr, Field)>,
{
    /// Inserts every `(name, field)` pair produced by `iter` into the schema.
    fn extend<T: IntoIterator<Item = F>>(&mut self, iter: T) {
        let pairs = iter.into_iter().map(|item| item.into());
        self.fields.extend(pairs)
    }
}
/// Consuming iteration over the owned `(name, field)` pairs; the metadata is dropped.
impl<Field, Metadata> IntoIterator for Schema<Field, Metadata> {
type IntoIter = <PlIndexMap<PlSmallStr, Field> as IntoIterator>::IntoIter;
type Item = (PlSmallStr, Field);
// Delegates directly to the underlying map's owning iterator.
fn into_iter(self) -> Self::IntoIter {
self.fields.into_iter()
}
}
#[cfg(test)]
mod tests {
    use super::Schema;

    /// Equality is order-sensitive: the same names in a different order must differ.
    #[test]
    fn test_schema_eq_checks_key_order() {
        let reversed: Schema<(), ()> = Schema::from_iter([("b".into(), ()), ("a".into(), ())]);
        let forward: Schema<(), ()> = Schema::from_iter([("a".into(), ()), ("b".into(), ())]);
        assert_ne!(forward, reversed);
    }
}