Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/frame/validation.rs
8424 views
1
use polars_error::{PolarsResult, polars_bail};
2
use polars_utils::aliases::{InitHashMaps, PlHashSet};
3
4
use crate::frame::column::Column;
5
6
/// Checks for duplicates and mismatching heights.
7
pub(super) fn validate_columns_slice(
8
expected_height: usize,
9
columns: &[Column],
10
) -> PolarsResult<()> {
11
if columns.is_empty() {
12
return Ok(());
13
}
14
15
let expected_height_msg = || {
16
if let Some(c) = columns.iter().find(|c| c.len() == expected_height) {
17
format!("height of column '{}' ({})", c.name(), c.len())
18
} else {
19
format!("DataFrame height ({expected_height})")
20
}
21
};
22
23
if columns.len() <= 4 {
24
// Too small to be worth spawning a hashmap for, this is at most 6 comparisons.
25
for (i, col) in columns.iter().enumerate() {
26
if col.len() != expected_height {
27
polars_bail!(
28
ShapeMismatch:
29
"height of column '{}' ({}) does not match {}",
30
col.name(), col.len(), expected_height_msg()
31
)
32
}
33
34
let name = col.name();
35
36
for other in columns.iter().skip(i + 1) {
37
if other.name() == name {
38
polars_bail!(duplicate = name);
39
}
40
}
41
}
42
} else {
43
let mut names = PlHashSet::with_capacity(columns.len());
44
45
for col in columns {
46
let col_name = col.name();
47
let col_len = col.len();
48
49
if col_len != expected_height {
50
polars_bail!(
51
ShapeMismatch:
52
"height of column '{}' ({}) does not match {}",
53
col_name, col_len, expected_height_msg()
54
)
55
}
56
57
if names.contains(col_name) {
58
polars_bail!(duplicate = col_name)
59
}
60
61
names.insert(col_name);
62
}
63
}
64
65
Ok(())
66
}
67
68
pub(super) fn ensure_names_unique<T>(names: &[T]) -> PolarsResult<()>
69
where
70
T: AsRef<str>,
71
{
72
// Always unique.
73
if names.len() <= 1 {
74
return Ok(());
75
}
76
77
if names.len() <= 4 {
78
// Too small to be worth spawning a hashmap for, this is at most 6 comparisons.
79
for i in 0..names.len() - 1 {
80
let name = names[i].as_ref();
81
82
for other in names.iter().skip(i + 1) {
83
if name == other.as_ref() {
84
polars_bail!(duplicate = name);
85
}
86
}
87
}
88
} else {
89
let mut names_set: PlHashSet<&str> = PlHashSet::with_capacity(names.len());
90
91
for name in names {
92
let name = name.as_ref();
93
94
if !names_set.insert(name) {
95
polars_bail!(duplicate = name);
96
}
97
}
98
}
99
Ok(())
100
}
101
102