Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/dsl/selector.rs
6939 views
1
use std::fmt;
2
use std::ops::{
3
BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not, Sub, SubAssign,
4
};
5
6
#[cfg(feature = "serde")]
7
use serde::{Deserialize, Serialize};
8
9
use super::*;
10
11
#[cfg(feature = "dsl-schema")]
impl schemars::JsonSchema for TimeUnitSet {
    /// Short schema name for this type.
    fn schema_name() -> String {
        String::from("TimeUnitSet")
    }

    /// Schema identifier: the module path followed by the type name.
    fn schema_id() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed(concat!(module_path!(), "::TimeUnitSet"))
    }

    /// Describes the set as a string with the custom `bitflags` format.
    ///
    /// The generator argument is not used.
    fn json_schema(_generator: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
        use schemars::schema::{InstanceType, Schema, SchemaObject};
        use serde_json::{Map, Value};

        // One entry per named flag: flag name -> bit pattern.
        let mut name_to_bits: Map<String, Value> = Map::new();
        for (name, flag) in Self::all().iter_names() {
            name_to_bits.insert(name.to_owned(), Value::from(flag.bits()));
        }

        // Add a map of flag names and bit patterns to detect schema changes
        let mut extensions = schemars::Map::new();
        extensions.insert("bitflags".to_owned(), Value::Object(name_to_bits));

        let object = SchemaObject {
            instance_type: Some(InstanceType::String.into()),
            format: Some("bitflags".to_owned()),
            extensions,
            ..Default::default()
        };
        Schema::Object(object)
    }
}
40
41
#[derive(Clone, Hash, PartialEq, Eq, Debug)]
42
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
43
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
44
pub enum TimeZoneSet {
45
Any,
46
AnySet,
47
Unset,
48
UnsetOrAnyOf(Arc<[TimeZone]>),
49
AnyOf(Arc<[TimeZone]>),
50
}
51
52
bitflags::bitflags! {
    /// Set of time units, stored as one bit per unit in a `u8`.
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    pub struct TimeUnitSet: u8 {
        /// Nanosecond time unit.
        const NANO_SECONDS = 1 << 0;
        /// Microsecond time unit.
        const MICRO_SECONDS = 1 << 1;
        /// Millisecond time unit.
        const MILLI_SECONDS = 1 << 2;
    }
}
65
66
impl From<TimeUnit> for TimeUnitSet {
67
fn from(value: TimeUnit) -> Self {
68
match value {
69
TimeUnit::Nanoseconds => TimeUnitSet::NANO_SECONDS,
70
TimeUnit::Microseconds => TimeUnitSet::MICRO_SECONDS,
71
TimeUnit::Milliseconds => TimeUnitSet::MILLI_SECONDS,
72
}
73
}
74
}
75
76
impl fmt::Display for TimeUnitSet {
77
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78
if self.is_all() {
79
f.write_str("*")?;
80
} else {
81
if self.bits().count_ones() != 1 {
82
f.write_str("[")?;
83
}
84
85
if self.contains(TimeUnitSet::NANO_SECONDS) {
86
f.write_str("'ns'")?;
87
if self.intersects(TimeUnitSet::MICRO_SECONDS | TimeUnitSet::MILLI_SECONDS) {
88
f.write_str(", ")?;
89
}
90
}
91
if self.contains(TimeUnitSet::MICRO_SECONDS) {
92
f.write_str("'us'")?;
93
if self.contains(TimeUnitSet::MILLI_SECONDS) {
94
f.write_str(", ")?;
95
}
96
}
97
if self.contains(TimeUnitSet::MILLI_SECONDS) {
98
f.write_str("'ms'")?;
99
}
100
101
if self.bits().count_ones() != 1 {
102
f.write_str("]")?;
103
}
104
}
105
106
Ok(())
107
}
108
}
109
110
#[derive(Clone, PartialEq, Hash, Debug, Eq)]
111
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
112
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
113
pub enum DataTypeSelector {
114
Union(Arc<DataTypeSelector>, Arc<DataTypeSelector>),
115
Difference(Arc<DataTypeSelector>, Arc<DataTypeSelector>),
116
ExclusiveOr(Arc<DataTypeSelector>, Arc<DataTypeSelector>),
117
Intersect(Arc<DataTypeSelector>, Arc<DataTypeSelector>),
118
119
Wildcard,
120
Empty,
121
122
AnyOf(Arc<[DataType]>),
123
124
Integer,
125
UnsignedInteger,
126
SignedInteger,
127
Float,
128
129
Enum,
130
Categorical,
131
132
Nested,
133
List(Option<Arc<DataTypeSelector>>),
134
Array(Option<Arc<DataTypeSelector>>, Option<usize>),
135
Struct,
136
137
Decimal,
138
Numeric,
139
Temporal,
140
/// Selector for `DataType::Datetime` with optional matching on TimeUnit and TimeZone.
141
Datetime(TimeUnitSet, TimeZoneSet),
142
/// Selector for `DataType::Duration` with optional matching on TimeUnit.
143
Duration(TimeUnitSet),
144
Object,
145
}
146
147
#[derive(Clone, PartialEq, Hash, Debug, Eq)]
148
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
149
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
150
pub enum Selector {
151
Union(Arc<Selector>, Arc<Selector>),
152
Difference(Arc<Selector>, Arc<Selector>),
153
ExclusiveOr(Arc<Selector>, Arc<Selector>),
154
Intersect(Arc<Selector>, Arc<Selector>),
155
156
// Leaf nodes
157
158
// These 2 return their inputs in given order not in schema order.
159
ByName {
160
names: Arc<[PlSmallStr]>,
161
strict: bool,
162
},
163
ByIndex {
164
indices: Arc<[i64]>,
165
strict: bool,
166
},
167
168
Matches(PlSmallStr),
169
ByDType(DataTypeSelector),
170
171
Wildcard,
172
Empty,
173
}
174
175
fn dtype_selector(
176
schema: &Schema,
177
ignored_columns: &PlHashSet<PlSmallStr>,
178
f: impl Fn(&DataType) -> bool,
179
) -> PlIndexSet<PlSmallStr> {
180
PlIndexSet::from_iter(
181
schema
182
.iter()
183
.filter(|(name, dtype)| !ignored_columns.contains(*name) && f(dtype))
184
.map(|(name, _)| name.clone()),
185
)
186
}
187
188
impl Selector {
189
/// Turns the selector into an ordered set of selected columns from the schema.
190
///
191
/// - The order of the columns corresponds to the order in the schema.
192
/// - Column names in `ignored_columns` are only used if they are explicitly mentioned by a
193
/// `ByName` or `Nth`.
194
pub fn into_columns(
195
&self,
196
schema: &Schema,
197
ignored_columns: &PlHashSet<PlSmallStr>,
198
) -> PolarsResult<PlIndexSet<PlSmallStr>> {
199
let out = match self {
200
Self::Union(lhs, rhs) => {
201
let mut lhs = lhs.into_columns(schema, ignored_columns)?;
202
let rhs = rhs.into_columns(schema, ignored_columns)?;
203
lhs.extend(rhs);
204
sort_schema_order(&mut lhs, schema);
205
lhs
206
},
207
Self::Difference(lhs, rhs) => {
208
let mut lhs = lhs.into_columns(schema, ignored_columns)?;
209
let rhs = rhs.into_columns(schema, ignored_columns)?;
210
lhs.retain(|n| !rhs.contains(n));
211
sort_schema_order(&mut lhs, schema);
212
lhs
213
},
214
Self::ExclusiveOr(lhs, rhs) => {
215
let lhs = lhs.into_columns(schema, ignored_columns)?;
216
let rhs = rhs.into_columns(schema, ignored_columns)?;
217
let mut out = PlIndexSet::with_capacity(lhs.len() + rhs.len());
218
out.extend(lhs.iter().filter(|n| !rhs.contains(*n)).cloned());
219
out.extend(rhs.into_iter().filter(|n| !lhs.contains(n)));
220
sort_schema_order(&mut out, schema);
221
out
222
},
223
Self::Intersect(lhs, rhs) => {
224
let mut lhs = lhs.into_columns(schema, ignored_columns)?;
225
let rhs = rhs.into_columns(schema, ignored_columns)?;
226
lhs.retain(|n| rhs.contains(n));
227
sort_schema_order(&mut lhs, schema);
228
lhs
229
},
230
231
Self::ByDType(dts) => dts.into_columns(schema, ignored_columns)?,
232
Self::ByName { names, strict } => {
233
let mut out = PlIndexSet::with_capacity(names.len());
234
for name in names.iter() {
235
if schema.contains(name) {
236
out.insert(name.clone());
237
} else if *strict {
238
polars_bail!(col_not_found = name);
239
}
240
}
241
out
242
},
243
Self::ByIndex { indices, strict } => {
244
let mut out = PlIndexSet::with_capacity(indices.len());
245
let mut set = PlIndexSet::with_capacity(indices.len());
246
for &idx in indices.iter() {
247
let Some(idx) = idx.negative_to_usize(schema.len()) else {
248
polars_ensure!(!strict, ColumnNotFound: "cannot get the {idx}-th column when schema has {} columns", schema.len());
249
continue;
250
};
251
let (name, _) = schema.get_at_index(idx).unwrap();
252
if !set.insert(idx) {
253
polars_bail!(Duplicate: "duplicate column name {name}");
254
}
255
out.insert(name.clone());
256
}
257
out
258
},
259
Self::Matches(regex_str) => {
260
let re = polars_utils::regex_cache::compile_regex(regex_str).map_err(
261
|_| polars_err!(InvalidOperation: "invalid regex in selector '{regex_str}'"),
262
)?;
263
PlIndexSet::from_iter(
264
schema
265
.iter_names()
266
.filter(|name| !ignored_columns.contains(*name) && re.is_match(name))
267
.cloned(),
268
)
269
},
270
Self::Wildcard => PlIndexSet::from_iter(
271
schema
272
.iter_names()
273
.filter(|name| !ignored_columns.contains(*name))
274
.cloned(),
275
),
276
Self::Empty => Default::default(),
277
};
278
Ok(out)
279
}
280
281
pub fn as_expr(self) -> Expr {
282
self.into()
283
}
284
285
pub fn to_dtype_selector(&self) -> Option<DataTypeSelector> {
286
use DataTypeSelector as DS;
287
match self {
288
Self::Union(l, r) => Some(DS::Union(
289
Arc::new(l.to_dtype_selector()?),
290
Arc::new(r.to_dtype_selector()?),
291
)),
292
Self::Difference(l, r) => Some(DS::Difference(
293
Arc::new(l.to_dtype_selector()?),
294
Arc::new(r.to_dtype_selector()?),
295
)),
296
Self::ExclusiveOr(l, r) => Some(DS::ExclusiveOr(
297
Arc::new(l.to_dtype_selector()?),
298
Arc::new(r.to_dtype_selector()?),
299
)),
300
Self::Intersect(l, r) => Some(DS::ExclusiveOr(
301
Arc::new(l.to_dtype_selector()?),
302
Arc::new(r.to_dtype_selector()?),
303
)),
304
Self::Wildcard => Some(DS::Wildcard),
305
Self::Empty => Some(DS::Empty),
306
307
Self::ByDType(dts) => Some(dts.clone()),
308
309
Self::ByName { .. } | Self::ByIndex { .. } | Self::Matches(_) => None,
310
}
311
}
312
313
/// Exclude a column from a wildcard/regex selection.
314
///
315
/// You may also use regexes in the exclude as long as they start with `^` and end with `$`.
316
pub fn exclude_cols(self, columns: impl IntoVec<PlSmallStr>) -> Self {
317
self - cols(columns.into_vec())
318
}
319
320
pub fn exclude_dtype<D: AsRef<[DataType]>>(self, dtypes: D) -> Self {
321
self - DataTypeSelector::AnyOf(dtypes.as_ref().into()).as_selector()
322
}
323
}
324
325
fn list_matches(inner_dts: Option<&DataTypeSelector>, dtype: &DataType) -> bool {
326
matches!(dtype, DataType::List(inner) if inner_dts.is_none_or(|dts| dts.matches(inner.as_ref())))
327
}
328
329
fn array_matches(
330
inner_dts: Option<&DataTypeSelector>,
331
swidth: Option<usize>,
332
dtype: &DataType,
333
) -> bool {
334
#[cfg(feature = "dtype-array")]
335
{
336
matches!(dtype, DataType::Array(inner, width) if inner_dts.is_none_or(|dts| dts.matches(inner.as_ref())) && swidth.is_none_or(|w| w == *width))
337
}
338
339
#[cfg(not(feature = "dtype-array"))]
340
{
341
false
342
}
343
}
344
345
fn datetime_matches(stu: TimeUnitSet, stz: &TimeZoneSet, dtype: &DataType) -> bool {
346
let DataType::Datetime(tu, tz) = dtype else {
347
return false;
348
};
349
350
if !stu.contains(TimeUnitSet::from(*tu)) {
351
return false;
352
}
353
354
use TimeZoneSet as TZS;
355
match (stz, tz) {
356
(TZS::Any, _)
357
| (TZS::Unset, None)
358
| (TZS::UnsetOrAnyOf(_), None)
359
| (TZS::AnySet, Some(_)) => true,
360
(TZS::AnyOf(stz) | TZS::UnsetOrAnyOf(stz), Some(tz)) => stz.contains(tz),
361
_ => false,
362
}
363
}
364
365
fn sort_schema_order(set: &mut PlIndexSet<PlSmallStr>, schema: &Schema) {
366
set.sort_unstable_by(|l, r| {
367
schema
368
.index_of(l)
369
.unwrap()
370
.cmp(&schema.index_of(r).unwrap())
371
})
372
}
373
374
fn duration_matches(stu: TimeUnitSet, dtype: &DataType) -> bool {
375
matches!(dtype, DataType::Duration(tu) if stu.contains(TimeUnitSet::from(*tu)))
376
}
377
378
impl DataTypeSelector {
379
pub fn matches(&self, dtype: &DataType) -> bool {
380
match self {
381
Self::Union(lhs, rhs) => lhs.matches(dtype) || rhs.matches(dtype),
382
Self::Difference(lhs, rhs) => lhs.matches(dtype) && !rhs.matches(dtype),
383
Self::ExclusiveOr(lhs, rhs) => lhs.matches(dtype) ^ rhs.matches(dtype),
384
Self::Intersect(lhs, rhs) => lhs.matches(dtype) && rhs.matches(dtype),
385
Self::Wildcard => true,
386
Self::Empty => false,
387
Self::AnyOf(dtypes) => dtypes.iter().any(|dt| dt == dtype),
388
Self::Integer => dtype.is_integer(),
389
Self::UnsignedInteger => dtype.is_unsigned_integer(),
390
Self::SignedInteger => dtype.is_signed_integer(),
391
Self::Float => dtype.is_float(),
392
Self::Enum => dtype.is_enum(),
393
Self::Categorical => dtype.is_categorical(),
394
Self::Nested => dtype.is_nested(),
395
Self::List(inner_dts) => list_matches(inner_dts.as_deref(), dtype),
396
Self::Array(inner_dts, swidth) => array_matches(inner_dts.as_deref(), *swidth, dtype),
397
Self::Struct => dtype.is_struct(),
398
Self::Decimal => dtype.is_decimal(),
399
Self::Numeric => dtype.is_numeric(),
400
Self::Temporal => dtype.is_temporal(),
401
Self::Datetime(stu, stz) => datetime_matches(*stu, stz, dtype),
402
Self::Duration(stu) => duration_matches(*stu, dtype),
403
Self::Object => dtype.is_object(),
404
}
405
}
406
407
#[allow(clippy::wrong_self_convention)]
408
fn into_columns(
409
&self,
410
schema: &Schema,
411
ignored_columns: &PlHashSet<PlSmallStr>,
412
) -> PolarsResult<PlIndexSet<PlSmallStr>> {
413
Ok(match self {
414
Self::Union(lhs, rhs) => {
415
let mut lhs = lhs.into_columns(schema, ignored_columns)?;
416
let rhs = rhs.into_columns(schema, ignored_columns)?;
417
lhs.extend(rhs);
418
sort_schema_order(&mut lhs, schema);
419
lhs
420
},
421
Self::Difference(lhs, rhs) => {
422
let mut lhs = lhs.into_columns(schema, ignored_columns)?;
423
let rhs = rhs.into_columns(schema, ignored_columns)?;
424
lhs.retain(|n| !rhs.contains(n));
425
sort_schema_order(&mut lhs, schema);
426
lhs
427
},
428
Self::ExclusiveOr(lhs, rhs) => {
429
let lhs = lhs.into_columns(schema, ignored_columns)?;
430
let rhs = rhs.into_columns(schema, ignored_columns)?;
431
let mut out = PlIndexSet::with_capacity(lhs.len() + rhs.len());
432
out.extend(lhs.iter().filter(|n| !rhs.contains(*n)).cloned());
433
out.extend(rhs.into_iter().filter(|n| !lhs.contains(n)));
434
sort_schema_order(&mut out, schema);
435
out
436
},
437
Self::Intersect(lhs, rhs) => {
438
let mut lhs = lhs.into_columns(schema, ignored_columns)?;
439
let rhs = rhs.into_columns(schema, ignored_columns)?;
440
lhs.retain(|n| rhs.contains(n));
441
sort_schema_order(&mut lhs, schema);
442
lhs
443
},
444
Self::Wildcard => schema
445
.iter_names()
446
.filter(|n| ignored_columns.contains(*n))
447
.cloned()
448
.collect(),
449
Self::Empty => Default::default(),
450
Self::AnyOf(dtypes) => {
451
let dtypes = PlHashSet::from_iter(dtypes.iter().cloned());
452
dtype_selector(schema, ignored_columns, |dtype| dtypes.contains(dtype))
453
},
454
Self::Integer => dtype_selector(schema, ignored_columns, |dtype| dtype.is_integer()),
455
Self::UnsignedInteger => {
456
dtype_selector(schema, ignored_columns, |dtype| dtype.is_unsigned_integer())
457
},
458
Self::SignedInteger => {
459
dtype_selector(schema, ignored_columns, |dtype| dtype.is_signed_integer())
460
},
461
Self::Float => dtype_selector(schema, ignored_columns, |dtype| dtype.is_float()),
462
Self::Enum => dtype_selector(schema, ignored_columns, |dtype| dtype.is_enum()),
463
Self::Categorical => {
464
dtype_selector(schema, ignored_columns, |dtype| dtype.is_categorical())
465
},
466
Self::Nested => dtype_selector(schema, ignored_columns, |dtype| dtype.is_nested()),
467
Self::List(inner_dts) => dtype_selector(schema, ignored_columns, |dtype| {
468
list_matches(inner_dts.as_deref(), dtype)
469
}),
470
Self::Array(inner_dts, swidth) => dtype_selector(schema, ignored_columns, |dtype| {
471
array_matches(inner_dts.as_deref(), *swidth, dtype)
472
}),
473
Self::Struct => dtype_selector(schema, ignored_columns, |dtype| dtype.is_struct()),
474
Self::Decimal => dtype_selector(schema, ignored_columns, |dtype| dtype.is_decimal()),
475
Self::Numeric => dtype_selector(schema, ignored_columns, |dtype| dtype.is_numeric()),
476
Self::Temporal => dtype_selector(schema, ignored_columns, |dtype| dtype.is_temporal()),
477
Self::Datetime(stu, stz) => dtype_selector(schema, ignored_columns, |dtype| {
478
datetime_matches(*stu, stz, dtype)
479
}),
480
Self::Duration(stu) => dtype_selector(schema, ignored_columns, |dtype| {
481
duration_matches(*stu, dtype)
482
}),
483
Self::Object => dtype_selector(schema, ignored_columns, |dtype| dtype.is_object()),
484
})
485
}
486
487
pub fn as_selector(self) -> Selector {
488
Selector::ByDType(self)
489
}
490
}
491
492
impl BitOr for Selector {
493
type Output = Self;
494
fn bitor(self, rhs: Self) -> Self::Output {
495
Selector::Union(Arc::new(self), Arc::new(rhs))
496
}
497
}
498
499
impl BitOrAssign for Selector {
500
fn bitor_assign(&mut self, rhs: Self) {
501
*self = Selector::Union(
502
Arc::new(std::mem::replace(self, Self::Empty)),
503
Arc::new(rhs),
504
)
505
}
506
}
507
508
impl BitAnd for Selector {
509
type Output = Self;
510
fn bitand(self, rhs: Self) -> Self::Output {
511
Selector::Intersect(Arc::new(self), Arc::new(rhs))
512
}
513
}
514
515
impl BitAndAssign for Selector {
516
fn bitand_assign(&mut self, rhs: Self) {
517
*self = Selector::Intersect(
518
Arc::new(std::mem::replace(self, Self::Empty)),
519
Arc::new(rhs),
520
)
521
}
522
}
523
524
impl BitXor for Selector {
525
type Output = Self;
526
fn bitxor(self, rhs: Self) -> Self::Output {
527
Selector::ExclusiveOr(Arc::new(self), Arc::new(rhs))
528
}
529
}
530
531
impl BitXorAssign for Selector {
532
fn bitxor_assign(&mut self, rhs: Self) {
533
*self = Selector::ExclusiveOr(
534
Arc::new(std::mem::replace(self, Self::Empty)),
535
Arc::new(rhs),
536
)
537
}
538
}
539
540
impl Sub for Selector {
541
type Output = Self;
542
fn sub(self, rhs: Self) -> Self::Output {
543
Selector::Difference(Arc::new(self), Arc::new(rhs))
544
}
545
}
546
547
impl SubAssign for Selector {
548
fn sub_assign(&mut self, rhs: Self) {
549
*self = Selector::Difference(
550
Arc::new(std::mem::replace(self, Self::Empty)),
551
Arc::new(rhs),
552
)
553
}
554
}
555
556
impl Not for Selector {
557
type Output = Self;
558
fn not(self) -> Self::Output {
559
Self::Wildcard - self
560
}
561
}
562
563
impl BitOr for DataTypeSelector {
564
type Output = Self;
565
fn bitor(self, rhs: Self) -> Self::Output {
566
Self::Union(Arc::new(self), Arc::new(rhs))
567
}
568
}
569
570
impl BitOrAssign for DataTypeSelector {
571
fn bitor_assign(&mut self, rhs: Self) {
572
*self = Self::Union(
573
Arc::new(std::mem::replace(self, Self::Empty)),
574
Arc::new(rhs),
575
)
576
}
577
}
578
579
impl BitAnd for DataTypeSelector {
580
type Output = Self;
581
fn bitand(self, rhs: Self) -> Self::Output {
582
Self::Intersect(Arc::new(self), Arc::new(rhs))
583
}
584
}
585
586
impl BitAndAssign for DataTypeSelector {
587
fn bitand_assign(&mut self, rhs: Self) {
588
*self = Self::Intersect(
589
Arc::new(std::mem::replace(self, Self::Empty)),
590
Arc::new(rhs),
591
)
592
}
593
}
594
595
impl BitXor for DataTypeSelector {
596
type Output = Self;
597
fn bitxor(self, rhs: Self) -> Self::Output {
598
Self::ExclusiveOr(Arc::new(self), Arc::new(rhs))
599
}
600
}
601
602
impl BitXorAssign for DataTypeSelector {
603
fn bitxor_assign(&mut self, rhs: Self) {
604
*self = Self::ExclusiveOr(
605
Arc::new(std::mem::replace(self, Self::Empty)),
606
Arc::new(rhs),
607
)
608
}
609
}
610
611
impl Sub for DataTypeSelector {
612
type Output = Self;
613
fn sub(self, rhs: Self) -> Self::Output {
614
Self::Difference(Arc::new(self), Arc::new(rhs))
615
}
616
}
617
618
impl SubAssign for DataTypeSelector {
619
fn sub_assign(&mut self, rhs: Self) {
620
*self = Self::Difference(
621
Arc::new(std::mem::replace(self, Self::Empty)),
622
Arc::new(rhs),
623
)
624
}
625
}
626
627
impl Not for DataTypeSelector {
628
type Output = Self;
629
fn not(self) -> Self::Output {
630
Self::Wildcard - self
631
}
632
}
633
634
impl From<Selector> for Expr {
    /// Wraps a selector in the `Expr::Selector` variant.
    fn from(value: Selector) -> Self {
        Self::Selector(value)
    }
}
639
640
impl fmt::Display for Selector {
641
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
642
match self {
643
Self::Union(left, right) => write!(f, "[{left} | {right}]"),
644
Self::Difference(left, right) => write!(f, "[{left} - {right}]"),
645
Self::ExclusiveOr(left, right) => write!(f, "[{left} ^ {right}]"),
646
Self::Intersect(left, right) => write!(f, "[{left} & {right}]"),
647
648
Self::ByDType(dst) => fmt::Display::fmt(dst, f),
649
Self::ByName { names, strict } => {
650
f.write_str("cs.by_name(")?;
651
652
for e in names.iter() {
653
write!(f, "'{e}', ")?;
654
}
655
656
write!(f, "require_all={strict})")
657
},
658
Self::ByIndex { indices, strict } if indices.as_ref() == [0] => {
659
write!(f, "cs.first(require={strict})")
660
},
661
Self::ByIndex { indices, strict } if indices.as_ref() == [-1] => {
662
write!(f, "cs.last(require={strict})")
663
},
664
Self::ByIndex { indices, strict } if indices.len() == 1 => {
665
write!(f, "cs.nth({}, require_all={strict})", indices[0])
666
},
667
Self::ByIndex { indices, strict } => {
668
write!(
669
f,
670
"cs.by_index({:?}, require_all={strict})",
671
indices.as_ref()
672
)
673
},
674
Self::Matches(s) => write!(f, "cs.matches(\"{s}\")"),
675
Self::Wildcard => f.write_str("cs.all()"),
676
Self::Empty => f.write_str("cs.empty()"),
677
}
678
}
679
}
680
681
impl fmt::Display for DataTypeSelector {
682
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
683
match self {
684
Self::Union(left, right) => write!(f, "[{left} | {right}]"),
685
Self::Difference(left, right) => write!(f, "[{left} - {right}]"),
686
Self::ExclusiveOr(left, right) => write!(f, "[{left} ^ {right}]"),
687
Self::Intersect(left, right) => write!(f, "[{left} & {right}]"),
688
689
Self::Float => f.write_str("cs.float()"),
690
Self::Integer => f.write_str("cs.integer()"),
691
Self::SignedInteger => f.write_str("cs.signed_integer()"),
692
Self::UnsignedInteger => f.write_str("cs.unsigned_integer()"),
693
694
Self::Enum => f.write_str("cs.enum()"),
695
Self::Categorical => f.write_str("cs.categorical()"),
696
697
Self::Nested => f.write_str("cs.nested()"),
698
Self::List(inner_dst) => {
699
f.write_str("cs.list(")?;
700
if let Some(inner_dst) = inner_dst {
701
fmt::Display::fmt(inner_dst.as_ref(), f)?;
702
}
703
f.write_str(")")
704
},
705
Self::Array(inner_dst, swidth) => {
706
f.write_str("cs.list(")?;
707
if let Some(inner_dst) = inner_dst {
708
fmt::Display::fmt(inner_dst.as_ref(), f)?;
709
}
710
f.write_str(", width=")?;
711
match swidth {
712
None => f.write_str("*")?,
713
Some(swidth) => write!(f, "{swidth}")?,
714
}
715
f.write_str(")")
716
},
717
Self::Struct => f.write_str("cs.struct()"),
718
719
Self::Numeric => f.write_str("cs.numeric()"),
720
Self::Decimal => f.write_str("cs.decimal()"),
721
Self::Temporal => f.write_str("cs.temporal()"),
722
Self::Datetime(tu, tz) => {
723
write!(f, "cs.datetime(time_unit={tu}, time_zone=")?;
724
use TimeZoneSet as TZS;
725
match tz {
726
TZS::Any => f.write_str("*")?,
727
TZS::AnySet => f.write_str("*set")?,
728
TZS::Unset => f.write_str("None")?,
729
TZS::UnsetOrAnyOf(tz) => {
730
f.write_str("[None")?;
731
for e in tz.iter() {
732
write!(f, ", '{e}'")?;
733
}
734
f.write_str("]")?;
735
},
736
TZS::AnyOf(tz) => {
737
f.write_str("[")?;
738
if let Some(e) = tz.first() {
739
write!(f, "'{e}'")?;
740
for e in &tz[1..] {
741
write!(f, ", '{e}'")?;
742
}
743
}
744
f.write_str("]")?;
745
},
746
}
747
f.write_str(")")
748
},
749
Self::Duration(tu) => {
750
write!(f, "cs.duration(time_unit={tu})")
751
},
752
Self::Object => f.write_str("cs.object()"),
753
754
Self::AnyOf(dtypes) => {
755
use DataType as D;
756
match dtypes.as_ref() {
757
[D::Boolean] => f.write_str("cs.boolean()"),
758
[D::Binary] => f.write_str("cs.binary()"),
759
[D::Time] => f.write_str("cs.time()"),
760
[D::Date] => f.write_str("cs.date()"),
761
[D::String] => f.write_str("cs.string()"),
762
_ => write!(f, "cs.by_dtype({dtypes:?})"),
763
}
764
},
765
766
Self::Wildcard => f.write_str("cs.all()"),
767
Self::Empty => f.write_str("cs.empty()"),
768
}
769
}
770
}
771
772
impl fmt::Display for Excluded {
773
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
774
match self {
775
Excluded::Name(name) => write!(f, "\"{name}\""),
776
Excluded::Dtype(dtype) => fmt::Display::fmt(dtype, f),
777
}
778
}
779
}
780
781