Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/scalar/serde.rs
6940 views
1
use arrow::array::IntoBoxedArray;
2
use polars_error::{PolarsError, PolarsResult, polars_bail};
3
use polars_utils::pl_str::PlSmallStr;
4
#[cfg(feature = "serde")]
5
use serde::{Deserialize, Deserializer, Serialize, Serializer};
6
7
use super::Scalar;
8
use crate::prelude::{AnyValue, DataType, Field};
9
use crate::series::Series;
10
11
/// JSON-schema support for [`Scalar`].
///
/// Every query is forwarded unchanged to the `JsonSchema` implementation of
/// [`SerializableScalar`], so both types report the same schema.
#[cfg(feature = "dsl-schema")]
impl schemars::JsonSchema for Scalar {
    /// Forwards to `SerializableScalar::is_referenceable`.
    fn is_referenceable() -> bool {
        use schemars::JsonSchema;

        SerializableScalar::is_referenceable()
    }

    /// Forwards to `SerializableScalar::schema_id`.
    fn schema_id() -> std::borrow::Cow<'static, str> {
        use schemars::JsonSchema;

        SerializableScalar::schema_id()
    }

    /// Forwards to `SerializableScalar::schema_name`.
    fn schema_name() -> String {
        use schemars::JsonSchema;

        SerializableScalar::schema_name()
    }

    /// Forwards to `SerializableScalar::json_schema`, passing `generator` through.
    fn json_schema(generator: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
        use schemars::JsonSchema;

        SerializableScalar::json_schema(generator)
    }
}
29
30
/// Serializes a [`Scalar`] through its [`SerializableScalar`] representation.
#[cfg(feature = "serde")]
impl Serialize for Scalar {
    /// Converts a clone of `self` into a [`SerializableScalar`] and serializes that value.
    ///
    /// A failed conversion is surfaced as a custom error of the serializer.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // The conversion takes its input by value, so work on a clone of `self`.
        let proxy = SerializableScalar::try_from(self.clone())
            .map_err(<S::Error as serde::ser::Error>::custom)?;

        proxy.serialize(serializer)
    }
}
41
42
/// Deserializes a [`Scalar`] through its [`SerializableScalar`] representation.
#[cfg(feature = "serde")]
impl<'a> Deserialize<'a> for Scalar {
    /// Reads a [`SerializableScalar`] from `deserializer` and converts it into a [`Scalar`].
    ///
    /// A failed conversion is surfaced as a custom error of the deserializer.
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: Deserializer<'a>,
    {
        let proxy = SerializableScalar::deserialize(deserializer)?;

        Self::try_from(proxy).map_err(<D::Error as serde::de::Error>::custom)
    }
}
52
53
// Owned stand-in for `Scalar` used for (de)serialization: `Scalar`'s `Serialize` impl converts
// into this enum and serializes it, and its `Deserialize` impl reads this enum and converts back.
//
// Review notes are written as plain `//` comments on purpose: schemars turns `///` doc comments
// into schema descriptions, so editing those would change the generated JSON schema. Variant
// names and order are likewise part of the serialized representation.
#[derive(Serialize, Deserialize)]
// The serialized type name is "AnyValue", not "SerializableScalar".
#[serde(rename = "AnyValue")]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum SerializableScalar {
    // A null value; the payload is the dtype of the scalar it was created from.
    Null(DataType),
    /// An 8-bit integer number.
    Int8(i8),
    /// A 16-bit integer number.
    Int16(i16),
    /// A 32-bit integer number.
    Int32(i32),
    /// A 64-bit integer number.
    Int64(i64),
    /// A 128-bit integer number.
    Int128(i128),
    /// An unsigned 8-bit integer number.
    UInt8(u8),
    /// An unsigned 16-bit integer number.
    UInt16(u16),
    /// An unsigned 32-bit integer number.
    UInt32(u32),
    /// An unsigned 64-bit integer number.
    UInt64(u64),
    /// A 32-bit floating point number.
    Float32(f32),
    /// A 64-bit floating point number.
    Float64(f64),
    /// Nested type, contains arrays that are filled with one of the datatypes.
    List(Series),
    /// A binary true or false.
    Boolean(bool),
    // Produced from both the borrowed and the owned string value.
    /// A UTF8 encoded string type.
    String(PlSmallStr),
    // Owned bytes; produced from both the borrowed and the owned binary value.
    Binary(Vec<u8>),

    /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    #[cfg(feature = "dtype-date")]
    Date(i32),

    // Fields: value, time unit, optional time zone.
    // NOTE(review): the doc comment below says "nanoseconds", but the variant carries an explicit
    // `TimeUnit`, so the value is presumably expressed in that unit — confirm before relying on it.
    /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in nanoseconds (64 bits).
    #[cfg(feature = "dtype-datetime")]
    Datetime(
        i64,
        crate::prelude::TimeUnit,
        Option<crate::prelude::TimeZone>,
    ),

    // Fields: value, time unit.
    /// A 64-bit integer representing difference between date-times in [`TimeUnit`]
    #[cfg(feature = "dtype-duration")]
    Duration(i64, crate::prelude::TimeUnit),

    /// A 64-bit time representing the elapsed time since midnight in nanoseconds
    #[cfg(feature = "dtype-time")]
    Time(i64),

    // Fields: the array's values as a `Series`, and its width.
    #[cfg(feature = "dtype-array")]
    Array(Series, usize),

    // Fields: value, scale.
    /// A 128-bit fixed point decimal number with a scale.
    #[cfg(feature = "dtype-decimal")]
    Decimal(i128, usize),

    // A categorical value stored as its category string (`value`) rather than its physical code,
    // together with the name, namespace and physical type of the categories it belongs to.
    #[cfg(feature = "dtype-categorical")]
    Categorical {
        value: PlSmallStr,
        name: PlSmallStr,
        namespace: PlSmallStr,
        physical: polars_dtype::categorical::CategoricalPhysical,
    },
    // An enum value stored as its category index (`value`) plus the full list of categories.
    #[cfg(feature = "dtype-categorical")]
    Enum {
        value: polars_dtype::categorical::CatSize,
        categories: Series,
    },

    // One `(field name, field value)` pair per struct field, in field order.
    #[cfg(feature = "dtype-struct")]
    Struct(Vec<(PlSmallStr, SerializableScalar)>),
}
133
134
impl TryFrom<Scalar> for SerializableScalar {
135
type Error = PolarsError;
136
137
fn try_from(value: Scalar) -> Result<Self, Self::Error> {
138
let out = match value.value {
139
AnyValue::Null => Self::Null(value.dtype),
140
AnyValue::Int8(v) => Self::Int8(v),
141
AnyValue::Int16(v) => Self::Int16(v),
142
AnyValue::Int32(v) => Self::Int32(v),
143
AnyValue::Int64(v) => Self::Int64(v),
144
AnyValue::Int128(v) => Self::Int128(v),
145
AnyValue::UInt8(v) => Self::UInt8(v),
146
AnyValue::UInt16(v) => Self::UInt16(v),
147
AnyValue::UInt32(v) => Self::UInt32(v),
148
AnyValue::UInt64(v) => Self::UInt64(v),
149
AnyValue::Float32(v) => Self::Float32(v),
150
AnyValue::Float64(v) => Self::Float64(v),
151
AnyValue::List(series) => Self::List(series),
152
AnyValue::Boolean(v) => Self::Boolean(v),
153
AnyValue::String(v) => Self::String(PlSmallStr::from(v)),
154
AnyValue::StringOwned(v) => Self::String(v),
155
AnyValue::Binary(v) => Self::Binary(v.to_vec()),
156
AnyValue::BinaryOwned(v) => Self::Binary(v),
157
158
#[cfg(feature = "dtype-date")]
159
AnyValue::Date(v) => Self::Date(v),
160
161
#[cfg(feature = "dtype-datetime")]
162
AnyValue::Datetime(v, tu, tz) => Self::Datetime(v, tu, tz.cloned()),
163
#[cfg(feature = "dtype-datetime")]
164
AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
165
Self::Datetime(v, time_unit, time_zone.as_deref().cloned())
166
},
167
168
#[cfg(feature = "dtype-duration")]
169
AnyValue::Duration(v, time_unit) => Self::Duration(v, time_unit),
170
171
#[cfg(feature = "dtype-time")]
172
AnyValue::Time(v) => Self::Time(v),
173
174
#[cfg(feature = "dtype-categorical")]
175
AnyValue::Categorical(cat, _) | AnyValue::CategoricalOwned(cat, _) => {
176
let DataType::Categorical(categories, mapping) = value.dtype() else {
177
unreachable!();
178
};
179
180
Self::Categorical {
181
value: PlSmallStr::from(mapping.cat_to_str(cat).unwrap()),
182
name: categories.name().clone(),
183
namespace: categories.namespace().clone(),
184
physical: categories.physical(),
185
}
186
},
187
#[cfg(feature = "dtype-categorical")]
188
AnyValue::Enum(idx, _) | AnyValue::EnumOwned(idx, _) => {
189
let DataType::Enum(categories, _) = value.dtype() else {
190
unreachable!();
191
};
192
193
Self::Enum {
194
value: idx,
195
categories: Series::from_arrow(
196
PlSmallStr::EMPTY,
197
categories.categories().clone().into_boxed(),
198
)
199
.unwrap(),
200
}
201
},
202
203
#[cfg(feature = "dtype-array")]
204
AnyValue::Array(v, width) => Self::Array(v, width),
205
206
#[cfg(feature = "object")]
207
AnyValue::Object(..) | AnyValue::ObjectOwned(..) => {
208
polars_bail!(nyi = "Cannot serialize object value.")
209
},
210
211
#[cfg(feature = "dtype-struct")]
212
AnyValue::Struct(idx, arr, fields) => {
213
assert!(idx < arr.len());
214
assert_eq!(arr.values().len(), fields.len());
215
216
Self::Struct(
217
arr.values()
218
.iter()
219
.zip(fields.iter())
220
.map(|(arr, field)| {
221
let series = unsafe {
222
Series::from_chunks_and_dtype_unchecked(
223
PlSmallStr::EMPTY,
224
vec![arr.clone()],
225
field.dtype(),
226
)
227
};
228
let av = unsafe { series.get_unchecked(idx) };
229
PolarsResult::Ok((
230
field.name().clone(),
231
Self::try_from(Scalar::new(field.dtype.clone(), av.into_static()))?,
232
))
233
})
234
.collect::<Result<Vec<_>, _>>()?,
235
)
236
},
237
238
#[cfg(feature = "dtype-struct")]
239
AnyValue::StructOwned(v) => {
240
let (avs, fields) = *v;
241
assert_eq!(avs.len(), fields.len());
242
243
Self::Struct(
244
avs.into_iter()
245
.zip(fields.into_iter())
246
.map(|(av, field)| {
247
PolarsResult::Ok((
248
field.name,
249
Self::try_from(Scalar::new(field.dtype, av.into_static()))?,
250
))
251
})
252
.collect::<Result<Vec<_>, _>>()?,
253
)
254
},
255
256
#[cfg(feature = "dtype-decimal")]
257
AnyValue::Decimal(v, scale) => Self::Decimal(v, scale),
258
};
259
Ok(out)
260
}
261
}
262
263
impl TryFrom<SerializableScalar> for Scalar {
264
type Error = PolarsError;
265
266
fn try_from(value: SerializableScalar) -> Result<Self, Self::Error> {
267
type S = SerializableScalar;
268
Ok(match value {
269
S::Null(dtype) => Self::null(dtype),
270
S::Int8(v) => Self::from(v),
271
S::Int16(v) => Self::from(v),
272
S::Int32(v) => Self::from(v),
273
S::Int64(v) => Self::from(v),
274
S::Int128(v) => Self::from(v),
275
S::UInt8(v) => Self::from(v),
276
S::UInt16(v) => Self::from(v),
277
S::UInt32(v) => Self::from(v),
278
S::UInt64(v) => Self::from(v),
279
S::Float32(v) => Self::from(v),
280
S::Float64(v) => Self::from(v),
281
S::List(v) => Self::new_list(v),
282
S::Boolean(v) => Self::from(v),
283
S::String(v) => Self::from(v),
284
S::Binary(v) => Self::from(v),
285
#[cfg(feature = "dtype-date")]
286
S::Date(v) => Self::new_date(v),
287
#[cfg(feature = "dtype-datetime")]
288
S::Datetime(v, time_unit, time_zone) => Self::new_datetime(v, time_unit, time_zone),
289
#[cfg(feature = "dtype-duration")]
290
S::Duration(v, time_unit) => Self::new_duration(v, time_unit),
291
#[cfg(feature = "dtype-time")]
292
S::Time(v) => Self::new_time(v),
293
#[cfg(feature = "dtype-array")]
294
S::Array(v, width) => Self::new_array(v, width),
295
#[cfg(feature = "dtype-decimal")]
296
S::Decimal(v, scale) => Self::new_decimal(v, scale),
297
298
#[cfg(feature = "dtype-categorical")]
299
S::Categorical {
300
value,
301
name,
302
namespace,
303
physical,
304
} => Self::new_categorical(value.as_str(), name, namespace, physical)?,
305
#[cfg(feature = "dtype-categorical")]
306
S::Enum { value, categories } => {
307
Self::new_enum(value, categories.str()?.rechunk().downcast_as_array())?
308
},
309
#[cfg(feature = "dtype-struct")]
310
S::Struct(scs) => {
311
let (avs, fields) = scs
312
.into_iter()
313
.map(|(name, scalar)| {
314
let Scalar { dtype, value } = Scalar::try_from(scalar)?;
315
Ok((value, Field::new(name, dtype)))
316
})
317
.collect::<PolarsResult<(Vec<AnyValue<'static>>, Vec<Field>)>>()?;
318
319
let dtype = DataType::Struct(fields.clone());
320
Self::new(dtype, AnyValue::StructOwned(Box::new((avs, fields))))
321
},
322
})
323
}
324
}
325
326