Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/scalar/serde.rs
8458 views
1
use arrow::array::IntoBoxedArray;
2
use polars_error::{PolarsError, PolarsResult, polars_bail};
3
use polars_utils::float16::pf16;
4
use polars_utils::pl_str::PlSmallStr;
5
#[cfg(feature = "serde")]
6
use serde::{Deserialize, Deserializer, Serialize, Serializer};
7
8
use super::Scalar;
9
use crate::prelude::{AnyValue, DataType, Field};
10
use crate::series::Series;
11
12
#[cfg(feature = "dsl-schema")]
impl schemars::JsonSchema for Scalar {
    // `Scalar` has no schema of its own: every method below forwards to the
    // `schemars::JsonSchema` implementation of `SerializableScalar`, the same
    // proxy type the serde impls in this file convert through.

    /// Forwards to `SerializableScalar`'s `inline_schema`.
    fn inline_schema() -> bool {
        use schemars::JsonSchema;
        SerializableScalar::inline_schema()
    }

    /// Forwards to `SerializableScalar`'s `schema_id`.
    fn schema_id() -> std::borrow::Cow<'static, str> {
        use schemars::JsonSchema;
        SerializableScalar::schema_id()
    }

    /// Forwards to `SerializableScalar`'s `schema_name`.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        use schemars::JsonSchema;
        SerializableScalar::schema_name()
    }

    /// Forwards to `SerializableScalar`'s `json_schema`, using the caller's generator.
    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        use schemars::JsonSchema;
        SerializableScalar::json_schema(generator)
    }
}
30
31
#[cfg(feature = "serde")]
impl Serialize for Scalar {
    /// Serializes this scalar through its [`SerializableScalar`] proxy.
    ///
    /// The scalar is cloned because the conversion takes it by value. A failed
    /// conversion is reported to the serializer as a custom serde error.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let proxy =
            SerializableScalar::try_from(self.clone()).map_err(serde::ser::Error::custom)?;
        proxy.serialize(serializer)
    }
}
42
43
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for Scalar {
    /// Deserializes a [`SerializableScalar`] and converts it into a `Scalar`.
    ///
    /// Errors from the deserializer are returned as-is; a failed conversion is
    /// wrapped as a custom serde error.
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let proxy = SerializableScalar::deserialize(deserializer)?;
        Self::try_from(proxy).map_err(serde::de::Error::custom)
    }
}
53
54
// Serde-facing mirror of a scalar value. `Scalar`'s `Serialize`, `Deserialize`
// and `JsonSchema` impls in this file all delegate to this type, and the
// `TryFrom` impls below convert in both directions. It is serialized under the
// name "AnyValue" (see the `rename` attribute).
//
// Borrowed and owned `AnyValue` variants (e.g. `String`/`StringOwned`) map onto
// a single owned variant here.
//
// NOTE(review): the `///` comments on the variants are deliberately left
// untouched and additions use `//`. schemars copies doc comments into the
// generated schema as descriptions, so editing them would presumably change the
// `dsl-schema` output -- confirm before rewording any of them.
// NOTE(review): variant order is kept as-is; some serde formats may encode
// enum variants by position -- verify before reordering.
#[derive(Serialize, Deserialize)]
#[serde(rename = "AnyValue")]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum SerializableScalar {
    // A null value; the payload is the dtype of the scalar it came from.
    Null(DataType),
    /// An 8-bit integer number.
    Int8(i8),
    /// A 16-bit integer number.
    Int16(i16),
    /// A 32-bit integer number.
    Int32(i32),
    /// A 64-bit integer number.
    Int64(i64),
    /// A 128-bit integer number.
    Int128(i128),
    /// An unsigned 8-bit integer number.
    UInt8(u8),
    /// An unsigned 16-bit integer number.
    UInt16(u16),
    /// An unsigned 32-bit integer number.
    UInt32(u32),
    /// An unsigned 64-bit integer number.
    UInt64(u64),
    /// An unsigned 128-bit integer number.
    UInt128(u128),
    /// A 16-bit floating point number.
    Float16(pf16),
    /// A 32-bit floating point number.
    Float32(f32),
    /// A 64-bit floating point number.
    Float64(f64),
    /// Nested type, contains arrays that are filled with one of the datatypes.
    List(Series),
    /// A binary true or false.
    Boolean(bool),
    /// A UTF8 encoded string type.
    String(PlSmallStr),
    // Raw bytes; filled from both `AnyValue::Binary` and `AnyValue::BinaryOwned`.
    Binary(Vec<u8>),

    /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    #[cfg(feature = "dtype-date")]
    Date(i32),

    /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in nanoseconds (64 bits).
    // NOTE(review): the doc above says "nanoseconds", but the variant carries a
    // `TimeUnit`; presumably the value is expressed in that unit -- confirm.
    // Fields: value, time unit, optional time zone.
    #[cfg(feature = "dtype-datetime")]
    Datetime(
        i64,
        crate::prelude::TimeUnit,
        Option<crate::prelude::TimeZone>,
    ),

    /// A 64-bit integer representing difference between date-times in [`TimeUnit`]
    #[cfg(feature = "dtype-duration")]
    Duration(i64, crate::prelude::TimeUnit),

    /// A 64-bit time representing the elapsed time since midnight in nanoseconds
    #[cfg(feature = "dtype-time")]
    Time(i64),

    // Fields: the array's values as a `Series`, and its width.
    #[cfg(feature = "dtype-array")]
    Array(Series, usize),

    /// A 128-bit fixed point decimal number with a scale.
    // Fields, in order: value, precision, scale (as bound in the conversions below).
    #[cfg(feature = "dtype-decimal")]
    Decimal(i128, usize, usize),

    // A categorical value stored as its string, together with the name,
    // namespace and physical type read from the dtype's categories.
    #[cfg(feature = "dtype-categorical")]
    Categorical {
        value: PlSmallStr,
        name: PlSmallStr,
        namespace: PlSmallStr,
        physical: polars_dtype::categorical::CategoricalPhysical,
    },
    // An enum value stored as its category index, plus a `Series` built from
    // the dtype's categories.
    #[cfg(feature = "dtype-categorical")]
    Enum {
        value: polars_dtype::categorical::CatSize,
        categories: Series,
    },

    // A struct value as (field name, field value) pairs, one per struct field.
    #[cfg(feature = "dtype-struct")]
    Struct(Vec<(PlSmallStr, SerializableScalar)>),
}
138
139
impl TryFrom<Scalar> for SerializableScalar {
140
type Error = PolarsError;
141
142
fn try_from(value: Scalar) -> Result<Self, Self::Error> {
143
let out = match value.value {
144
AnyValue::Null => Self::Null(value.dtype),
145
AnyValue::Int8(v) => Self::Int8(v),
146
AnyValue::Int16(v) => Self::Int16(v),
147
AnyValue::Int32(v) => Self::Int32(v),
148
AnyValue::Int64(v) => Self::Int64(v),
149
AnyValue::Int128(v) => Self::Int128(v),
150
AnyValue::UInt8(v) => Self::UInt8(v),
151
AnyValue::UInt16(v) => Self::UInt16(v),
152
AnyValue::UInt32(v) => Self::UInt32(v),
153
AnyValue::UInt64(v) => Self::UInt64(v),
154
AnyValue::UInt128(v) => Self::UInt128(v),
155
AnyValue::Float16(v) => Self::Float16(v),
156
AnyValue::Float32(v) => Self::Float32(v),
157
AnyValue::Float64(v) => Self::Float64(v),
158
AnyValue::List(series) => Self::List(series),
159
AnyValue::Boolean(v) => Self::Boolean(v),
160
AnyValue::String(v) => Self::String(PlSmallStr::from(v)),
161
AnyValue::StringOwned(v) => Self::String(v),
162
AnyValue::Binary(v) => Self::Binary(v.to_vec()),
163
AnyValue::BinaryOwned(v) => Self::Binary(v),
164
165
#[cfg(feature = "dtype-date")]
166
AnyValue::Date(v) => Self::Date(v),
167
168
#[cfg(feature = "dtype-datetime")]
169
AnyValue::Datetime(v, tu, tz) => Self::Datetime(v, tu, tz.cloned()),
170
#[cfg(feature = "dtype-datetime")]
171
AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
172
Self::Datetime(v, time_unit, time_zone.as_deref().cloned())
173
},
174
175
#[cfg(feature = "dtype-duration")]
176
AnyValue::Duration(v, time_unit) => Self::Duration(v, time_unit),
177
178
#[cfg(feature = "dtype-time")]
179
AnyValue::Time(v) => Self::Time(v),
180
181
#[cfg(feature = "dtype-categorical")]
182
AnyValue::Categorical(cat, _) | AnyValue::CategoricalOwned(cat, _) => {
183
let DataType::Categorical(categories, mapping) = value.dtype() else {
184
unreachable!();
185
};
186
187
Self::Categorical {
188
value: PlSmallStr::from(mapping.cat_to_str(cat).unwrap()),
189
name: categories.name().clone(),
190
namespace: categories.namespace().clone(),
191
physical: categories.physical(),
192
}
193
},
194
#[cfg(feature = "dtype-categorical")]
195
AnyValue::Enum(idx, _) | AnyValue::EnumOwned(idx, _) => {
196
let DataType::Enum(categories, _) = value.dtype() else {
197
unreachable!();
198
};
199
200
Self::Enum {
201
value: idx,
202
categories: Series::from_arrow(
203
PlSmallStr::EMPTY,
204
categories.categories().clone().into_boxed(),
205
)
206
.unwrap(),
207
}
208
},
209
210
#[cfg(feature = "dtype-array")]
211
AnyValue::Array(v, width) => Self::Array(v, width),
212
213
#[cfg(feature = "object")]
214
AnyValue::Object(..) | AnyValue::ObjectOwned(..) => {
215
polars_bail!(nyi = "Cannot serialize object value.")
216
},
217
218
#[cfg(feature = "dtype-struct")]
219
AnyValue::Struct(idx, arr, fields) => {
220
assert!(idx < arr.len());
221
assert_eq!(arr.values().len(), fields.len());
222
223
Self::Struct(
224
arr.values()
225
.iter()
226
.zip(fields.iter())
227
.map(|(arr, field)| {
228
let series = unsafe {
229
Series::from_chunks_and_dtype_unchecked(
230
PlSmallStr::EMPTY,
231
vec![arr.clone()],
232
field.dtype(),
233
)
234
};
235
let av = unsafe { series.get_unchecked(idx) };
236
PolarsResult::Ok((
237
field.name().clone(),
238
Self::try_from(Scalar::new(field.dtype.clone(), av.into_static()))?,
239
))
240
})
241
.collect::<Result<Vec<_>, _>>()?,
242
)
243
},
244
245
#[cfg(feature = "dtype-struct")]
246
AnyValue::StructOwned(v) => {
247
let (avs, fields) = *v;
248
assert_eq!(avs.len(), fields.len());
249
250
Self::Struct(
251
avs.into_iter()
252
.zip(fields.into_iter())
253
.map(|(av, field)| {
254
PolarsResult::Ok((
255
field.name,
256
Self::try_from(Scalar::new(field.dtype, av.into_static()))?,
257
))
258
})
259
.collect::<Result<Vec<_>, _>>()?,
260
)
261
},
262
263
#[cfg(feature = "dtype-decimal")]
264
AnyValue::Decimal(v, prec, scale) => Self::Decimal(v, prec, scale),
265
};
266
Ok(out)
267
}
268
}
269
270
impl TryFrom<SerializableScalar> for Scalar {
271
type Error = PolarsError;
272
273
fn try_from(value: SerializableScalar) -> Result<Self, Self::Error> {
274
type S = SerializableScalar;
275
Ok(match value {
276
S::Null(dtype) => Self::null(dtype),
277
S::Int8(v) => Self::from(v),
278
S::Int16(v) => Self::from(v),
279
S::Int32(v) => Self::from(v),
280
S::Int64(v) => Self::from(v),
281
S::Int128(v) => Self::from(v),
282
S::UInt8(v) => Self::from(v),
283
S::UInt16(v) => Self::from(v),
284
S::UInt32(v) => Self::from(v),
285
S::UInt64(v) => Self::from(v),
286
S::UInt128(v) => Self::from(v),
287
S::Float16(v) => Self::from(v),
288
S::Float32(v) => Self::from(v),
289
S::Float64(v) => Self::from(v),
290
S::List(v) => Self::new_list(v),
291
S::Boolean(v) => Self::from(v),
292
S::String(v) => Self::from(v),
293
S::Binary(v) => Self::from(v),
294
#[cfg(feature = "dtype-date")]
295
S::Date(v) => Self::new_date(v),
296
#[cfg(feature = "dtype-datetime")]
297
S::Datetime(v, time_unit, time_zone) => Self::new_datetime(v, time_unit, time_zone),
298
#[cfg(feature = "dtype-duration")]
299
S::Duration(v, time_unit) => Self::new_duration(v, time_unit),
300
#[cfg(feature = "dtype-time")]
301
S::Time(v) => Self::new_time(v),
302
#[cfg(feature = "dtype-array")]
303
S::Array(v, width) => Self::new_array(v, width),
304
#[cfg(feature = "dtype-decimal")]
305
S::Decimal(v, prec, scale) => Self::new_decimal(v, prec, scale),
306
307
#[cfg(feature = "dtype-categorical")]
308
S::Categorical {
309
value,
310
name,
311
namespace,
312
physical,
313
} => Self::new_categorical(value.as_str(), name, namespace, physical)?,
314
#[cfg(feature = "dtype-categorical")]
315
S::Enum { value, categories } => {
316
Self::new_enum(value, categories.str()?.rechunk().downcast_as_array())?
317
},
318
#[cfg(feature = "dtype-struct")]
319
S::Struct(scs) => {
320
let (avs, fields) = scs
321
.into_iter()
322
.map(|(name, scalar)| {
323
let Scalar { dtype, value } = Scalar::try_from(scalar)?;
324
Ok((value, Field::new(name, dtype)))
325
})
326
.collect::<PolarsResult<(Vec<AnyValue<'static>>, Vec<Field>)>>()?;
327
328
let dtype = DataType::Struct(fields.clone());
329
Self::new(dtype, AnyValue::StructOwned(Box::new((avs, fields))))
330
},
331
})
332
}
333
}
334
335