Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-io/src/catalog/unity/models.rs
6939 views
1
use polars_core::prelude::PlHashMap;
2
use polars_utils::pl_str::PlSmallStr;
3
4
#[derive(Debug, serde::Deserialize)]
5
pub struct CatalogInfo {
6
pub name: String,
7
8
pub comment: Option<String>,
9
10
#[serde(default)]
11
pub storage_location: Option<String>,
12
13
#[serde(default, deserialize_with = "null_to_default")]
14
pub properties: PlHashMap<PlSmallStr, String>,
15
16
#[serde(default, deserialize_with = "null_to_default")]
17
pub options: PlHashMap<PlSmallStr, String>,
18
19
#[serde(with = "chrono::serde::ts_milliseconds_option")]
20
pub created_at: Option<chrono::DateTime<chrono::Utc>>,
21
22
pub created_by: Option<String>,
23
24
#[serde(with = "chrono::serde::ts_milliseconds_option")]
25
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
26
27
pub updated_by: Option<String>,
28
}
29
30
#[derive(Debug, serde::Deserialize)]
31
pub struct NamespaceInfo {
32
pub name: String,
33
pub comment: Option<String>,
34
35
#[serde(default, deserialize_with = "null_to_default")]
36
pub properties: PlHashMap<PlSmallStr, String>,
37
38
#[serde(default)]
39
pub storage_location: Option<String>,
40
41
#[serde(with = "chrono::serde::ts_milliseconds_option")]
42
pub created_at: Option<chrono::DateTime<chrono::Utc>>,
43
44
pub created_by: Option<String>,
45
46
#[serde(with = "chrono::serde::ts_milliseconds_option")]
47
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
48
49
pub updated_by: Option<String>,
50
}
51
52
#[derive(Debug, serde::Deserialize)]
53
pub struct TableInfo {
54
pub name: String,
55
pub table_id: String,
56
pub table_type: TableType,
57
58
#[serde(default)]
59
pub comment: Option<String>,
60
61
#[serde(default)]
62
pub storage_location: Option<String>,
63
64
#[serde(default)]
65
pub data_source_format: Option<DataSourceFormat>,
66
67
#[serde(default)]
68
pub columns: Option<Vec<ColumnInfo>>,
69
70
#[serde(default, deserialize_with = "null_to_default")]
71
pub properties: PlHashMap<PlSmallStr, String>,
72
73
#[serde(with = "chrono::serde::ts_milliseconds_option")]
74
pub created_at: Option<chrono::DateTime<chrono::Utc>>,
75
76
pub created_by: Option<String>,
77
78
#[serde(with = "chrono::serde::ts_milliseconds_option")]
79
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
80
81
pub updated_by: Option<String>,
82
}
83
84
#[derive(
85
Debug, strum_macros::Display, strum_macros::EnumString, serde::Serialize, serde::Deserialize,
86
)]
87
#[strum(serialize_all = "SCREAMING_SNAKE_CASE")]
88
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
89
pub enum TableType {
90
Managed,
91
External,
92
View,
93
MaterializedView,
94
StreamingTable,
95
ManagedShallowClone,
96
Foreign,
97
ExternalShallowClone,
98
}
99
100
#[derive(
101
Debug, strum_macros::Display, strum_macros::EnumString, serde::Serialize, serde::Deserialize,
102
)]
103
#[strum(serialize_all = "SCREAMING_SNAKE_CASE")]
104
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
105
pub enum DataSourceFormat {
106
Delta,
107
Csv,
108
Json,
109
Avro,
110
Parquet,
111
Orc,
112
Text,
113
114
// Databricks-specific
115
UnityCatalog,
116
Deltasharing,
117
DatabricksFormat,
118
MysqlFormat,
119
PostgresqlFormat,
120
RedshiftFormat,
121
SnowflakeFormat,
122
SqldwFormat,
123
SqlserverFormat,
124
SalesforceFormat,
125
BigqueryFormat,
126
NetsuiteFormat,
127
WorkdayRaasFormat,
128
HiveSerde,
129
HiveCustom,
130
VectorIndexFormat,
131
}
132
133
#[derive(Debug, serde::Serialize, serde::Deserialize)]
134
pub struct ColumnInfo {
135
pub name: PlSmallStr,
136
pub type_name: PlSmallStr,
137
pub type_text: PlSmallStr,
138
pub type_json: String,
139
pub position: Option<u32>,
140
pub comment: Option<String>,
141
pub partition_index: Option<u32>,
142
}
143
144
/// Note: This struct contains all the field names for a few different possible type / field presence
145
/// combinations. We use serde(default) and skip_serializing_if to get the desired serialization
146
/// output.
147
///
148
/// E.g.:
149
///
150
/// ```text
151
/// {
152
/// "name": "List",
153
/// "type": {"type": "array", "elementType": "long", "containsNull": True},
154
/// "nullable": True,
155
/// "metadata": {},
156
/// }
157
/// {
158
/// "name": "Struct",
159
/// "type": {
160
/// "type": "struct",
161
/// "fields": [{"name": "x", "type": "long", "nullable": True, "metadata": {}}],
162
/// },
163
/// "nullable": True,
164
/// "metadata": {},
165
/// }
166
/// {
167
/// "name": "ListStruct",
168
/// "type": {
169
/// "type": "array",
170
/// "elementType": {
171
/// "type": "struct",
172
/// "fields": [{"name": "x", "type": "long", "nullable": True, "metadata": {}}],
173
/// },
174
/// "containsNull": True,
175
/// },
176
/// "nullable": True,
177
/// "metadata": {},
178
/// }
179
/// {
180
/// "name": "Map",
181
/// "type": {
182
/// "type": "map",
183
/// "keyType": "string",
184
/// "valueType": "string",
185
/// "valueContainsNull": True,
186
/// },
187
/// "nullable": True,
188
/// "metadata": {},
189
/// }
190
/// ```
191
#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
192
pub struct ColumnTypeJson {
193
#[serde(default, skip_serializing_if = "Option::is_none")]
194
pub name: Option<PlSmallStr>,
195
196
#[serde(rename = "type")]
197
pub type_: ColumnTypeJsonType,
198
199
#[serde(default, skip_serializing_if = "Option::is_none")]
200
pub nullable: Option<bool>,
201
202
#[serde(default, skip_serializing_if = "Option::is_none")]
203
pub metadata: Option<PlHashMap<String, String>>,
204
205
// Used for List types
206
#[serde(
207
default,
208
rename = "elementType",
209
skip_serializing_if = "Option::is_none"
210
)]
211
pub element_type: Option<ColumnTypeJsonType>,
212
213
#[serde(
214
default,
215
rename = "containsNull",
216
skip_serializing_if = "Option::is_none"
217
)]
218
pub contains_null: Option<bool>,
219
220
// Used for Struct types
221
#[serde(default, skip_serializing_if = "Option::is_none")]
222
pub fields: Option<Vec<ColumnTypeJson>>,
223
224
// Used for Map types
225
#[serde(default, rename = "keyType", skip_serializing_if = "Option::is_none")]
226
pub key_type: Option<ColumnTypeJsonType>,
227
228
#[serde(default, rename = "valueType", skip_serializing_if = "Option::is_none")]
229
pub value_type: Option<ColumnTypeJsonType>,
230
231
#[serde(
232
default,
233
rename = "valueContainsNull",
234
skip_serializing_if = "Option::is_none"
235
)]
236
pub value_contains_null: Option<bool>,
237
}
238
239
#[derive(Debug, serde::Serialize, serde::Deserialize)]
240
#[serde(untagged)]
241
pub enum ColumnTypeJsonType {
242
/// * `{"type": "name", ..}``
243
TypeName(PlSmallStr),
244
/// * `{"type": {"type": "name", ..}}`
245
TypeJson(Box<ColumnTypeJson>),
246
}
247
248
impl Default for ColumnTypeJsonType {
249
fn default() -> Self {
250
Self::TypeName(PlSmallStr::EMPTY)
251
}
252
}
253
254
impl ColumnTypeJsonType {
255
pub const fn from_static_type_name(type_name: &'static str) -> Self {
256
Self::TypeName(PlSmallStr::from_static(type_name))
257
}
258
}
259
260
#[derive(Debug, serde::Deserialize)]
261
pub struct TableCredentials {
262
pub aws_temp_credentials: Option<TableCredentialsAws>,
263
pub azure_user_delegation_sas: Option<TableCredentialsAzure>,
264
pub gcp_oauth_token: Option<TableCredentialsGcp>,
265
pub expiration_time: i64,
266
}
267
268
impl TableCredentials {
269
pub fn into_enum(self) -> Option<TableCredentialsVariants> {
270
if let v @ Some(_) = self.aws_temp_credentials {
271
v.map(TableCredentialsVariants::Aws)
272
} else if let v @ Some(_) = self.azure_user_delegation_sas {
273
v.map(TableCredentialsVariants::Azure)
274
} else if let v @ Some(_) = self.gcp_oauth_token {
275
v.map(TableCredentialsVariants::Gcp)
276
} else {
277
None
278
}
279
}
280
}
281
282
pub enum TableCredentialsVariants {
283
Aws(TableCredentialsAws),
284
Azure(TableCredentialsAzure),
285
Gcp(TableCredentialsGcp),
286
}
287
288
#[derive(Debug, serde::Deserialize)]
289
pub struct TableCredentialsAws {
290
pub access_key_id: String,
291
pub secret_access_key: String,
292
pub session_token: Option<String>,
293
294
#[serde(default)]
295
pub access_point: Option<String>,
296
}
297
298
#[derive(Debug, serde::Deserialize)]
299
pub struct TableCredentialsAzure {
300
pub sas_token: String,
301
}
302
303
#[derive(Debug, serde::Deserialize)]
304
pub struct TableCredentialsGcp {
305
pub oauth_token: String,
306
}
307
308
fn null_to_default<'de, T, D>(d: D) -> Result<T, D::Error>
309
where
310
T: Default + serde::de::Deserialize<'de>,
311
D: serde::de::Deserializer<'de>,
312
{
313
use serde::Deserialize;
314
let opt_val = Option::<T>::deserialize(d)?;
315
Ok(opt_val.unwrap_or_default())
316
}
317
318