Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/bin/dsl-schema.rs
8446 views
1
//! A tool for working with DSL schema.
2
//!
3
//! Usage:
4
//! `dsl-schema [generate|update-hashes|check-hashes] [PATH]`
5
//! - `generate` the DSL schema as a full JSON file in the current directory
6
//! - `update-hashes` stored in the schema hashes file,
7
//! - `check-hashes` in the schema hashes file against the hashes from the code.
8
//!
9
//! The generated schema is affected by active features. To use a complete schema, first build
10
//! the whole workspace with all features:
11
//! ```sh
12
//! cargo build --all-features
13
//! ./target/debug/dsl-schema update-hashes
14
//! ./target/debug/dsl-schema check-hashes
15
//! ```
//!
//! The tool has the code schema built-in. After code changes, you need to run
//! `cargo build --all-features` again.
19
20
/// Entry point of the `dsl-schema` tool.
///
/// With the `dsl-schema` feature enabled this delegates to `impls::run`; without it the
/// tool cannot do anything useful and panics with an explanatory message.
fn main() {
    // Exactly one of the two statements below survives conditional compilation.
    #[cfg(feature = "dsl-schema")]
    impls::run();

    #[cfg(not(feature = "dsl-schema"))]
    panic!("this tool requires the `dsl-schema` feature");
}
29
30
#[cfg(feature = "dsl-schema")]
31
mod impls {
32
use std::fs::File;
33
use std::io::Write;
34
use std::path::Path;
35
36
use polars_plan::dsl::DslPlan;
37
use schemars::Schema;
38
use sha2::Digest;
39
40
const DEFAULT_HASHES_PATH: &str = "crates/polars-plan/dsl-schema-hashes.json";
41
42
pub fn run() {
43
let mut args = std::env::args();
44
45
let _ = args.next();
46
let cmd = args
47
.next()
48
.expect("missing command [generate, update-hashes, check-hashes]");
49
let path = args.next();
50
51
if let Some(unknown) = args.next() {
52
panic!("unknown argument: `{unknown}`");
53
}
54
55
match cmd.as_str() {
56
"generate" => {
57
generate(path.unwrap_or("./dsl-schema.json".to_owned()));
58
},
59
"update-hashes" => {
60
update_hashes(path.unwrap_or(DEFAULT_HASHES_PATH.to_owned()));
61
},
62
"check-hashes" => {
63
check_hashes(path.unwrap_or(DEFAULT_HASHES_PATH.to_owned()));
64
},
65
unknown => {
66
panic!("unknown command: `{unknown}`");
67
},
68
}
69
}
70
71
/// Serializes the current DSL schema into a file at the given path.
72
///
73
/// Any existing file at the path is overwritten.
74
fn generate(path: impl AsRef<Path>) {
75
let schema = DslPlan::dsl_schema();
76
77
let mut file = File::create(path).expect("failed to open the schema file for writing");
78
serde_json::to_writer_pretty(&mut file, &schema).expect("failed to serialize the schema");
79
writeln!(&mut file).expect("failed to write the last newline");
80
file.flush().expect("failed to flush the schema file");
81
}
82
83
/// Outputs the current DSL schema hashes into a file at the given path.
84
///
85
/// Any existing file at the path is overwritten.
86
fn update_hashes(path: impl AsRef<Path>) {
87
std::fs::write(path, current_schema_hashes())
88
.expect("failed to write the schema into the file");
89
eprintln!("the DSL schema file was updated");
90
}
91
92
/// Checks that the current schema hashes match the schema hashes in the file.
93
fn check_hashes(path: impl AsRef<Path>) {
94
let file_hashes =
95
std::fs::read_to_string(path).expect("failed to read the schema hashes from the file");
96
let expected = current_schema_hashes();
97
if file_hashes != expected {
98
eprintln!("the schema hashes are not up to date, run `make update-dsl-schema-hashes`");
99
100
eprintln!("expected schema hashes:");
101
eprintln!("{}", expected);
102
103
std::process::exit(1);
104
}
105
eprintln!("the DSL schema hashes are up to date");
106
}
107
108
/// Returns the schema hashes as a serialized JSON object.
109
/// Each field is named after a data type, with its schema hash as the value.
110
fn current_schema_hashes() -> String {
111
let schema = DslPlan::dsl_schema();
112
113
let mut hashes = serde_json::Map::new();
114
115
// Insert the subschemas
116
if let Some(definitions) = schema.get("$defs") {
117
if let Some(definitions) = definitions.as_object() {
118
for (name, def) in definitions {
119
let mut def = def.to_owned();
120
def.sort_all_objects();
121
let schema: &Schema = (&def).try_into().unwrap();
122
hashes.insert(name.into(), schema_hash(schema).into());
123
}
124
125
assert!(definitions.contains_key("DslPlan"));
126
}
127
}
128
129
hashes.sort_keys();
130
131
serde_json::to_string_pretty(&hashes).expect("failed to serialize schema hashes file")
132
}
133
134
fn schema_hash(schema: &Schema) -> String {
135
let mut digest = sha2::Sha256::new();
136
serde_json::to_writer(&mut digest, schema).expect("failed to serialize the schema");
137
let hash = digest.finalize();
138
format!("{hash:064x}")
139
}
140
}
141
142