Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/bin/dsl-schema.rs
6939 views
1
//! A tool for working with DSL schema.
2
//!
3
//! Usage:
4
//! `dsl-schema [generate|update-hashes|check-hashes] [PATH]`
5
//! - `generate` writes the full DSL schema as a JSON file (default `PATH`: `./dsl-schema.json`),
//! - `update-hashes` rewrites the schema hashes file with the hashes computed from the code,
//! - `check-hashes` compares the schema hashes file against the hashes computed from the code.
8
//!
9
//! The generated schema is affected by active features. To use a complete schema, first build
10
//! the whole workspace with all features:
11
//! ```sh
12
//! cargo build --all-features
13
//! ./target/debug/dsl-schema update-hashes
14
//! ./target/debug/dsl-schema check-hashes
15
//! ```
//!
//! The tool has the code schema built-in. After code changes, you need to run
//! `cargo build --all-features` again.
19
20
/// Entry point of the `dsl-schema` tool.
///
/// With the `dsl-schema` feature enabled this hands over to `impls::run`. Without it
/// the implementation module is not compiled, so the binary panics immediately.
fn main() {
    #[cfg(feature = "dsl-schema")]
    impls::run();

    #[cfg(not(feature = "dsl-schema"))]
    panic!("this tool requires the `dsl-schema` feature");
}
29
30
#[cfg(feature = "dsl-schema")]
31
mod impls {
32
use std::fs::File;
33
use std::io::Write;
34
use std::path::Path;
35
36
use polars_plan::dsl::DslPlan;
37
use schemars::schema::SchemaObject;
38
use sha2::Digest;
39
40
const DEFAULT_HASHES_PATH: &str = "crates/polars-plan/dsl-schema-hashes.json";
41
42
pub fn run() {
43
let mut args = std::env::args();
44
45
let _ = args.next();
46
let cmd = args
47
.next()
48
.expect("missing command [generate, update-hashes, check-hashes]");
49
let path = args.next();
50
51
if let Some(unknown) = args.next() {
52
panic!("unknown argument: `{unknown}`");
53
}
54
55
match cmd.as_str() {
56
"generate" => {
57
generate(path.unwrap_or("./dsl-schema.json".to_owned()));
58
},
59
"update-hashes" => {
60
update_hashes(path.unwrap_or(DEFAULT_HASHES_PATH.to_owned()));
61
},
62
"check-hashes" => {
63
check_hashes(path.unwrap_or(DEFAULT_HASHES_PATH.to_owned()));
64
},
65
unknown => {
66
panic!("unknown command: `{unknown}`");
67
},
68
}
69
}
70
71
/// Serializes the current DSL schema into a file at the given path.
72
///
73
/// Any existing file at the path is overwritten.
74
fn generate(path: impl AsRef<Path>) {
75
let schema = DslPlan::dsl_schema();
76
77
let mut file = File::create(path).expect("failed to open the schema file for writing");
78
serde_json::to_writer_pretty(&mut file, &schema).expect("failed to serialize the schema");
79
writeln!(&mut file).expect("failed to write the last newline");
80
file.flush().expect("failed to flush the schema file");
81
}
82
83
/// Outputs the current DSL schema hashes into a file at the given path.
84
///
85
/// Any existing file at the path is overwritten.
86
fn update_hashes(path: impl AsRef<Path>) {
87
std::fs::write(path, current_schema_hashes())
88
.expect("failed to write the schema into the file");
89
eprintln!("the DSL schema file was updated");
90
}
91
92
/// Checks that the current schema hashes match the schema hashes in the file.
93
fn check_hashes(path: impl AsRef<Path>) {
94
let file_hashes =
95
std::fs::read_to_string(path).expect("failed to read the schema hashes from the file");
96
if file_hashes != current_schema_hashes() {
97
eprintln!("the schema hashes are not up to date, run `make update-dsl-schema-hashes`");
98
std::process::exit(1);
99
}
100
eprintln!("the DSL schema hashes are up to date");
101
}
102
103
/// Returns the schema hashes as a serialized JSON object.
104
/// Each field is named after a data type, with its schema hash as the value.
105
fn current_schema_hashes() -> String {
106
let schema = DslPlan::dsl_schema();
107
108
let mut hashes = serde_json::Map::new();
109
110
// Insert the top level enum schema
111
hashes.insert(String::from("DslPlan"), schema_hash(&schema.schema).into());
112
113
// Insert the subschemas
114
for (name, def) in schema.definitions {
115
hashes.insert(name, schema_hash(&def.into_object()).into());
116
}
117
118
hashes.sort_keys();
119
120
serde_json::to_string_pretty(&hashes).expect("failed to serialize schema hashes file")
121
}
122
123
fn schema_hash(schema: &SchemaObject) -> String {
124
let mut digest = sha2::Sha256::new();
125
serde_json::to_writer(&mut digest, schema).expect("failed to serialize the schema");
126
let hash = digest.finalize();
127
format!("{hash:064x}")
128
}
129
}
130
131