Path: blob/main/crates/polars-plan/src/bin/dsl-schema.rs
8446 views
//! A tool for working with DSL schema.
//!
//! Usage:
//! `dsl-schema [generate|update-hashes|check-hashes] [PATH]`
//! - `generate` the DSL schema as a full JSON file (default `PATH`: `./dsl-schema.json`),
//! - `update-hashes` stored in the schema hashes file,
//! - `check-hashes` in the schema hashes file against the hashes from the code.
//!
//! The generated schema is affected by active features. To use a complete schema, first build
//! the whole workspace with all features:
//! ```sh
//! cargo build --all-features
//! ./target/debug/dsl-schema update-hashes
//! ./target/debug/dsl-schema check-hashes
//! ```
//!
//! The tool has the code schema built-in. After code changes, you need to run
//! `cargo build --all-features` again.

/// Entry point: dispatches to [`impls::run`] when the `dsl-schema` feature is enabled.
///
/// # Panics
///
/// Panics when the binary was built without the `dsl-schema` feature.
fn main() {
    #[cfg(not(feature = "dsl-schema"))]
    panic!("this tool requires the `dsl-schema` feature");

    #[cfg(feature = "dsl-schema")]
    {
        impls::run();
    }
}

#[cfg(feature = "dsl-schema")]
mod impls {
    use std::fs::File;
    use std::io::{BufWriter, Write};
    use std::path::Path;

    use polars_plan::dsl::DslPlan;
    use schemars::Schema;
    use sha2::Digest;

    /// Output path used by `generate` when no `PATH` argument is given.
    const DEFAULT_SCHEMA_PATH: &str = "./dsl-schema.json";
    /// Hashes file used by `update-hashes` and `check-hashes` when no `PATH` argument is given.
    const DEFAULT_HASHES_PATH: &str = "crates/polars-plan/dsl-schema-hashes.json";

    /// Parses the command line (`<command> [PATH]`) and runs the requested command.
    ///
    /// # Panics
    ///
    /// Panics when the command is missing or unknown, or when more than one
    /// argument follows the command.
    pub fn run() {
        let mut args = std::env::args();

        // Skip the program name.
        let _ = args.next();
        let cmd = args
            .next()
            .expect("missing command [generate, update-hashes, check-hashes]");
        let path = args.next();

        if let Some(unknown) = args.next() {
            panic!("unknown argument: `{unknown}`");
        }

        // The defaults are only allocated when no explicit path was given.
        match cmd.as_str() {
            "generate" => {
                generate(path.unwrap_or_else(|| DEFAULT_SCHEMA_PATH.to_owned()));
            },
            "update-hashes" => {
                update_hashes(path.unwrap_or_else(|| DEFAULT_HASHES_PATH.to_owned()));
            },
            "check-hashes" => {
                check_hashes(path.unwrap_or_else(|| DEFAULT_HASHES_PATH.to_owned()));
            },
            unknown => {
                panic!("unknown command: `{unknown}`");
            },
        }
    }

    /// Serializes the current DSL schema into a file at the given path.
    ///
    /// Any existing file at the path is overwritten.
    ///
    /// # Panics
    ///
    /// Panics when the file cannot be created or written, or when the schema
    /// cannot be serialized.
    fn generate(path: impl AsRef<Path>) {
        let schema = DslPlan::dsl_schema();

        let file = File::create(path).expect("failed to open the schema file for writing");
        // The pretty-printer emits many small writes; buffer them before they hit the file.
        let mut writer = BufWriter::new(file);
        serde_json::to_writer_pretty(&mut writer, &schema).expect("failed to serialize the schema");
        writeln!(writer).expect("failed to write the last newline");
        // Flush explicitly so that write errors are reported instead of being lost on drop.
        writer.flush().expect("failed to flush the schema file");
    }

    /// Outputs the current DSL schema hashes into a file at the given path.
    ///
    /// Any existing file at the path is overwritten.
    ///
    /// # Panics
    ///
    /// Panics when the file cannot be written.
    fn update_hashes(path: impl AsRef<Path>) {
        std::fs::write(path, current_schema_hashes())
            .expect("failed to write the schema hashes into the file");
        eprintln!("the DSL schema hashes file was updated");
    }

    /// Checks that the current schema hashes match the schema hashes in the file.
    ///
    /// On a mismatch, prints the hashes computed from the code to stderr and exits
    /// the process with status 1.
    ///
    /// # Panics
    ///
    /// Panics when the file cannot be read.
    fn check_hashes(path: impl AsRef<Path>) {
        let file_hashes =
            std::fs::read_to_string(path).expect("failed to read the schema hashes from the file");
        let expected = current_schema_hashes();
        if file_hashes != expected {
            eprintln!("the schema hashes are not up to date, run `make update-dsl-schema-hashes`");

            eprintln!("expected schema hashes:");
            eprintln!("{expected}");

            std::process::exit(1);
        }
        eprintln!("the DSL schema hashes are up to date");
    }

    /// Returns the schema hashes as a serialized JSON object.
    /// Each field is named after a data type, with its schema hash as the value.
    ///
    /// # Panics
    ///
    /// Panics when the schema has no `$defs` object, when `$defs` does not contain
    /// `DslPlan`, or when a definition cannot be viewed as a [`Schema`].
    fn current_schema_hashes() -> String {
        let schema = DslPlan::dsl_schema();

        // A schema without definitions must not silently produce an empty hashes file,
        // so both checks below run unconditionally.
        let definitions = schema
            .get("$defs")
            .and_then(|defs| defs.as_object())
            .expect("the DSL schema does not contain a `$defs` object");
        assert!(
            definitions.contains_key("DslPlan"),
            "the DSL schema definitions do not contain `DslPlan`"
        );

        let mut hashes = serde_json::Map::new();

        // Insert the subschemas
        for (name, def) in definitions {
            let mut def = def.to_owned();
            // Sort the nested objects before hashing the serialized definition.
            def.sort_all_objects();
            let def_schema: &Schema = (&def)
                .try_into()
                .unwrap_or_else(|err| panic!("definition `{name}` is not a valid schema: {err:?}"));
            hashes.insert(name.into(), schema_hash(def_schema).into());
        }

        hashes.sort_keys();

        serde_json::to_string_pretty(&hashes).expect("failed to serialize schema hashes file")
    }

    /// Returns the SHA-256 digest of the JSON-serialized schema as a lowercase hex string.
    fn schema_hash(schema: &Schema) -> String {
        let mut digest = sha2::Sha256::new();
        // The schema is serialized straight into the hasher.
        serde_json::to_writer(&mut digest, schema).expect("failed to serialize the schema");
        let hash = digest.finalize();
        format!("{hash:064x}")
    }
}