Path: blob/main/crates/polars-plan/src/bin/dsl-schema.rs
6939 views
//! A tool for working with DSL schema.
//!
//! Usage:
//! `dsl-schema [generate|update-hashes|check-hashes] [PATH]`
//! - `generate` the DSL schema as a full JSON file in the current directory
//! - `update-hashes` stored in the schema hashes file,
//! - `check-hashes` in the schema hashes file against the hashes from the code.
//!
//! The generated schema is affected by active features. To use a complete schema, first build
//! the whole workspace with all features:
//! ```sh
//! cargo build --all-features
//! ./target/debug/dsl-schema update-hashes
//! ./target/debug/dsl-schema check-hashes
//! ```
//!
//! The tool has the code schema built-in. After code changes, you need to run
//! `cargo build --all-features` again.

/// Entry point: dispatches to [`impls::run`] when the `dsl-schema` feature is enabled.
///
/// # Panics
///
/// Panics unconditionally when the binary was built without the `dsl-schema` feature.
fn main() {
    #[cfg(not(feature = "dsl-schema"))]
    panic!("this tool requires the `dsl-schema` feature");

    #[cfg(feature = "dsl-schema")]
    {
        impls::run();
    }
}

#[cfg(feature = "dsl-schema")]
mod impls {
    use std::fs::File;
    use std::io::{BufWriter, Write};
    use std::path::Path;

    use polars_plan::dsl::DslPlan;
    use schemars::schema::SchemaObject;
    use sha2::Digest;

    /// Hashes file used by `update-hashes` and `check-hashes` when no `PATH` is given.
    const DEFAULT_HASHES_PATH: &str = "crates/polars-plan/dsl-schema-hashes.json";

    /// Parses the command line (`<command> [PATH]`) and runs the requested command.
    ///
    /// # Panics
    ///
    /// Panics when the command is missing or unknown, when more than one argument follows
    /// the command, or when the selected command itself fails.
    pub fn run() {
        let mut args = std::env::args();

        // Skip the program name.
        let _ = args.next();
        let cmd = args
            .next()
            .expect("missing command [generate, update-hashes, check-hashes]");
        let path = args.next();

        if let Some(unknown) = args.next() {
            panic!("unknown argument: `{unknown}`");
        }

        // The defaults are built lazily so nothing is allocated when a path was supplied.
        match cmd.as_str() {
            "generate" => {
                generate(path.unwrap_or_else(|| "./dsl-schema.json".to_owned()));
            },
            "update-hashes" => {
                update_hashes(path.unwrap_or_else(|| DEFAULT_HASHES_PATH.to_owned()));
            },
            "check-hashes" => {
                check_hashes(path.unwrap_or_else(|| DEFAULT_HASHES_PATH.to_owned()));
            },
            unknown => {
                panic!("unknown command: `{unknown}`");
            },
        }
    }

    /// Serializes the current DSL schema into a file at the given path.
    ///
    /// Any existing file at the path is overwritten.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be created, or if serializing or writing the schema fails.
    fn generate(path: impl AsRef<Path>) {
        let schema = DslPlan::dsl_schema();

        let file = File::create(path).expect("failed to open the schema file for writing");
        // The pretty serializer emits many small writes; buffer them instead of hitting the
        // file once per fragment.
        let mut writer = BufWriter::new(file);
        serde_json::to_writer_pretty(&mut writer, &schema).expect("failed to serialize the schema");
        writeln!(writer).expect("failed to write the last newline");
        // Flush explicitly: dropping a `BufWriter` would swallow any write error.
        writer.flush().expect("failed to flush the schema file");
    }

    /// Outputs the current DSL schema hashes into a file at the given path.
    ///
    /// Any existing file at the path is overwritten.
    ///
    /// # Panics
    ///
    /// Panics if the hashes cannot be computed or the file cannot be written.
    fn update_hashes(path: impl AsRef<Path>) {
        std::fs::write(path, current_schema_hashes())
            .expect("failed to write the schema hashes into the file");
        eprintln!("the DSL schema hashes file was updated");
    }

    /// Checks that the current schema hashes match the schema hashes in the file.
    ///
    /// The comparison is an exact string comparison against the output of
    /// [`current_schema_hashes`]. On mismatch a hint is printed to stderr and the process
    /// exits with status 1.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be read.
    fn check_hashes(path: impl AsRef<Path>) {
        let file_hashes =
            std::fs::read_to_string(path).expect("failed to read the schema hashes from the file");
        if file_hashes != current_schema_hashes() {
            eprintln!("the schema hashes are not up to date, run `make update-dsl-schema-hashes`");
            std::process::exit(1);
        }
        eprintln!("the DSL schema hashes are up to date");
    }

    /// Returns the schema hashes as a serialized JSON object.
    /// Each field is named after a data type, with its schema hash as the value.
    ///
    /// The top-level schema is stored under the `DslPlan` key and the keys are sorted before
    /// the object is pretty-printed.
    fn current_schema_hashes() -> String {
        let schema = DslPlan::dsl_schema();

        let mut hashes = serde_json::Map::new();

        // Insert the top level enum schema
        hashes.insert(String::from("DslPlan"), schema_hash(&schema.schema).into());

        // Insert the subschemas
        for (name, def) in schema.definitions {
            hashes.insert(name, schema_hash(&def.into_object()).into());
        }

        hashes.sort_keys();

        serde_json::to_string_pretty(&hashes).expect("failed to serialize schema hashes file")
    }

    /// Returns the SHA-256 digest of the schema's JSON serialization, formatted as hex.
    fn schema_hash(schema: &SchemaObject) -> String {
        let mut digest = sha2::Sha256::new();
        // Serialize straight into the hasher rather than through an intermediate buffer.
        serde_json::to_writer(&mut digest, schema).expect("failed to serialize the schema");
        let hash = digest.finalize();
        format!("{hash:064x}")
    }
}