Path: blob/main/crates/polars-testing/src/asserts/frame.rs
6940 views
/// Asserts that two DataFrames are equal according to the specified options.
///
/// This macro compares two Polars DataFrame objects and panics with a detailed error message if
/// they are not equal. It provides two forms:
/// - With custom comparison options
/// - With default comparison options
///
/// Both forms accept an optional trailing comma and expand to a single block, so the macro can be
/// used as a statement or in expression position (for example as a `match` arm).
///
/// # Example
///
/// ```
/// use polars_core::prelude::*;
/// use polars_testing::assert_dataframe_equal;
/// use polars_testing::asserts::DataFrameEqualOptions;
///
/// // Create two DataFrames to compare
/// let df1 = df! {
///     "a" => [1, 2, 3],
///     "b" => [4.0, 5.0, 6.0],
/// }.unwrap();
/// let df2 = df! {
///     "a" => [1, 2, 3],
///     "b" => [4.0, 5.0, 6.0],
/// }.unwrap();
///
/// // Assert with default options
/// assert_dataframe_equal!(&df1, &df2);
///
/// // Assert with custom options
/// let options = DataFrameEqualOptions::default()
///     .with_check_exact(true)
///     .with_check_row_order(false);
/// assert_dataframe_equal!(&df1, &df2, options);
/// ```
///
/// # Panics
///
/// Panics when the DataFrames are not equal according to the specified comparison criteria.
#[macro_export]
macro_rules! assert_dataframe_equal {
    // Default-options form: delegate to the explicit-options form.
    ($left:expr, $right:expr $(,)?) => {
        $crate::assert_dataframe_equal!(
            $left,
            $right,
            $crate::asserts::DataFrameEqualOptions::default()
        )
    };
    // Explicit-options form.
    ($left:expr, $right:expr, $options:expr $(,)?) => {{
        // Bind the options first so `$options` is evaluated before `$left` and `$right`.
        let options = $options;
        if let Err(e) = $crate::asserts::assert_dataframe_equal($left, $right, options) {
            panic!("{}", e);
        }
    }};
}

#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use polars_core::prelude::*;

    // Testing default struct implementation
    #[test]
    fn test_dataframe_equal_options() {
        let options = crate::asserts::DataFrameEqualOptions::default();

        assert!(options.check_row_order);
        assert!(options.check_column_order);
        assert!(options.check_dtypes);
        assert!(!options.check_exact);
        assert_eq!(options.rel_tol, 1e-5);
        assert_eq!(options.abs_tol, 1e-8);
        assert!(!options.categorical_as_str);
    }

    // Testing dataframe schema equality parameters
    #[test]
    #[should_panic(expected = "height (row count) mismatch")]
    fn test_dataframe_height_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2]).into(),
            Series::new("col2".into(), &["a", "b"]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    #[should_panic(expected = "columns mismatch")]
    fn test_dataframe_column_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("different_col".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    #[should_panic(expected = "dtypes do not match")]
    fn test_dataframe_dtype_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0, 2.0, 3.0]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_dataframe_dtype_mismatch_ignored() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0, 2.0, 3.0]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        let options = crate::asserts::DataFrameEqualOptions::default().with_check_dtypes(false);
        assert_dataframe_equal!(&df1, &df2, options);
    }

    #[test]
    #[should_panic(expected = "columns are not in the same order")]
    fn test_dataframe_column_order_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col2".into(), &["a", "b", "c"]).into(),
            Series::new("col1".into(), &[1, 2, 3]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_dataframe_column_order_mismatch_ignored() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col2".into(), &["a", "b", "c"]).into(),
            Series::new("col1".into(), &[1, 2, 3]).into(),
        ])
        .unwrap();

        let options =
            crate::asserts::DataFrameEqualOptions::default().with_check_column_order(false);
        assert_dataframe_equal!(&df1, &df2, options);
    }

    #[test]
    #[should_panic(expected = "columns mismatch: [\"col3\"] in left, but not in right")]
    fn test_dataframe_left_has_extra_column() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
            Series::new("col3".into(), &[true, false, true]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    #[should_panic(expected = "columns mismatch: [\"col3\"] in right, but not in left")]
    fn test_dataframe_right_has_extra_column() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
            Series::new("col3".into(), &[true, false, true]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    // Testing basic equality
    #[test]
    #[should_panic(expected = "value mismatch for column")]
    fn test_dataframe_value_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
            Series::new("col3".into(), &[true, false, true]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "changed"]).into(),
            Series::new("col3".into(), &[true, false, true]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_dataframe_equal() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
            Series::new("col3".into(), &[true, false, true]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
            Series::new("col3".into(), &[true, false, true]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    #[should_panic(expected = "value mismatch")]
    fn test_dataframe_row_order_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[3, 1, 2]).into(),
            Series::new("col2".into(), &["c", "a", "b"]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_dataframe_row_order_ignored() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1, 2, 3]).into(),
            Series::new("col2".into(), &["a", "b", "c"]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[3, 1, 2]).into(),
            Series::new("col2".into(), &["c", "a", "b"]).into(),
        ])
        .unwrap();

        let options = crate::asserts::DataFrameEqualOptions::default().with_check_row_order(false);
        assert_dataframe_equal!(&df1, &df2, options);
    }

    // Testing more comprehensive equality
    #[test]
    #[should_panic(expected = "value mismatch")]
    fn test_dataframe_complex_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("integers".into(), &[1, 2, 3, 4, 5]).into(),
            Series::new("floats".into(), &[1.1, 2.2, 3.3, 4.4, 5.5]).into(),
            Series::new("strings".into(), &["a", "b", "c", "d", "e"]).into(),
            Series::new("booleans".into(), &[true, false, true, false, true]).into(),
            Series::new("opt_ints".into(), &[Some(1), None, Some(3), Some(4), None]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("integers".into(), &[1, 2, 99, 4, 5]).into(),
            Series::new("floats".into(), &[1.1, 2.2, 3.3, 9.9, 5.5]).into(),
            Series::new("strings".into(), &["a", "b", "c", "CHANGED", "e"]).into(),
            Series::new("booleans".into(), &[true, false, false, false, true]).into(),
            Series::new("opt_ints".into(), &[Some(1), None, Some(3), None, None]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_dataframe_complex_match() {
        let df1 = DataFrame::new(vec![
            Series::new("integers".into(), &[1, 2, 3, 4, 5]).into(),
            Series::new("floats".into(), &[1.1, 2.2, 3.3, 4.4, 5.5]).into(),
            Series::new("strings".into(), &["a", "b", "c", "d", "e"]).into(),
            Series::new("booleans".into(), &[true, false, true, false, true]).into(),
            Series::new("opt_ints".into(), &[Some(1), None, Some(3), Some(4), None]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("integers".into(), &[1, 2, 3, 4, 5]).into(),
            Series::new("floats".into(), &[1.1, 2.2, 3.3, 4.4, 5.5]).into(),
            Series::new("strings".into(), &["a", "b", "c", "d", "e"]).into(),
            Series::new("booleans".into(), &[true, false, true, false, true]).into(),
            Series::new("opt_ints".into(), &[Some(1), None, Some(3), Some(4), None]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    // Testing float value precision equality
    #[test]
    #[should_panic(expected = "value mismatch")]
    fn test_dataframe_numeric_exact_fail() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0000001, 2.0000002, 3.0000003]).into(),
        ])
        .unwrap();

        let df2 =
            DataFrame::new(vec![Series::new("col1".into(), &[1.0, 2.0, 3.0]).into()]).unwrap();

        let options = crate::asserts::DataFrameEqualOptions::default().with_check_exact(true);
        assert_dataframe_equal!(&df1, &df2, options);
    }

    #[test]
    fn test_dataframe_numeric_tolerance_pass() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0000001, 2.0000002, 3.0000003]).into(),
        ])
        .unwrap();

        let df2 =
            DataFrame::new(vec![Series::new("col1".into(), &[1.0, 2.0, 3.0]).into()]).unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    // Testing equality with special values
    #[test]
    fn test_empty_dataframe_equal() {
        let df1 = DataFrame::default();
        let df2 = DataFrame::default();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_empty_dataframe_schema_equal() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &Vec::<i32>::new()).into(),
            Series::new("col2".into(), &Vec::<String>::new()).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &Vec::<i32>::new()).into(),
            Series::new("col2".into(), &Vec::<String>::new()).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    #[should_panic(expected = "value mismatch")]
    fn test_dataframe_single_row_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[42]).into(),
            Series::new("col2".into(), &["value"]).into(),
            Series::new("col3".into(), &[true]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[42]).into(),
            Series::new("col2".into(), &["different"]).into(),
            Series::new("col3".into(), &[true]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_dataframe_single_row_match() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[42]).into(),
            Series::new("col2".into(), &["value"]).into(),
            Series::new("col3".into(), &[true]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[42]).into(),
            Series::new("col2".into(), &["value"]).into(),
            Series::new("col3".into(), &[true]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    #[should_panic(expected = "value mismatch")]
    fn test_dataframe_null_values_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[Some(1), None, Some(3)]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[Some(1), Some(2), None]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_dataframe_null_values_match() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[Some(1), None, Some(3)]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[Some(1), None, Some(3)]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    #[should_panic(expected = "value mismatch")]
    fn test_dataframe_nan_values_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0, f64::NAN, 3.0]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0, 2.0, f64::NAN]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_dataframe_nan_values_match() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0, f64::NAN, 3.0]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0, f64::NAN, 3.0]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    #[should_panic(expected = "value mismatch")]
    fn test_dataframe_infinity_values_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0, f64::INFINITY, 3.0]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0, f64::NEG_INFINITY, 3.0]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_dataframe_infinity_values_match() {
        let df1 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0, f64::INFINITY, 3.0]).into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new("col1".into(), &[1.0, f64::INFINITY, 3.0]).into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    // Testing categorical operations
    #[test]
    #[should_panic(expected = "value mismatch")]
    fn test_dataframe_categorical_as_string_mismatch() {
        let mut categorical1 = Series::new("categories".into(), &["a", "b", "c", "d"]);
        categorical1 = categorical1
            .cast(&DataType::from_categories(Categories::global()))
            .unwrap();
        let df1 = DataFrame::new(vec![categorical1.into()]).unwrap();

        let mut categorical2 = Series::new("categories".into(), &["a", "b", "c", "e"]);
        categorical2 = categorical2
            .cast(&DataType::from_categories(Categories::global()))
            .unwrap();
        let df2 = DataFrame::new(vec![categorical2.into()]).unwrap();

        let options =
            crate::asserts::DataFrameEqualOptions::default().with_categorical_as_str(true);
        assert_dataframe_equal!(&df1, &df2, options);
    }

    #[test]
    fn test_dataframe_categorical_as_string_match() {
        let mut categorical1 = Series::new("categories".into(), &["a", "b", "c", "d"]);
        categorical1 = categorical1
            .cast(&DataType::from_categories(Categories::global()))
            .unwrap();
        let df1 = DataFrame::new(vec![categorical1.into()]).unwrap();

        let mut categorical2 = Series::new("categories".into(), &["a", "b", "c", "d"]);
        categorical2 = categorical2
            .cast(&DataType::from_categories(Categories::global()))
            .unwrap();
        let df2 = DataFrame::new(vec![categorical2.into()]).unwrap();

        let options =
            crate::asserts::DataFrameEqualOptions::default().with_categorical_as_str(true);
        assert_dataframe_equal!(&df1, &df2, options);
    }

    // Testing nested types
    #[test]
    #[should_panic(expected = "value mismatch")]
    fn test_dataframe_nested_values_mismatch() {
        let df1 = DataFrame::new(vec![
            Series::new(
                "list_col".into(),
                &[
                    Some(vec![1, 2, 3]),
                    Some(vec![4, 5, 6]),
                    None,
                    Some(vec![7, 8, 9]),
                ],
            )
            .into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new(
                "list_col".into(),
                &[
                    Some(vec![1, 2, 3]),
                    Some(vec![4, 5, 99]),
                    None,
                    Some(vec![7, 8, 9]),
                ],
            )
            .into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    #[test]
    fn test_dataframe_nested_values_match() {
        let df1 = DataFrame::new(vec![
            Series::new(
                "list_col".into(),
                &[Some(vec![1, 2, 3]), Some(vec![]), None, Some(vec![7, 8, 9])],
            )
            .into(),
        ])
        .unwrap();

        let df2 = DataFrame::new(vec![
            Series::new(
                "list_col".into(),
                &[Some(vec![1, 2, 3]), Some(vec![]), None, Some(vec![7, 8, 9])],
            )
            .into(),
        ])
        .unwrap();

        assert_dataframe_equal!(&df1, &df2);
    }

    // Testing macro call forms
    #[test]
    fn test_dataframe_macro_trailing_comma_and_expression_position() {
        let df1 =
            DataFrame::new(vec![Series::new("col1".into(), &[1, 2, 3]).into()]).unwrap();
        let df2 = df1.clone();

        // Trailing commas are accepted in both forms.
        assert_dataframe_equal!(&df1, &df2,);
        let options = crate::asserts::DataFrameEqualOptions::default();
        assert_dataframe_equal!(&df1, &df2, options,);

        // The expansion is a single block, so it is valid as a match arm.
        match df1.height() {
            3 => assert_dataframe_equal!(&df1, &df2),
            _ => unreachable!(),
        }
    }
}