Path: blob/main/cranelift/codegen/src/opts/div_const.rs
1693 views
//! Compute "magic numbers" for division-by-constants transformations.1//!2//! Math helpers for division by (non-power-of-2) constants. This is based3//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There4//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size5//! makes little difference, but the signed-vs-unsigned aspect has a large6//! effect. Therefore everything is presented in the order U32 U64 S32 S647//! so as to emphasise the similarity of the U32 and U64 cases and the S328//! and S64 cases.910// Structures to hold the "magic numbers" computed.1112#[derive(PartialEq, Debug)]13pub struct MU32 {14pub mul_by: u32,15pub do_add: bool,16pub shift_by: i32,17}1819#[derive(PartialEq, Debug)]20pub struct MU64 {21pub mul_by: u64,22pub do_add: bool,23pub shift_by: i32,24}2526#[derive(PartialEq, Debug)]27pub struct MS32 {28pub mul_by: i32,29pub shift_by: i32,30}3132#[derive(PartialEq, Debug)]33pub struct MS64 {34pub mul_by: i64,35pub shift_by: i32,36}3738// The actual "magic number" generators follow.3940pub fn magic_u32(d: u32) -> MU32 {41debug_assert_ne!(d, 0);42debug_assert_ne!(d, 1); // d==1 generates out of range shifts.4344let mut do_add: bool = false;45let mut p: i32 = 31;46let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d;47let mut q1: u32 = 0x80000000u32 / nc;48let mut r1: u32 = 0x80000000u32 - q1 * nc;49let mut q2: u32 = 0x7FFFFFFFu32 / d;50let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;51loop {52p = p + 1;53if r1 >= nc - r1 {54q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1);55r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc);56} else {57q1 = u32::wrapping_mul(2, q1);58r1 = 2 * r1;59}60if r2 + 1 >= d - r2 {61if q2 >= 0x7FFFFFFFu32 {62do_add = true;63}64q2 = 2 * q2 + 1;65r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d);66} else {67if q2 >= 0x80000000u32 {68do_add = true;69}70q2 = u32::wrapping_mul(2, q2);71r2 = 2 * r2 + 1;72}73let delta: u32 = d - 1 - r2;74if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) {75break;76}77}7879MU32 {80mul_by: q2 + 1,81do_add,82shift_by: p - 32,83}84}8586pub fn magic_u64(d: u64) -> MU64 {87debug_assert_ne!(d, 0);88debug_assert_ne!(d, 1); // d==1 generates out of range shifts.8990let mut do_add: bool = false;91let mut p: i32 = 63;92let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d;93let mut q1: u64 = 0x8000000000000000u64 / nc;94let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;95let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;96let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;97loop {98p = p + 1;99if r1 >= nc - r1 {100q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1);101r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc);102} else {103q1 = u64::wrapping_mul(2, q1);104r1 = 2 * r1;105}106if r2 + 1 >= d - r2 {107if q2 >= 0x7FFFFFFFFFFFFFFFu64 {108do_add = true;109}110q2 = 2 * q2 + 1;111r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d);112} else {113if q2 >= 0x8000000000000000u64 {114do_add = true;115}116q2 = u64::wrapping_mul(2, q2);117r2 = 2 * r2 + 1;118}119let delta: u64 = d - 1 - r2;120if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) {121break;122}123}124125MU64 {126mul_by: q2 + 1,127do_add,128shift_by: p - 64,129}130}131132pub fn magic_s32(d: i32) -> MS32 {133debug_assert_ne!(d, -1);134debug_assert_ne!(d, 0);135debug_assert_ne!(d, 1);136let two31: u32 = 0x80000000u32;137let mut p: i32 = 31;138let ad: u32 = i32::wrapping_abs(d) as u32;139let t: u32 = two31 + ((d as u32) >> 31);140let anc: u32 = u32::wrapping_sub(t - 1, t % ad);141let mut q1: u32 = two31 / anc;142let mut r1: u32 = two31 - q1 * anc;143let mut q2: u32 = two31 / ad;144let mut r2: u32 = two31 - q2 * ad;145loop {146p = p + 1;147q1 = 2 * q1;148r1 = 2 * r1;149if r1 >= anc {150q1 = q1 + 1;151r1 = r1 - anc;152}153q2 = 2 * q2;154r2 = 2 * r2;155if r2 >= ad {156q2 = q2 + 1;157r2 = r2 - ad;158}159let delta: u32 = ad - r2;160if !(q1 < delta || (q1 == delta && r1 == 0)) {161break;162}163}164165MS32 {166mul_by: (if d < 0 {167u32::wrapping_neg(q2 + 1)168} else {169q2 + 1170}) as i32,171shift_by: p - 32,172}173}174175pub fn magic_s64(d: i64) -> MS64 {176debug_assert_ne!(d, -1);177debug_assert_ne!(d, 0);178debug_assert_ne!(d, 1);179let two63: u64 = 0x8000000000000000u64;180let mut p: i32 = 63;181let ad: u64 = i64::wrapping_abs(d) as u64;182let t: u64 = two63 + ((d as u64) >> 63);183let anc: u64 = u64::wrapping_sub(t - 1, t % ad);184let mut q1: u64 = two63 / anc;185let mut r1: u64 = two63 - q1 * anc;186let mut q2: u64 = two63 / ad;187let mut r2: u64 = two63 - q2 * ad;188loop {189p = p + 1;190q1 = 2 * q1;191r1 = 2 * r1;192if r1 >= anc {193q1 = q1 + 1;194r1 = r1 - anc;195}196q2 = 2 * q2;197r2 = 2 * r2;198if r2 >= ad {199q2 = q2 + 1;200r2 = r2 - ad;201}202let delta: u64 = ad - r2;203if !(q1 < delta || (q1 == delta && r1 == 0)) {204break;205}206}207208MS64 {209mul_by: (if d < 0 {210u64::wrapping_neg(q2 + 1)211} else {212q2 + 1213}) as i64,214shift_by: p - 64,215}216}217218#[cfg(test)]219mod tests {220use super::{MS32, MS64, MU32, MU64};221use super::{magic_s32, magic_s64, magic_u32, magic_u64};222use proptest::strategy::Strategy;223224fn make_mu32(mul_by: u32, do_add: bool, shift_by: i32) -> MU32 {225MU32 {226mul_by,227do_add,228shift_by,229}230}231232fn make_mu64(mul_by: u64, do_add: bool, shift_by: i32) -> MU64 {233MU64 {234mul_by,235do_add,236shift_by,237}238}239240fn make_ms32(mul_by: i32, shift_by: i32) -> MS32 {241MS32 { mul_by, shift_by }242}243244fn make_ms64(mul_by: i64, shift_by: i32) -> MS64 {245MS64 { mul_by, shift_by }246}247248#[test]249fn test_magic_u32() {250assert_eq!(magic_u32(2u32), make_mu32(0x80000000u32, false, 0));251assert_eq!(magic_u32(3u32), make_mu32(0xaaaaaaabu32, false, 1));252assert_eq!(magic_u32(4u32), make_mu32(0x40000000u32, false, 0));253assert_eq!(magic_u32(5u32), make_mu32(0xcccccccdu32, false, 2));254assert_eq!(magic_u32(6u32), make_mu32(0xaaaaaaabu32, false, 2));255assert_eq!(magic_u32(7u32), make_mu32(0x24924925u32, true, 3));256assert_eq!(magic_u32(9u32), make_mu32(0x38e38e39u32, false, 1));257assert_eq!(magic_u32(10u32), make_mu32(0xcccccccdu32, false, 3));258assert_eq!(magic_u32(11u32), make_mu32(0xba2e8ba3u32, false, 3));259assert_eq!(magic_u32(12u32), make_mu32(0xaaaaaaabu32, false, 3));260assert_eq!(magic_u32(25u32), make_mu32(0x51eb851fu32, false, 3));261assert_eq!(magic_u32(125u32), make_mu32(0x10624dd3u32, false, 3));262assert_eq!(magic_u32(625u32), make_mu32(0xd1b71759u32, false, 9));263assert_eq!(magic_u32(1337u32), make_mu32(0x88233b2bu32, true, 11));264assert_eq!(magic_u32(65535u32), make_mu32(0x80008001u32, false, 15));265assert_eq!(magic_u32(65536u32), make_mu32(0x00010000u32, false, 0));266assert_eq!(magic_u32(65537u32), make_mu32(0xffff0001u32, false, 16));267assert_eq!(magic_u32(31415927u32), make_mu32(0x445b4553u32, false, 23));268assert_eq!(269magic_u32(0xdeadbeefu32),270make_mu32(0x93275ab3u32, false, 31)271);272assert_eq!(273magic_u32(0xfffffffdu32),274make_mu32(0x40000001u32, false, 30)275);276assert_eq!(magic_u32(0xfffffffeu32), make_mu32(0x00000003u32, true, 32));277assert_eq!(278magic_u32(0xffffffffu32),279make_mu32(0x80000001u32, false, 31)280);281}282283#[test]284fn test_magic_u64() {285assert_eq!(magic_u64(2u64), make_mu64(0x8000000000000000u64, false, 0));286assert_eq!(magic_u64(3u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 1));287assert_eq!(magic_u64(4u64), make_mu64(0x4000000000000000u64, false, 0));288assert_eq!(magic_u64(5u64), make_mu64(0xcccccccccccccccdu64, false, 2));289assert_eq!(magic_u64(6u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 2));290assert_eq!(magic_u64(7u64), make_mu64(0x2492492492492493u64, true, 3));291assert_eq!(magic_u64(9u64), make_mu64(0xe38e38e38e38e38fu64, false, 3));292assert_eq!(magic_u64(10u64), make_mu64(0xcccccccccccccccdu64, false, 3));293assert_eq!(magic_u64(11u64), make_mu64(0x2e8ba2e8ba2e8ba3u64, false, 1));294assert_eq!(magic_u64(12u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 3));295assert_eq!(magic_u64(25u64), make_mu64(0x47ae147ae147ae15u64, true, 5));296assert_eq!(magic_u64(125u64), make_mu64(0x0624dd2f1a9fbe77u64, true, 7));297assert_eq!(298magic_u64(625u64),299make_mu64(0x346dc5d63886594bu64, false, 7)300);301assert_eq!(302magic_u64(1337u64),303make_mu64(0xc4119d952866a139u64, false, 10)304);305assert_eq!(306magic_u64(31415927u64),307make_mu64(0x116d154b9c3d2f85u64, true, 25)308);309assert_eq!(310magic_u64(0x00000000deadbeefu64),311make_mu64(0x93275ab2dfc9094bu64, false, 31)312);313assert_eq!(314magic_u64(0x00000000fffffffdu64),315make_mu64(0x8000000180000005u64, false, 31)316);317assert_eq!(318magic_u64(0x00000000fffffffeu64),319make_mu64(0x0000000200000005u64, true, 32)320);321assert_eq!(322magic_u64(0x00000000ffffffffu64),323make_mu64(0x8000000080000001u64, false, 31)324);325assert_eq!(326magic_u64(0x0000000100000000u64),327make_mu64(0x0000000100000000u64, false, 0)328);329assert_eq!(330magic_u64(0x0000000100000001u64),331make_mu64(0xffffffff00000001u64, false, 32)332);333assert_eq!(334magic_u64(0x0ddc0ffeebadf00du64),335make_mu64(0x2788e9d394b77da1u64, true, 60)336);337assert_eq!(338magic_u64(0xfffffffffffffffdu64),339make_mu64(0x4000000000000001u64, false, 62)340);341assert_eq!(342magic_u64(0xfffffffffffffffeu64),343make_mu64(0x0000000000000003u64, true, 64)344);345assert_eq!(346magic_u64(0xffffffffffffffffu64),347make_mu64(0x8000000000000001u64, false, 63)348);349}350351#[test]352fn test_magic_s32() {353assert_eq!(354magic_s32(-0x80000000i32),355make_ms32(0x7fffffffu32 as i32, 30)356);357assert_eq!(358magic_s32(-0x7FFFFFFFi32),359make_ms32(0xbfffffffu32 as i32, 29)360);361assert_eq!(362magic_s32(-0x7FFFFFFEi32),363make_ms32(0x7ffffffdu32 as i32, 30)364);365assert_eq!(magic_s32(-31415927i32), make_ms32(0xbba4baadu32 as i32, 23));366assert_eq!(magic_s32(-1337i32), make_ms32(0x9df73135u32 as i32, 9));367assert_eq!(magic_s32(-256i32), make_ms32(0x7fffffffu32 as i32, 7));368assert_eq!(magic_s32(-5i32), make_ms32(0x99999999u32 as i32, 1));369assert_eq!(magic_s32(-3i32), make_ms32(0x55555555u32 as i32, 1));370assert_eq!(magic_s32(-2i32), make_ms32(0x7fffffffu32 as i32, 0));371assert_eq!(magic_s32(2i32), make_ms32(0x80000001u32 as i32, 0));372assert_eq!(magic_s32(3i32), make_ms32(0x55555556u32 as i32, 0));373assert_eq!(magic_s32(4i32), make_ms32(0x80000001u32 as i32, 1));374assert_eq!(magic_s32(5i32), make_ms32(0x66666667u32 as i32, 1));375assert_eq!(magic_s32(6i32), make_ms32(0x2aaaaaabu32 as i32, 0));376assert_eq!(magic_s32(7i32), make_ms32(0x92492493u32 as i32, 2));377assert_eq!(magic_s32(9i32), make_ms32(0x38e38e39u32 as i32, 1));378assert_eq!(magic_s32(10i32), make_ms32(0x66666667u32 as i32, 2));379assert_eq!(magic_s32(11i32), make_ms32(0x2e8ba2e9u32 as i32, 1));380assert_eq!(magic_s32(12i32), make_ms32(0x2aaaaaabu32 as i32, 1));381assert_eq!(magic_s32(25i32), make_ms32(0x51eb851fu32 as i32, 3));382assert_eq!(magic_s32(125i32), make_ms32(0x10624dd3u32 as i32, 3));383assert_eq!(magic_s32(625i32), make_ms32(0x68db8badu32 as i32, 8));384assert_eq!(magic_s32(1337i32), make_ms32(0x6208cecbu32 as i32, 9));385assert_eq!(magic_s32(31415927i32), make_ms32(0x445b4553u32 as i32, 23));386assert_eq!(387magic_s32(0x7ffffffei32),388make_ms32(0x80000003u32 as i32, 30)389);390assert_eq!(391magic_s32(0x7fffffffi32),392make_ms32(0x40000001u32 as i32, 29)393);394}395396#[test]397fn test_magic_s64() {398assert_eq!(399magic_s64(-0x8000000000000000i64),400make_ms64(0x7fffffffffffffffu64 as i64, 62)401);402assert_eq!(403magic_s64(-0x7FFFFFFFFFFFFFFFi64),404make_ms64(0xbfffffffffffffffu64 as i64, 61)405);406assert_eq!(407magic_s64(-0x7FFFFFFFFFFFFFFEi64),408make_ms64(0x7ffffffffffffffdu64 as i64, 62)409);410assert_eq!(411magic_s64(-0x0ddC0ffeeBadF00di64),412make_ms64(0x6c3b8b1635a4412fu64 as i64, 59)413);414assert_eq!(415magic_s64(-0x100000001i64),416make_ms64(0x800000007fffffffu64 as i64, 31)417);418assert_eq!(419magic_s64(-0x100000000i64),420make_ms64(0x7fffffffffffffffu64 as i64, 31)421);422assert_eq!(423magic_s64(-0xFFFFFFFFi64),424make_ms64(0x7fffffff7fffffffu64 as i64, 31)425);426assert_eq!(427magic_s64(-0xFFFFFFFEi64),428make_ms64(0x7ffffffefffffffdu64 as i64, 31)429);430assert_eq!(431magic_s64(-0xFFFFFFFDi64),432make_ms64(0x7ffffffe7ffffffbu64 as i64, 31)433);434assert_eq!(435magic_s64(-0xDeadBeefi64),436make_ms64(0x6cd8a54d2036f6b5u64 as i64, 31)437);438assert_eq!(439magic_s64(-31415927i64),440make_ms64(0x7749755a31e1683du64 as i64, 24)441);442assert_eq!(443magic_s64(-1337i64),444make_ms64(0x9df731356bccaf63u64 as i64, 9)445);446assert_eq!(447magic_s64(-256i64),448make_ms64(0x7fffffffffffffffu64 as i64, 7)449);450assert_eq!(magic_s64(-5i64), make_ms64(0x9999999999999999u64 as i64, 1));451assert_eq!(magic_s64(-3i64), make_ms64(0x5555555555555555u64 as i64, 1));452assert_eq!(magic_s64(-2i64), make_ms64(0x7fffffffffffffffu64 as i64, 0));453assert_eq!(magic_s64(2i64), make_ms64(0x8000000000000001u64 as i64, 0));454assert_eq!(magic_s64(3i64), make_ms64(0x5555555555555556u64 as i64, 0));455assert_eq!(magic_s64(4i64), make_ms64(0x8000000000000001u64 as i64, 1));456assert_eq!(magic_s64(5i64), make_ms64(0x6666666666666667u64 as i64, 1));457assert_eq!(magic_s64(6i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 0));458assert_eq!(magic_s64(7i64), make_ms64(0x4924924924924925u64 as i64, 1));459assert_eq!(magic_s64(9i64), make_ms64(0x1c71c71c71c71c72u64 as i64, 0));460assert_eq!(magic_s64(10i64), make_ms64(0x6666666666666667u64 as i64, 2));461assert_eq!(magic_s64(11i64), make_ms64(0x2e8ba2e8ba2e8ba3u64 as i64, 1));462assert_eq!(magic_s64(12i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 1));463assert_eq!(magic_s64(25i64), make_ms64(0xa3d70a3d70a3d70bu64 as i64, 4));464assert_eq!(465magic_s64(125i64),466make_ms64(0x20c49ba5e353f7cfu64 as i64, 4)467);468assert_eq!(469magic_s64(625i64),470make_ms64(0x346dc5d63886594bu64 as i64, 7)471);472assert_eq!(473magic_s64(1337i64),474make_ms64(0x6208ceca9433509du64 as i64, 9)475);476assert_eq!(477magic_s64(31415927i64),478make_ms64(0x88b68aa5ce1e97c3u64 as i64, 24)479);480assert_eq!(481magic_s64(0x00000000deadbeefi64),482make_ms64(0x93275ab2dfc9094bu64 as i64, 31)483);484assert_eq!(485magic_s64(0x00000000fffffffdi64),486make_ms64(0x8000000180000005u64 as i64, 31)487);488assert_eq!(489magic_s64(0x00000000fffffffei64),490make_ms64(0x8000000100000003u64 as i64, 31)491);492assert_eq!(493magic_s64(0x00000000ffffffffi64),494make_ms64(0x8000000080000001u64 as i64, 31)495);496assert_eq!(497magic_s64(0x0000000100000000i64),498make_ms64(0x8000000000000001u64 as i64, 31)499);500assert_eq!(501magic_s64(0x0000000100000001i64),502make_ms64(0x7fffffff80000001u64 as i64, 31)503);504assert_eq!(505magic_s64(0x0ddc0ffeebadf00di64),506make_ms64(0x93c474e9ca5bbed1u64 as i64, 59)507);508assert_eq!(509magic_s64(0x7ffffffffffffffdi64),510make_ms64(0x2000000000000001u64 as i64, 60)511);512assert_eq!(513magic_s64(0x7ffffffffffffffei64),514make_ms64(0x8000000000000003u64 as i64, 62)515);516assert_eq!(517magic_s64(0x7fffffffffffffffi64),518make_ms64(0x4000000000000001u64 as i64, 61)519);520}521522#[test]523fn test_magic_generators_dont_panic() {524// The point of this is to check that the magic number generators525// don't panic with integer wraparounds, especially at boundary cases526// for their arguments. The actual results are thrown away, although527// we force `total` to be used, so that rustc can't optimise the528// entire computation away.529530// Testing UP magic_u32531let mut total: u64 = 0;532for x in 2..(200 * 1000u32) {533let m = magic_u32(x);534total = total ^ (m.mul_by as u64);535total = total + (m.shift_by as u64);536total = total + (if m.do_add { 123 } else { 456 });537}538assert_eq!(total, 2481999609);539540total = 0;541// Testing MIDPOINT magic_u32542for x in 0x8000_0000u32 - 10 * 1000u32..0x8000_0000u32 + 10 * 1000u32 {543let m = magic_u32(x);544total = total ^ (m.mul_by as u64);545total = total + (m.shift_by as u64);546total = total + (if m.do_add { 123 } else { 456 });547}548assert_eq!(total, 2399809723);549550total = 0;551// Testing DOWN magic_u32552for x in 0..(200 * 1000u32) {553let m = magic_u32(0xFFFF_FFFFu32 - x);554total = total ^ (m.mul_by as u64);555total = total + (m.shift_by as u64);556total = total + (if m.do_add { 123 } else { 456 });557}558assert_eq!(total, 271138267);559560// Testing UP magic_u64561total = 0;562for x in 2..(200 * 1000u64) {563let m = magic_u64(x);564total = total ^ m.mul_by;565total = total + (m.shift_by as u64);566total = total + (if m.do_add { 123 } else { 456 });567}568assert_eq!(total, 7430004086976261161);569570total = 0;571// Testing MIDPOINT magic_u64572for x in 0x8000_0000_0000_0000u64 - 10 * 1000u64..0x8000_0000_0000_0000u64 + 10 * 1000u64 {573let m = magic_u64(x);574total = total ^ m.mul_by;575total = total + (m.shift_by as u64);576total = total + (if m.do_add { 123 } else { 456 });577}578assert_eq!(total, 10312117246769520603);579580// Testing DOWN magic_u64581total = 0;582for x in 0..(200 * 1000u64) {583let m = magic_u64(0xFFFF_FFFF_FFFF_FFFFu64 - x);584total = total ^ m.mul_by;585total = total + (m.shift_by as u64);586total = total + (if m.do_add { 123 } else { 456 });587}588assert_eq!(total, 1126603594357269734);589590// Testing UP magic_s32591total = 0;592for x in 0..(200 * 1000i32) {593let m = magic_s32(-0x8000_0000i32 + x);594total = total ^ (m.mul_by as u64);595total = total + (m.shift_by as u64);596}597assert_eq!(total, 18446744069953376812);598599total = 0;600// Testing MIDPOINT magic_s32601for x in 0..(200 * 1000i32) {602let x2 = -100 * 1000i32 + x;603if x2 != -1 && x2 != 0 && x2 != 1 {604let m = magic_s32(x2);605total = total ^ (m.mul_by as u64);606total = total + (m.shift_by as u64);607}608}609assert_eq!(total, 351839350);610611// Testing DOWN magic_s32612total = 0;613for x in 0..(200 * 1000i32) {614let m = magic_s32(0x7FFF_FFFFi32 - x);615total = total ^ (m.mul_by as u64);616total = total + (m.shift_by as u64);617}618assert_eq!(total, 18446744072916880714);619620// Testing UP magic_s64621total = 0;622for x in 0..(200 * 1000i64) {623let m = magic_s64(-0x8000_0000_0000_0000i64 + x);624total = total ^ (m.mul_by as u64);625total = total + (m.shift_by as u64);626}627assert_eq!(total, 17929885647724831014);628629total = 0;630// Testing MIDPOINT magic_s64631for x in 0..(200 * 1000i64) {632let x2 = -100 * 1000i64 + x;633if x2 != -1 && x2 != 0 && x2 != 1 {634let m = magic_s64(x2);635total = total ^ (m.mul_by as u64);636total = total + (m.shift_by as u64);637}638}639assert_eq!(total, 18106042338125661964);640641// Testing DOWN magic_s64642total = 0;643for x in 0..(200 * 1000i64) {644let m = magic_s64(0x7FFF_FFFF_FFFF_FFFFi64 - x);645total = total ^ (m.mul_by as u64);646total = total + (m.shift_by as u64);647}648assert_eq!(total, 563301797155560970);649}650651// These generate reference results for signed/unsigned 32/64 bit652// division, rounding towards zero.653fn div_u32(x: u32, y: u32) -> u32 {654return x / y;655}656fn div_s32(x: i32, y: i32) -> i32 {657return x / y;658}659fn div_u64(x: u64, y: u64) -> u64 {660return x / y;661}662fn div_s64(x: i64, y: i64) -> i64 {663return x / y;664}665666// Returns the high half of a 32 bit unsigned widening multiply.667fn mulhw_u32(x: u32, y: u32) -> u32 {668let x64: u64 = x as u64;669let y64: u64 = y as u64;670let r64: u64 = x64 * y64;671(r64 >> 32) as u32672}673674// Returns the high half of a 32 bit signed widening multiply.675fn mulhw_s32(x: i32, y: i32) -> i32 {676let x64: i64 = x as i64;677let y64: i64 = y as i64;678let r64: i64 = x64 * y64;679(r64 >> 32) as i32680}681682// Returns the high half of a 64 bit unsigned widening multiply.683fn mulhw_u64(x: u64, y: u64) -> u64 {684let x128 = x as u128;685let y128 = y as u128;686let r128 = x128 * y128;687(r128 >> 64) as u64688}689690// Returns the high half of a 64 bit signed widening multiply.691fn mulhw_s64(x: i64, y: i64) -> i64 {692let x128 = x as i128;693let y128 = y as i128;694let r128 = x128 * y128;695(r128 >> 64) as i64696}697698/// Compute and check `q = n / d` using `magic`.699fn eval_magic_u32_div(n: u32, magic: &MU32) -> u32 {700let mut q = mulhw_u32(n, magic.mul_by);701if magic.do_add {702assert!(magic.shift_by >= 1 && magic.shift_by <= 32);703let mut t = n - q;704t >>= 1;705t = t + q;706q = t >> (magic.shift_by - 1);707} else {708assert!(magic.shift_by >= 0 && magic.shift_by <= 31);709q >>= magic.shift_by;710}711q712}713714/// Compute and check `r = n % d` using `magic`.715fn eval_magic_u32_rem(n: u32, d: u32, magic: &MU32) -> u32 {716let mut q = mulhw_u32(n, magic.mul_by);717if magic.do_add {718assert!(magic.shift_by >= 1 && magic.shift_by <= 32);719let mut t = n - q;720t >>= 1;721t = t + q;722q = t >> (magic.shift_by - 1);723} else {724assert!(magic.shift_by >= 0 && magic.shift_by <= 31);725q >>= magic.shift_by;726}727let tt = q.wrapping_mul(d);728n.wrapping_sub(tt)729}730731/// Compute and check `q = n / d` using `magic`.732fn eval_magic_u64_div(n: u64, magic: &MU64) -> u64 {733let mut q = mulhw_u64(n, magic.mul_by);734if magic.do_add {735assert!(magic.shift_by >= 1 && magic.shift_by <= 64);736let mut t = n - q;737t >>= 1;738t = t + q;739q = t >> (magic.shift_by - 1);740} else {741assert!(magic.shift_by >= 0 && magic.shift_by <= 63);742q >>= magic.shift_by;743}744q745}746747/// Compute and check `r = n % d` using `magic`.748fn eval_magic_u64_rem(n: u64, d: u64, magic: &MU64) -> u64 {749let mut q = mulhw_u64(n, magic.mul_by);750if magic.do_add {751assert!(magic.shift_by >= 1 && magic.shift_by <= 64);752let mut t = n - q;753t >>= 1;754t = t + q;755q = t >> (magic.shift_by - 1);756} else {757assert!(magic.shift_by >= 0 && magic.shift_by <= 63);758q >>= magic.shift_by;759}760let tt = q.wrapping_mul(d);761n.wrapping_sub(tt)762}763764/// Compute and check `q = n / d` using `magic`.765fn eval_magic_s32_div(n: i32, d: i32, magic: &MS32) -> i32 {766let mut q: i32 = mulhw_s32(n, magic.mul_by);767if d > 0 && magic.mul_by < 0 {768q = q + n;769} else if d < 0 && magic.mul_by > 0 {770q = q - n;771}772assert!(magic.shift_by >= 0 && magic.shift_by <= 31);773q = q >> magic.shift_by;774let mut t: u32 = q as u32;775t = t >> 31;776q = q + (t as i32);777q778}779780/// Compute and check `r = n % d` using `magic`.781fn eval_magic_s32_rem(n: i32, d: i32, magic: &MS32) -> i32 {782let mut q: i32 = mulhw_s32(n, magic.mul_by);783if d > 0 && magic.mul_by < 0 {784q = q + n;785} else if d < 0 && magic.mul_by > 0 {786q = q - n;787}788assert!(magic.shift_by >= 0 && magic.shift_by <= 31);789q = q >> magic.shift_by;790let mut t: u32 = q as u32;791t = t >> 31;792q = q + (t as i32);793let tt = q.wrapping_mul(d);794n.wrapping_sub(tt)795}796797/// Compute and check `q = n / d` using `magic`. */798fn eval_magic_s64_div(n: i64, d: i64, magic: &MS64) -> i64 {799let mut q: i64 = mulhw_s64(n, magic.mul_by);800if d > 0 && magic.mul_by < 0 {801q = q + n;802} else if d < 0 && magic.mul_by > 0 {803q = q - n;804}805assert!(magic.shift_by >= 0 && magic.shift_by <= 63);806q = q >> magic.shift_by;807let mut t: u64 = q as u64;808t = t >> 63;809q = q + (t as i64);810q811}812813/// Compute and check `r = n % d` using `magic`.814fn eval_magic_s64_rem(n: i64, d: i64, magic: &MS64) -> i64 {815let mut q: i64 = mulhw_s64(n, magic.mul_by);816if d > 0 && magic.mul_by < 0 {817q = q + n;818} else if d < 0 && magic.mul_by > 0 {819q = q - n;820}821assert!(magic.shift_by >= 0 && magic.shift_by <= 63);822q = q >> magic.shift_by;823let mut t: u64 = q as u64;824t = t >> 63;825q = q + (t as i64);826let tt = q.wrapping_mul(d);827n.wrapping_sub(tt)828}829830#[test]831fn test_magic_generators_give_correct_numbers() {832// For a variety of values for both `n` and `d`, compute the magic833// numbers for `d`, and in effect interpret them so as to compute834// `n / d`. Check that that equals the value of `n / d` computed835// directly by the hardware. This serves to check that the magic836// number generates work properly. In total, 50,148,000 tests are837// done.838839// Some constants840const MIN_U32: u32 = 0;841const MAX_U32: u32 = 0xFFFF_FFFFu32;842const MAX_U32_HALF: u32 = 0x8000_0000u32; // more or less843844const MIN_S32: i32 = 0x8000_0000u32 as i32;845const MAX_S32: i32 = 0x7FFF_FFFFu32 as i32;846847const MIN_U64: u64 = 0;848const MAX_U64: u64 = 0xFFFF_FFFF_FFFF_FFFFu64;849const MAX_U64_HALF: u64 = 0x8000_0000_0000_0000u64; // ditto850851const MIN_S64: i64 = 0x8000_0000_0000_0000u64 as i64;852const MAX_S64: i64 = 0x7FFF_FFFF_FFFF_FFFFu64 as i64;853854// Compute the magic numbers for `d` and then use them to compute and855// check `n / d` for around 1000 values of `n`, using unsigned 32-bit856// division.857fn test_magic_u32_inner(d: u32, n_tests_done: &mut i32) {858// Advance the numerator (the `n` in `n / d`) so as to test859// densely near the range ends (and, in the signed variants, near860// zero) but not so densely away from those regions.861fn advance_n_u32(x: u32) -> u32 {862if x < MIN_U32 + 110 {863return x + 1;864}865if x < MIN_U32 + 1700 {866return x + 23;867}868if x < MAX_U32 - 1700 {869let xd: f64 = (x as f64) * 1.06415927;870return if xd >= (MAX_U32 - 1700) as f64 {871MAX_U32 - 1700872} else {873xd as u32874};875}876if x < MAX_U32 - 110 {877return x + 23;878}879u32::wrapping_add(x, 1)880}881882let magic: MU32 = magic_u32(d);883let mut n: u32 = MIN_U32;884loop {885*n_tests_done += 1;886let q = eval_magic_u32_div(n, &magic);887assert_eq!(q, div_u32(n, d));888889n = advance_n_u32(n);890if n == MIN_U32 {891break;892}893}894}895896// Compute the magic numbers for `d` and then use them to compute and897// check `n / d` for around 1000 values of `n`, using signed 32-bit898// division.899fn test_magic_s32_inner(d: i32, n_tests_done: &mut i32) {900// See comment on advance_n_u32 above.901fn advance_n_s32(x: i32) -> i32 {902if x >= 0 && x <= 29 {903return x + 1;904}905if x < MIN_S32 + 110 {906return x + 1;907}908if x < MIN_S32 + 1700 {909return x + 23;910}911if x < MAX_S32 - 1700 {912let mut xd: f64 = x as f64;913xd = if xd < 0.0 {914xd / 1.06415927915} else {916xd * 1.06415927917};918return if xd >= (MAX_S32 - 1700) as f64 {919MAX_S32 - 1700920} else {921xd as i32922};923}924if x < MAX_S32 - 110 {925return x + 23;926}927if x == MAX_S32 {928return MIN_S32;929}930x + 1931}932933let magic: MS32 = magic_s32(d);934let mut n: i32 = MIN_S32;935loop {936*n_tests_done += 1;937let q = eval_magic_s32_div(n, d, &magic);938assert_eq!(q, div_s32(n, d));939940n = advance_n_s32(n);941if n == MIN_S32 {942break;943}944}945}946947// Compute the magic numbers for `d` and then use them to compute and948// check `n / d` for around 1000 values of `n`, using unsigned 64-bit949// division.950fn test_magic_u64_inner(d: u64, n_tests_done: &mut i32) {951// See comment on advance_n_u32 above.952fn advance_n_u64(x: u64) -> u64 {953if x < MIN_U64 + 110 {954return x + 1;955}956if x < MIN_U64 + 1700 {957return x + 23;958}959if x < MAX_U64 - 1700 {960let xd: f64 = (x as f64) * 1.06415927;961return if xd >= (MAX_U64 - 1700) as f64 {962MAX_U64 - 1700963} else {964xd as u64965};966}967if x < MAX_U64 - 110 {968return x + 23;969}970u64::wrapping_add(x, 1)971}972973let magic: MU64 = magic_u64(d);974let mut n: u64 = MIN_U64;975loop {976*n_tests_done += 1;977let q = eval_magic_u64_div(n, &magic);978assert_eq!(q, div_u64(n, d));979980n = advance_n_u64(n);981if n == MIN_U64 {982break;983}984}985}986987// Compute the magic numbers for `d` and then use them to compute and988// check `n / d` for around 1000 values of `n`, using signed 64-bit989// division.990fn test_magic_s64_inner(d: i64, n_tests_done: &mut i32) {991// See comment on advance_n_u32 above.992fn advance_n_s64(x: i64) -> i64 {993if x >= 0 && x <= 29 {994return x + 1;995}996if x < MIN_S64 + 110 {997return x + 1;998}999if x < MIN_S64 + 1700 {1000return x + 23;1001}1002if x < MAX_S64 - 1700 {1003let mut xd: f64 = x as f64;1004xd = if xd < 0.0 {1005xd / 1.064159271006} else {1007xd * 1.064159271008};1009return if xd >= (MAX_S64 - 1700) as f64 {1010MAX_S64 - 17001011} else {1012xd as i641013};1014}1015if x < MAX_S64 - 110 {1016return x + 23;1017}1018if x == MAX_S64 {1019return MIN_S64;1020}1021x + 11022}10231024let magic: MS64 = magic_s64(d);1025let mut n: i64 = MIN_S64;1026loop {1027*n_tests_done += 1;1028let q = eval_magic_s64_div(n, d, &magic);1029assert_eq!(q, div_s64(n, d));10301031n = advance_n_s64(n);1032if n == MIN_S64 {1033break;1034}1035}1036}10371038// Using all the above support machinery, actually run the tests.10391040let mut n_tests_done: i32 = 0;10411042// u32 division tests1043{1044// 2 .. 3k1045let mut d: u32 = 2;1046for _ in 0..3 * 1000 {1047test_magic_u32_inner(d, &mut n_tests_done);1048d += 1;1049}10501051// across the midpoint: midpoint - 3k .. midpoint + 3k1052d = MAX_U32_HALF - 3 * 1000;1053for _ in 0..2 * 3 * 1000 {1054test_magic_u32_inner(d, &mut n_tests_done);1055d += 1;1056}10571058// MAX_U32 - 3k .. MAX_U32 (in reverse order)1059d = MAX_U32;1060for _ in 0..3 * 1000 {1061test_magic_u32_inner(d, &mut n_tests_done);1062d -= 1;1063}1064}10651066// s32 division tests1067{1068// MIN_S32 .. MIN_S32 + 3k1069let mut d: i32 = MIN_S32;1070for _ in 0..3 * 1000 {1071test_magic_s32_inner(d, &mut n_tests_done);1072d += 1;1073}10741075// -3k .. -2 (in reverse order)1076d = -2;1077for _ in 0..3 * 1000 {1078test_magic_s32_inner(d, &mut n_tests_done);1079d -= 1;1080}10811082// 2 .. 3k1083d = 2;1084for _ in 0..3 * 1000 {1085test_magic_s32_inner(d, &mut n_tests_done);1086d += 1;1087}10881089// MAX_S32 - 3k .. MAX_S32 (in reverse order)1090d = MAX_S32;1091for _ in 0..3 * 1000 {1092test_magic_s32_inner(d, &mut n_tests_done);1093d -= 1;1094}1095}10961097// u64 division tests1098{1099// 2 .. 3k1100let mut d: u64 = 2;1101for _ in 0..3 * 1000 {1102test_magic_u64_inner(d, &mut n_tests_done);1103d += 1;1104}11051106// across the midpoint: midpoint - 3k .. midpoint + 3k1107d = MAX_U64_HALF - 3 * 1000;1108for _ in 0..2 * 3 * 1000 {1109test_magic_u64_inner(d, &mut n_tests_done);1110d += 1;1111}11121113// mAX_U64 - 3000 .. mAX_U64 (in reverse order)1114d = MAX_U64;1115for _ in 0..3 * 1000 {1116test_magic_u64_inner(d, &mut n_tests_done);1117d -= 1;1118}1119}11201121// s64 division tests1122{1123// MIN_S64 .. MIN_S64 + 3k1124let mut d: i64 = MIN_S64;1125for _ in 0..3 * 1000 {1126test_magic_s64_inner(d, &mut n_tests_done);1127d += 1;1128}11291130// -3k .. -2 (in reverse order)1131d = -2;1132for _ in 0..3 * 1000 {1133test_magic_s64_inner(d, &mut n_tests_done);1134d -= 1;1135}11361137// 2 .. 3k1138d = 2;1139for _ in 0..3 * 1000 {1140test_magic_s64_inner(d, &mut n_tests_done);1141d += 1;1142}11431144// MAX_S64 - 3k .. MAX_S64 (in reverse order)1145d = MAX_S64;1146for _ in 0..3 * 1000 {1147test_magic_s64_inner(d, &mut n_tests_done);1148d -= 1;1149}1150}1151assert_eq!(n_tests_done, 50_148_000);1152}11531154proptest::proptest! {1155#[test]1156fn proptest_magic_u32(numerator in 0..u32::MAX, divisor in 1..u32::MAX) {1157let expected_div = numerator.wrapping_div(divisor);1158let expected_rem = numerator.wrapping_rem(divisor);11591160let magic = magic_u32(divisor);1161let actual_div = eval_magic_u32_div(numerator, &magic);1162let actual_rem = eval_magic_u32_rem(numerator, divisor, &magic);11631164assert_eq!(expected_div, actual_div);1165assert_eq!(expected_rem, actual_rem);1166}11671168#[test]1169fn proptest_magic_u64(numerator in 0..u64::MAX, divisor in 1..u64::MAX) {1170let expected_div = numerator.wrapping_div(divisor);1171let expected_rem = numerator.wrapping_rem(divisor);11721173let magic = magic_u64(divisor);1174let actual_div = eval_magic_u64_div(numerator, &magic);1175let actual_rem = eval_magic_u64_rem(numerator, divisor, &magic);11761177assert_eq!(expected_div, actual_div);1178assert_eq!(expected_rem, actual_rem);1179}11801181#[test]1182fn proptest_magic_s32(1183numerator in i32::MIN..i32::MAX,1184divisor in (i32::MIN..i32::MAX).prop_filter("avoid divide-by-zero", |d| *d != 0),1185) {1186let expected_div = numerator.wrapping_div(divisor);1187let expected_rem = numerator.wrapping_rem(divisor);11881189let magic = magic_s32(divisor);1190let actual_div = eval_magic_s32_div(numerator, divisor, &magic);1191let actual_rem = eval_magic_s32_rem(numerator, divisor, &magic);11921193assert_eq!(expected_div, actual_div);1194assert_eq!(expected_rem, actual_rem);1195}11961197#[test]1198fn proptest_magic_s64(1199numerator in i64::MIN..i64::MAX,1200divisor in (i64::MIN..i64::MAX).prop_filter("avoid divide-by-zero", |d| *d != 0),1201) {1202let expected_div = numerator.wrapping_div(divisor);1203let expected_rem = numerator.wrapping_rem(divisor);12041205let magic = magic_s64(divisor);1206let actual_div = eval_magic_s64_div(numerator, divisor, &magic);1207let actual_rem = eval_magic_s64_rem(numerator, divisor, &magic);12081209assert_eq!(expected_div, actual_div);1210assert_eq!(expected_rem, actual_rem);1211}1212}1213}121412151216