Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bytecodealliance
GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/opts/div_const.rs
1693 views
1
//! Compute "magic numbers" for division-by-constants transformations.
2
//!
3
//! Math helpers for division by (non-power-of-2) constants. This is based
4
//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
5
//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
6
//! makes little difference, but the signed-vs-unsigned aspect has a large
7
//! effect. Therefore everything is presented in the order U32 U64 S32 S64
8
//! so as to emphasise the similarity of the U32 and U64 cases and the S32
9
//! and S64 cases.
10
11
// Structures to hold the "magic numbers" computed.
12
13
#[derive(PartialEq, Debug)]
14
pub struct MU32 {
15
pub mul_by: u32,
16
pub do_add: bool,
17
pub shift_by: i32,
18
}
19
20
#[derive(PartialEq, Debug)]
21
pub struct MU64 {
22
pub mul_by: u64,
23
pub do_add: bool,
24
pub shift_by: i32,
25
}
26
27
#[derive(PartialEq, Debug)]
28
pub struct MS32 {
29
pub mul_by: i32,
30
pub shift_by: i32,
31
}
32
33
#[derive(PartialEq, Debug)]
34
pub struct MS64 {
35
pub mul_by: i64,
36
pub shift_by: i32,
37
}
38
39
// The actual "magic number" generators follow.
40
41
pub fn magic_u32(d: u32) -> MU32 {
42
debug_assert_ne!(d, 0);
43
debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
44
45
let mut do_add: bool = false;
46
let mut p: i32 = 31;
47
let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d;
48
let mut q1: u32 = 0x80000000u32 / nc;
49
let mut r1: u32 = 0x80000000u32 - q1 * nc;
50
let mut q2: u32 = 0x7FFFFFFFu32 / d;
51
let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;
52
loop {
53
p = p + 1;
54
if r1 >= nc - r1 {
55
q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1);
56
r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc);
57
} else {
58
q1 = u32::wrapping_mul(2, q1);
59
r1 = 2 * r1;
60
}
61
if r2 + 1 >= d - r2 {
62
if q2 >= 0x7FFFFFFFu32 {
63
do_add = true;
64
}
65
q2 = 2 * q2 + 1;
66
r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d);
67
} else {
68
if q2 >= 0x80000000u32 {
69
do_add = true;
70
}
71
q2 = u32::wrapping_mul(2, q2);
72
r2 = 2 * r2 + 1;
73
}
74
let delta: u32 = d - 1 - r2;
75
if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) {
76
break;
77
}
78
}
79
80
MU32 {
81
mul_by: q2 + 1,
82
do_add,
83
shift_by: p - 32,
84
}
85
}
86
87
pub fn magic_u64(d: u64) -> MU64 {
88
debug_assert_ne!(d, 0);
89
debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
90
91
let mut do_add: bool = false;
92
let mut p: i32 = 63;
93
let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d;
94
let mut q1: u64 = 0x8000000000000000u64 / nc;
95
let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;
96
let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;
97
let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;
98
loop {
99
p = p + 1;
100
if r1 >= nc - r1 {
101
q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1);
102
r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc);
103
} else {
104
q1 = u64::wrapping_mul(2, q1);
105
r1 = 2 * r1;
106
}
107
if r2 + 1 >= d - r2 {
108
if q2 >= 0x7FFFFFFFFFFFFFFFu64 {
109
do_add = true;
110
}
111
q2 = 2 * q2 + 1;
112
r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d);
113
} else {
114
if q2 >= 0x8000000000000000u64 {
115
do_add = true;
116
}
117
q2 = u64::wrapping_mul(2, q2);
118
r2 = 2 * r2 + 1;
119
}
120
let delta: u64 = d - 1 - r2;
121
if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) {
122
break;
123
}
124
}
125
126
MU64 {
127
mul_by: q2 + 1,
128
do_add,
129
shift_by: p - 64,
130
}
131
}
132
133
pub fn magic_s32(d: i32) -> MS32 {
134
debug_assert_ne!(d, -1);
135
debug_assert_ne!(d, 0);
136
debug_assert_ne!(d, 1);
137
let two31: u32 = 0x80000000u32;
138
let mut p: i32 = 31;
139
let ad: u32 = i32::wrapping_abs(d) as u32;
140
let t: u32 = two31 + ((d as u32) >> 31);
141
let anc: u32 = u32::wrapping_sub(t - 1, t % ad);
142
let mut q1: u32 = two31 / anc;
143
let mut r1: u32 = two31 - q1 * anc;
144
let mut q2: u32 = two31 / ad;
145
let mut r2: u32 = two31 - q2 * ad;
146
loop {
147
p = p + 1;
148
q1 = 2 * q1;
149
r1 = 2 * r1;
150
if r1 >= anc {
151
q1 = q1 + 1;
152
r1 = r1 - anc;
153
}
154
q2 = 2 * q2;
155
r2 = 2 * r2;
156
if r2 >= ad {
157
q2 = q2 + 1;
158
r2 = r2 - ad;
159
}
160
let delta: u32 = ad - r2;
161
if !(q1 < delta || (q1 == delta && r1 == 0)) {
162
break;
163
}
164
}
165
166
MS32 {
167
mul_by: (if d < 0 {
168
u32::wrapping_neg(q2 + 1)
169
} else {
170
q2 + 1
171
}) as i32,
172
shift_by: p - 32,
173
}
174
}
175
176
pub fn magic_s64(d: i64) -> MS64 {
177
debug_assert_ne!(d, -1);
178
debug_assert_ne!(d, 0);
179
debug_assert_ne!(d, 1);
180
let two63: u64 = 0x8000000000000000u64;
181
let mut p: i32 = 63;
182
let ad: u64 = i64::wrapping_abs(d) as u64;
183
let t: u64 = two63 + ((d as u64) >> 63);
184
let anc: u64 = u64::wrapping_sub(t - 1, t % ad);
185
let mut q1: u64 = two63 / anc;
186
let mut r1: u64 = two63 - q1 * anc;
187
let mut q2: u64 = two63 / ad;
188
let mut r2: u64 = two63 - q2 * ad;
189
loop {
190
p = p + 1;
191
q1 = 2 * q1;
192
r1 = 2 * r1;
193
if r1 >= anc {
194
q1 = q1 + 1;
195
r1 = r1 - anc;
196
}
197
q2 = 2 * q2;
198
r2 = 2 * r2;
199
if r2 >= ad {
200
q2 = q2 + 1;
201
r2 = r2 - ad;
202
}
203
let delta: u64 = ad - r2;
204
if !(q1 < delta || (q1 == delta && r1 == 0)) {
205
break;
206
}
207
}
208
209
MS64 {
210
mul_by: (if d < 0 {
211
u64::wrapping_neg(q2 + 1)
212
} else {
213
q2 + 1
214
}) as i64,
215
shift_by: p - 64,
216
}
217
}
218
219
#[cfg(test)]
220
mod tests {
221
use super::{MS32, MS64, MU32, MU64};
222
use super::{magic_s32, magic_s64, magic_u32, magic_u64};
223
use proptest::strategy::Strategy;
224
225
fn make_mu32(mul_by: u32, do_add: bool, shift_by: i32) -> MU32 {
226
MU32 {
227
mul_by,
228
do_add,
229
shift_by,
230
}
231
}
232
233
fn make_mu64(mul_by: u64, do_add: bool, shift_by: i32) -> MU64 {
234
MU64 {
235
mul_by,
236
do_add,
237
shift_by,
238
}
239
}
240
241
fn make_ms32(mul_by: i32, shift_by: i32) -> MS32 {
242
MS32 { mul_by, shift_by }
243
}
244
245
fn make_ms64(mul_by: i64, shift_by: i32) -> MS64 {
246
MS64 { mul_by, shift_by }
247
}
248
249
#[test]
250
fn test_magic_u32() {
251
assert_eq!(magic_u32(2u32), make_mu32(0x80000000u32, false, 0));
252
assert_eq!(magic_u32(3u32), make_mu32(0xaaaaaaabu32, false, 1));
253
assert_eq!(magic_u32(4u32), make_mu32(0x40000000u32, false, 0));
254
assert_eq!(magic_u32(5u32), make_mu32(0xcccccccdu32, false, 2));
255
assert_eq!(magic_u32(6u32), make_mu32(0xaaaaaaabu32, false, 2));
256
assert_eq!(magic_u32(7u32), make_mu32(0x24924925u32, true, 3));
257
assert_eq!(magic_u32(9u32), make_mu32(0x38e38e39u32, false, 1));
258
assert_eq!(magic_u32(10u32), make_mu32(0xcccccccdu32, false, 3));
259
assert_eq!(magic_u32(11u32), make_mu32(0xba2e8ba3u32, false, 3));
260
assert_eq!(magic_u32(12u32), make_mu32(0xaaaaaaabu32, false, 3));
261
assert_eq!(magic_u32(25u32), make_mu32(0x51eb851fu32, false, 3));
262
assert_eq!(magic_u32(125u32), make_mu32(0x10624dd3u32, false, 3));
263
assert_eq!(magic_u32(625u32), make_mu32(0xd1b71759u32, false, 9));
264
assert_eq!(magic_u32(1337u32), make_mu32(0x88233b2bu32, true, 11));
265
assert_eq!(magic_u32(65535u32), make_mu32(0x80008001u32, false, 15));
266
assert_eq!(magic_u32(65536u32), make_mu32(0x00010000u32, false, 0));
267
assert_eq!(magic_u32(65537u32), make_mu32(0xffff0001u32, false, 16));
268
assert_eq!(magic_u32(31415927u32), make_mu32(0x445b4553u32, false, 23));
269
assert_eq!(
270
magic_u32(0xdeadbeefu32),
271
make_mu32(0x93275ab3u32, false, 31)
272
);
273
assert_eq!(
274
magic_u32(0xfffffffdu32),
275
make_mu32(0x40000001u32, false, 30)
276
);
277
assert_eq!(magic_u32(0xfffffffeu32), make_mu32(0x00000003u32, true, 32));
278
assert_eq!(
279
magic_u32(0xffffffffu32),
280
make_mu32(0x80000001u32, false, 31)
281
);
282
}
283
284
#[test]
285
fn test_magic_u64() {
286
assert_eq!(magic_u64(2u64), make_mu64(0x8000000000000000u64, false, 0));
287
assert_eq!(magic_u64(3u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 1));
288
assert_eq!(magic_u64(4u64), make_mu64(0x4000000000000000u64, false, 0));
289
assert_eq!(magic_u64(5u64), make_mu64(0xcccccccccccccccdu64, false, 2));
290
assert_eq!(magic_u64(6u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 2));
291
assert_eq!(magic_u64(7u64), make_mu64(0x2492492492492493u64, true, 3));
292
assert_eq!(magic_u64(9u64), make_mu64(0xe38e38e38e38e38fu64, false, 3));
293
assert_eq!(magic_u64(10u64), make_mu64(0xcccccccccccccccdu64, false, 3));
294
assert_eq!(magic_u64(11u64), make_mu64(0x2e8ba2e8ba2e8ba3u64, false, 1));
295
assert_eq!(magic_u64(12u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 3));
296
assert_eq!(magic_u64(25u64), make_mu64(0x47ae147ae147ae15u64, true, 5));
297
assert_eq!(magic_u64(125u64), make_mu64(0x0624dd2f1a9fbe77u64, true, 7));
298
assert_eq!(
299
magic_u64(625u64),
300
make_mu64(0x346dc5d63886594bu64, false, 7)
301
);
302
assert_eq!(
303
magic_u64(1337u64),
304
make_mu64(0xc4119d952866a139u64, false, 10)
305
);
306
assert_eq!(
307
magic_u64(31415927u64),
308
make_mu64(0x116d154b9c3d2f85u64, true, 25)
309
);
310
assert_eq!(
311
magic_u64(0x00000000deadbeefu64),
312
make_mu64(0x93275ab2dfc9094bu64, false, 31)
313
);
314
assert_eq!(
315
magic_u64(0x00000000fffffffdu64),
316
make_mu64(0x8000000180000005u64, false, 31)
317
);
318
assert_eq!(
319
magic_u64(0x00000000fffffffeu64),
320
make_mu64(0x0000000200000005u64, true, 32)
321
);
322
assert_eq!(
323
magic_u64(0x00000000ffffffffu64),
324
make_mu64(0x8000000080000001u64, false, 31)
325
);
326
assert_eq!(
327
magic_u64(0x0000000100000000u64),
328
make_mu64(0x0000000100000000u64, false, 0)
329
);
330
assert_eq!(
331
magic_u64(0x0000000100000001u64),
332
make_mu64(0xffffffff00000001u64, false, 32)
333
);
334
assert_eq!(
335
magic_u64(0x0ddc0ffeebadf00du64),
336
make_mu64(0x2788e9d394b77da1u64, true, 60)
337
);
338
assert_eq!(
339
magic_u64(0xfffffffffffffffdu64),
340
make_mu64(0x4000000000000001u64, false, 62)
341
);
342
assert_eq!(
343
magic_u64(0xfffffffffffffffeu64),
344
make_mu64(0x0000000000000003u64, true, 64)
345
);
346
assert_eq!(
347
magic_u64(0xffffffffffffffffu64),
348
make_mu64(0x8000000000000001u64, false, 63)
349
);
350
}
351
352
#[test]
353
fn test_magic_s32() {
354
assert_eq!(
355
magic_s32(-0x80000000i32),
356
make_ms32(0x7fffffffu32 as i32, 30)
357
);
358
assert_eq!(
359
magic_s32(-0x7FFFFFFFi32),
360
make_ms32(0xbfffffffu32 as i32, 29)
361
);
362
assert_eq!(
363
magic_s32(-0x7FFFFFFEi32),
364
make_ms32(0x7ffffffdu32 as i32, 30)
365
);
366
assert_eq!(magic_s32(-31415927i32), make_ms32(0xbba4baadu32 as i32, 23));
367
assert_eq!(magic_s32(-1337i32), make_ms32(0x9df73135u32 as i32, 9));
368
assert_eq!(magic_s32(-256i32), make_ms32(0x7fffffffu32 as i32, 7));
369
assert_eq!(magic_s32(-5i32), make_ms32(0x99999999u32 as i32, 1));
370
assert_eq!(magic_s32(-3i32), make_ms32(0x55555555u32 as i32, 1));
371
assert_eq!(magic_s32(-2i32), make_ms32(0x7fffffffu32 as i32, 0));
372
assert_eq!(magic_s32(2i32), make_ms32(0x80000001u32 as i32, 0));
373
assert_eq!(magic_s32(3i32), make_ms32(0x55555556u32 as i32, 0));
374
assert_eq!(magic_s32(4i32), make_ms32(0x80000001u32 as i32, 1));
375
assert_eq!(magic_s32(5i32), make_ms32(0x66666667u32 as i32, 1));
376
assert_eq!(magic_s32(6i32), make_ms32(0x2aaaaaabu32 as i32, 0));
377
assert_eq!(magic_s32(7i32), make_ms32(0x92492493u32 as i32, 2));
378
assert_eq!(magic_s32(9i32), make_ms32(0x38e38e39u32 as i32, 1));
379
assert_eq!(magic_s32(10i32), make_ms32(0x66666667u32 as i32, 2));
380
assert_eq!(magic_s32(11i32), make_ms32(0x2e8ba2e9u32 as i32, 1));
381
assert_eq!(magic_s32(12i32), make_ms32(0x2aaaaaabu32 as i32, 1));
382
assert_eq!(magic_s32(25i32), make_ms32(0x51eb851fu32 as i32, 3));
383
assert_eq!(magic_s32(125i32), make_ms32(0x10624dd3u32 as i32, 3));
384
assert_eq!(magic_s32(625i32), make_ms32(0x68db8badu32 as i32, 8));
385
assert_eq!(magic_s32(1337i32), make_ms32(0x6208cecbu32 as i32, 9));
386
assert_eq!(magic_s32(31415927i32), make_ms32(0x445b4553u32 as i32, 23));
387
assert_eq!(
388
magic_s32(0x7ffffffei32),
389
make_ms32(0x80000003u32 as i32, 30)
390
);
391
assert_eq!(
392
magic_s32(0x7fffffffi32),
393
make_ms32(0x40000001u32 as i32, 29)
394
);
395
}
396
397
#[test]
398
fn test_magic_s64() {
399
assert_eq!(
400
magic_s64(-0x8000000000000000i64),
401
make_ms64(0x7fffffffffffffffu64 as i64, 62)
402
);
403
assert_eq!(
404
magic_s64(-0x7FFFFFFFFFFFFFFFi64),
405
make_ms64(0xbfffffffffffffffu64 as i64, 61)
406
);
407
assert_eq!(
408
magic_s64(-0x7FFFFFFFFFFFFFFEi64),
409
make_ms64(0x7ffffffffffffffdu64 as i64, 62)
410
);
411
assert_eq!(
412
magic_s64(-0x0ddC0ffeeBadF00di64),
413
make_ms64(0x6c3b8b1635a4412fu64 as i64, 59)
414
);
415
assert_eq!(
416
magic_s64(-0x100000001i64),
417
make_ms64(0x800000007fffffffu64 as i64, 31)
418
);
419
assert_eq!(
420
magic_s64(-0x100000000i64),
421
make_ms64(0x7fffffffffffffffu64 as i64, 31)
422
);
423
assert_eq!(
424
magic_s64(-0xFFFFFFFFi64),
425
make_ms64(0x7fffffff7fffffffu64 as i64, 31)
426
);
427
assert_eq!(
428
magic_s64(-0xFFFFFFFEi64),
429
make_ms64(0x7ffffffefffffffdu64 as i64, 31)
430
);
431
assert_eq!(
432
magic_s64(-0xFFFFFFFDi64),
433
make_ms64(0x7ffffffe7ffffffbu64 as i64, 31)
434
);
435
assert_eq!(
436
magic_s64(-0xDeadBeefi64),
437
make_ms64(0x6cd8a54d2036f6b5u64 as i64, 31)
438
);
439
assert_eq!(
440
magic_s64(-31415927i64),
441
make_ms64(0x7749755a31e1683du64 as i64, 24)
442
);
443
assert_eq!(
444
magic_s64(-1337i64),
445
make_ms64(0x9df731356bccaf63u64 as i64, 9)
446
);
447
assert_eq!(
448
magic_s64(-256i64),
449
make_ms64(0x7fffffffffffffffu64 as i64, 7)
450
);
451
assert_eq!(magic_s64(-5i64), make_ms64(0x9999999999999999u64 as i64, 1));
452
assert_eq!(magic_s64(-3i64), make_ms64(0x5555555555555555u64 as i64, 1));
453
assert_eq!(magic_s64(-2i64), make_ms64(0x7fffffffffffffffu64 as i64, 0));
454
assert_eq!(magic_s64(2i64), make_ms64(0x8000000000000001u64 as i64, 0));
455
assert_eq!(magic_s64(3i64), make_ms64(0x5555555555555556u64 as i64, 0));
456
assert_eq!(magic_s64(4i64), make_ms64(0x8000000000000001u64 as i64, 1));
457
assert_eq!(magic_s64(5i64), make_ms64(0x6666666666666667u64 as i64, 1));
458
assert_eq!(magic_s64(6i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 0));
459
assert_eq!(magic_s64(7i64), make_ms64(0x4924924924924925u64 as i64, 1));
460
assert_eq!(magic_s64(9i64), make_ms64(0x1c71c71c71c71c72u64 as i64, 0));
461
assert_eq!(magic_s64(10i64), make_ms64(0x6666666666666667u64 as i64, 2));
462
assert_eq!(magic_s64(11i64), make_ms64(0x2e8ba2e8ba2e8ba3u64 as i64, 1));
463
assert_eq!(magic_s64(12i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 1));
464
assert_eq!(magic_s64(25i64), make_ms64(0xa3d70a3d70a3d70bu64 as i64, 4));
465
assert_eq!(
466
magic_s64(125i64),
467
make_ms64(0x20c49ba5e353f7cfu64 as i64, 4)
468
);
469
assert_eq!(
470
magic_s64(625i64),
471
make_ms64(0x346dc5d63886594bu64 as i64, 7)
472
);
473
assert_eq!(
474
magic_s64(1337i64),
475
make_ms64(0x6208ceca9433509du64 as i64, 9)
476
);
477
assert_eq!(
478
magic_s64(31415927i64),
479
make_ms64(0x88b68aa5ce1e97c3u64 as i64, 24)
480
);
481
assert_eq!(
482
magic_s64(0x00000000deadbeefi64),
483
make_ms64(0x93275ab2dfc9094bu64 as i64, 31)
484
);
485
assert_eq!(
486
magic_s64(0x00000000fffffffdi64),
487
make_ms64(0x8000000180000005u64 as i64, 31)
488
);
489
assert_eq!(
490
magic_s64(0x00000000fffffffei64),
491
make_ms64(0x8000000100000003u64 as i64, 31)
492
);
493
assert_eq!(
494
magic_s64(0x00000000ffffffffi64),
495
make_ms64(0x8000000080000001u64 as i64, 31)
496
);
497
assert_eq!(
498
magic_s64(0x0000000100000000i64),
499
make_ms64(0x8000000000000001u64 as i64, 31)
500
);
501
assert_eq!(
502
magic_s64(0x0000000100000001i64),
503
make_ms64(0x7fffffff80000001u64 as i64, 31)
504
);
505
assert_eq!(
506
magic_s64(0x0ddc0ffeebadf00di64),
507
make_ms64(0x93c474e9ca5bbed1u64 as i64, 59)
508
);
509
assert_eq!(
510
magic_s64(0x7ffffffffffffffdi64),
511
make_ms64(0x2000000000000001u64 as i64, 60)
512
);
513
assert_eq!(
514
magic_s64(0x7ffffffffffffffei64),
515
make_ms64(0x8000000000000003u64 as i64, 62)
516
);
517
assert_eq!(
518
magic_s64(0x7fffffffffffffffi64),
519
make_ms64(0x4000000000000001u64 as i64, 61)
520
);
521
}
522
523
#[test]
524
fn test_magic_generators_dont_panic() {
525
// The point of this is to check that the magic number generators
526
// don't panic with integer wraparounds, especially at boundary cases
527
// for their arguments. The actual results are thrown away, although
528
// we force `total` to be used, so that rustc can't optimise the
529
// entire computation away.
530
531
// Testing UP magic_u32
532
let mut total: u64 = 0;
533
for x in 2..(200 * 1000u32) {
534
let m = magic_u32(x);
535
total = total ^ (m.mul_by as u64);
536
total = total + (m.shift_by as u64);
537
total = total + (if m.do_add { 123 } else { 456 });
538
}
539
assert_eq!(total, 2481999609);
540
541
total = 0;
542
// Testing MIDPOINT magic_u32
543
for x in 0x8000_0000u32 - 10 * 1000u32..0x8000_0000u32 + 10 * 1000u32 {
544
let m = magic_u32(x);
545
total = total ^ (m.mul_by as u64);
546
total = total + (m.shift_by as u64);
547
total = total + (if m.do_add { 123 } else { 456 });
548
}
549
assert_eq!(total, 2399809723);
550
551
total = 0;
552
// Testing DOWN magic_u32
553
for x in 0..(200 * 1000u32) {
554
let m = magic_u32(0xFFFF_FFFFu32 - x);
555
total = total ^ (m.mul_by as u64);
556
total = total + (m.shift_by as u64);
557
total = total + (if m.do_add { 123 } else { 456 });
558
}
559
assert_eq!(total, 271138267);
560
561
// Testing UP magic_u64
562
total = 0;
563
for x in 2..(200 * 1000u64) {
564
let m = magic_u64(x);
565
total = total ^ m.mul_by;
566
total = total + (m.shift_by as u64);
567
total = total + (if m.do_add { 123 } else { 456 });
568
}
569
assert_eq!(total, 7430004086976261161);
570
571
total = 0;
572
// Testing MIDPOINT magic_u64
573
for x in 0x8000_0000_0000_0000u64 - 10 * 1000u64..0x8000_0000_0000_0000u64 + 10 * 1000u64 {
574
let m = magic_u64(x);
575
total = total ^ m.mul_by;
576
total = total + (m.shift_by as u64);
577
total = total + (if m.do_add { 123 } else { 456 });
578
}
579
assert_eq!(total, 10312117246769520603);
580
581
// Testing DOWN magic_u64
582
total = 0;
583
for x in 0..(200 * 1000u64) {
584
let m = magic_u64(0xFFFF_FFFF_FFFF_FFFFu64 - x);
585
total = total ^ m.mul_by;
586
total = total + (m.shift_by as u64);
587
total = total + (if m.do_add { 123 } else { 456 });
588
}
589
assert_eq!(total, 1126603594357269734);
590
591
// Testing UP magic_s32
592
total = 0;
593
for x in 0..(200 * 1000i32) {
594
let m = magic_s32(-0x8000_0000i32 + x);
595
total = total ^ (m.mul_by as u64);
596
total = total + (m.shift_by as u64);
597
}
598
assert_eq!(total, 18446744069953376812);
599
600
total = 0;
601
// Testing MIDPOINT magic_s32
602
for x in 0..(200 * 1000i32) {
603
let x2 = -100 * 1000i32 + x;
604
if x2 != -1 && x2 != 0 && x2 != 1 {
605
let m = magic_s32(x2);
606
total = total ^ (m.mul_by as u64);
607
total = total + (m.shift_by as u64);
608
}
609
}
610
assert_eq!(total, 351839350);
611
612
// Testing DOWN magic_s32
613
total = 0;
614
for x in 0..(200 * 1000i32) {
615
let m = magic_s32(0x7FFF_FFFFi32 - x);
616
total = total ^ (m.mul_by as u64);
617
total = total + (m.shift_by as u64);
618
}
619
assert_eq!(total, 18446744072916880714);
620
621
// Testing UP magic_s64
622
total = 0;
623
for x in 0..(200 * 1000i64) {
624
let m = magic_s64(-0x8000_0000_0000_0000i64 + x);
625
total = total ^ (m.mul_by as u64);
626
total = total + (m.shift_by as u64);
627
}
628
assert_eq!(total, 17929885647724831014);
629
630
total = 0;
631
// Testing MIDPOINT magic_s64
632
for x in 0..(200 * 1000i64) {
633
let x2 = -100 * 1000i64 + x;
634
if x2 != -1 && x2 != 0 && x2 != 1 {
635
let m = magic_s64(x2);
636
total = total ^ (m.mul_by as u64);
637
total = total + (m.shift_by as u64);
638
}
639
}
640
assert_eq!(total, 18106042338125661964);
641
642
// Testing DOWN magic_s64
643
total = 0;
644
for x in 0..(200 * 1000i64) {
645
let m = magic_s64(0x7FFF_FFFF_FFFF_FFFFi64 - x);
646
total = total ^ (m.mul_by as u64);
647
total = total + (m.shift_by as u64);
648
}
649
assert_eq!(total, 563301797155560970);
650
}
651
652
// These generate reference results for signed/unsigned 32/64 bit
653
// division, rounding towards zero.
654
fn div_u32(x: u32, y: u32) -> u32 {
655
return x / y;
656
}
657
fn div_s32(x: i32, y: i32) -> i32 {
658
return x / y;
659
}
660
fn div_u64(x: u64, y: u64) -> u64 {
661
return x / y;
662
}
663
fn div_s64(x: i64, y: i64) -> i64 {
664
return x / y;
665
}
666
667
// Returns the high half of a 32 bit unsigned widening multiply.
668
fn mulhw_u32(x: u32, y: u32) -> u32 {
669
let x64: u64 = x as u64;
670
let y64: u64 = y as u64;
671
let r64: u64 = x64 * y64;
672
(r64 >> 32) as u32
673
}
674
675
// Returns the high half of a 32 bit signed widening multiply.
676
fn mulhw_s32(x: i32, y: i32) -> i32 {
677
let x64: i64 = x as i64;
678
let y64: i64 = y as i64;
679
let r64: i64 = x64 * y64;
680
(r64 >> 32) as i32
681
}
682
683
// Returns the high half of a 64 bit unsigned widening multiply.
684
fn mulhw_u64(x: u64, y: u64) -> u64 {
685
let x128 = x as u128;
686
let y128 = y as u128;
687
let r128 = x128 * y128;
688
(r128 >> 64) as u64
689
}
690
691
// Returns the high half of a 64 bit signed widening multiply.
692
fn mulhw_s64(x: i64, y: i64) -> i64 {
693
let x128 = x as i128;
694
let y128 = y as i128;
695
let r128 = x128 * y128;
696
(r128 >> 64) as i64
697
}
698
699
/// Compute and check `q = n / d` using `magic`.
700
fn eval_magic_u32_div(n: u32, magic: &MU32) -> u32 {
701
let mut q = mulhw_u32(n, magic.mul_by);
702
if magic.do_add {
703
assert!(magic.shift_by >= 1 && magic.shift_by <= 32);
704
let mut t = n - q;
705
t >>= 1;
706
t = t + q;
707
q = t >> (magic.shift_by - 1);
708
} else {
709
assert!(magic.shift_by >= 0 && magic.shift_by <= 31);
710
q >>= magic.shift_by;
711
}
712
q
713
}
714
715
/// Compute and check `r = n % d` using `magic`.
716
fn eval_magic_u32_rem(n: u32, d: u32, magic: &MU32) -> u32 {
717
let mut q = mulhw_u32(n, magic.mul_by);
718
if magic.do_add {
719
assert!(magic.shift_by >= 1 && magic.shift_by <= 32);
720
let mut t = n - q;
721
t >>= 1;
722
t = t + q;
723
q = t >> (magic.shift_by - 1);
724
} else {
725
assert!(magic.shift_by >= 0 && magic.shift_by <= 31);
726
q >>= magic.shift_by;
727
}
728
let tt = q.wrapping_mul(d);
729
n.wrapping_sub(tt)
730
}
731
732
/// Compute and check `q = n / d` using `magic`.
733
fn eval_magic_u64_div(n: u64, magic: &MU64) -> u64 {
734
let mut q = mulhw_u64(n, magic.mul_by);
735
if magic.do_add {
736
assert!(magic.shift_by >= 1 && magic.shift_by <= 64);
737
let mut t = n - q;
738
t >>= 1;
739
t = t + q;
740
q = t >> (magic.shift_by - 1);
741
} else {
742
assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
743
q >>= magic.shift_by;
744
}
745
q
746
}
747
748
/// Compute and check `r = n % d` using `magic`.
749
fn eval_magic_u64_rem(n: u64, d: u64, magic: &MU64) -> u64 {
750
let mut q = mulhw_u64(n, magic.mul_by);
751
if magic.do_add {
752
assert!(magic.shift_by >= 1 && magic.shift_by <= 64);
753
let mut t = n - q;
754
t >>= 1;
755
t = t + q;
756
q = t >> (magic.shift_by - 1);
757
} else {
758
assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
759
q >>= magic.shift_by;
760
}
761
let tt = q.wrapping_mul(d);
762
n.wrapping_sub(tt)
763
}
764
765
/// Compute and check `q = n / d` using `magic`.
766
fn eval_magic_s32_div(n: i32, d: i32, magic: &MS32) -> i32 {
767
let mut q: i32 = mulhw_s32(n, magic.mul_by);
768
if d > 0 && magic.mul_by < 0 {
769
q = q + n;
770
} else if d < 0 && magic.mul_by > 0 {
771
q = q - n;
772
}
773
assert!(magic.shift_by >= 0 && magic.shift_by <= 31);
774
q = q >> magic.shift_by;
775
let mut t: u32 = q as u32;
776
t = t >> 31;
777
q = q + (t as i32);
778
q
779
}
780
781
/// Compute and check `r = n % d` using `magic`.
782
fn eval_magic_s32_rem(n: i32, d: i32, magic: &MS32) -> i32 {
783
let mut q: i32 = mulhw_s32(n, magic.mul_by);
784
if d > 0 && magic.mul_by < 0 {
785
q = q + n;
786
} else if d < 0 && magic.mul_by > 0 {
787
q = q - n;
788
}
789
assert!(magic.shift_by >= 0 && magic.shift_by <= 31);
790
q = q >> magic.shift_by;
791
let mut t: u32 = q as u32;
792
t = t >> 31;
793
q = q + (t as i32);
794
let tt = q.wrapping_mul(d);
795
n.wrapping_sub(tt)
796
}
797
798
/// Compute and check `q = n / d` using `magic`. */
799
fn eval_magic_s64_div(n: i64, d: i64, magic: &MS64) -> i64 {
800
let mut q: i64 = mulhw_s64(n, magic.mul_by);
801
if d > 0 && magic.mul_by < 0 {
802
q = q + n;
803
} else if d < 0 && magic.mul_by > 0 {
804
q = q - n;
805
}
806
assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
807
q = q >> magic.shift_by;
808
let mut t: u64 = q as u64;
809
t = t >> 63;
810
q = q + (t as i64);
811
q
812
}
813
814
/// Compute and check `r = n % d` using `magic`.
815
fn eval_magic_s64_rem(n: i64, d: i64, magic: &MS64) -> i64 {
816
let mut q: i64 = mulhw_s64(n, magic.mul_by);
817
if d > 0 && magic.mul_by < 0 {
818
q = q + n;
819
} else if d < 0 && magic.mul_by > 0 {
820
q = q - n;
821
}
822
assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
823
q = q >> magic.shift_by;
824
let mut t: u64 = q as u64;
825
t = t >> 63;
826
q = q + (t as i64);
827
let tt = q.wrapping_mul(d);
828
n.wrapping_sub(tt)
829
}
830
831
#[test]
832
fn test_magic_generators_give_correct_numbers() {
833
// For a variety of values for both `n` and `d`, compute the magic
834
// numbers for `d`, and in effect interpret them so as to compute
835
// `n / d`. Check that that equals the value of `n / d` computed
836
// directly by the hardware. This serves to check that the magic
837
// number generates work properly. In total, 50,148,000 tests are
838
// done.
839
840
// Some constants
841
const MIN_U32: u32 = 0;
842
const MAX_U32: u32 = 0xFFFF_FFFFu32;
843
const MAX_U32_HALF: u32 = 0x8000_0000u32; // more or less
844
845
const MIN_S32: i32 = 0x8000_0000u32 as i32;
846
const MAX_S32: i32 = 0x7FFF_FFFFu32 as i32;
847
848
const MIN_U64: u64 = 0;
849
const MAX_U64: u64 = 0xFFFF_FFFF_FFFF_FFFFu64;
850
const MAX_U64_HALF: u64 = 0x8000_0000_0000_0000u64; // ditto
851
852
const MIN_S64: i64 = 0x8000_0000_0000_0000u64 as i64;
853
const MAX_S64: i64 = 0x7FFF_FFFF_FFFF_FFFFu64 as i64;
854
855
// Compute the magic numbers for `d` and then use them to compute and
856
// check `n / d` for around 1000 values of `n`, using unsigned 32-bit
857
// division.
858
fn test_magic_u32_inner(d: u32, n_tests_done: &mut i32) {
859
// Advance the numerator (the `n` in `n / d`) so as to test
860
// densely near the range ends (and, in the signed variants, near
861
// zero) but not so densely away from those regions.
862
fn advance_n_u32(x: u32) -> u32 {
863
if x < MIN_U32 + 110 {
864
return x + 1;
865
}
866
if x < MIN_U32 + 1700 {
867
return x + 23;
868
}
869
if x < MAX_U32 - 1700 {
870
let xd: f64 = (x as f64) * 1.06415927;
871
return if xd >= (MAX_U32 - 1700) as f64 {
872
MAX_U32 - 1700
873
} else {
874
xd as u32
875
};
876
}
877
if x < MAX_U32 - 110 {
878
return x + 23;
879
}
880
u32::wrapping_add(x, 1)
881
}
882
883
let magic: MU32 = magic_u32(d);
884
let mut n: u32 = MIN_U32;
885
loop {
886
*n_tests_done += 1;
887
let q = eval_magic_u32_div(n, &magic);
888
assert_eq!(q, div_u32(n, d));
889
890
n = advance_n_u32(n);
891
if n == MIN_U32 {
892
break;
893
}
894
}
895
}
896
897
// Compute the magic numbers for `d` and then use them to compute and
898
// check `n / d` for around 1000 values of `n`, using signed 32-bit
899
// division.
900
fn test_magic_s32_inner(d: i32, n_tests_done: &mut i32) {
901
// See comment on advance_n_u32 above.
902
fn advance_n_s32(x: i32) -> i32 {
903
if x >= 0 && x <= 29 {
904
return x + 1;
905
}
906
if x < MIN_S32 + 110 {
907
return x + 1;
908
}
909
if x < MIN_S32 + 1700 {
910
return x + 23;
911
}
912
if x < MAX_S32 - 1700 {
913
let mut xd: f64 = x as f64;
914
xd = if xd < 0.0 {
915
xd / 1.06415927
916
} else {
917
xd * 1.06415927
918
};
919
return if xd >= (MAX_S32 - 1700) as f64 {
920
MAX_S32 - 1700
921
} else {
922
xd as i32
923
};
924
}
925
if x < MAX_S32 - 110 {
926
return x + 23;
927
}
928
if x == MAX_S32 {
929
return MIN_S32;
930
}
931
x + 1
932
}
933
934
let magic: MS32 = magic_s32(d);
935
let mut n: i32 = MIN_S32;
936
loop {
937
*n_tests_done += 1;
938
let q = eval_magic_s32_div(n, d, &magic);
939
assert_eq!(q, div_s32(n, d));
940
941
n = advance_n_s32(n);
942
if n == MIN_S32 {
943
break;
944
}
945
}
946
}
947
948
// Compute the magic numbers for `d` and then use them to compute and
949
// check `n / d` for around 1000 values of `n`, using unsigned 64-bit
950
// division.
951
fn test_magic_u64_inner(d: u64, n_tests_done: &mut i32) {
952
// See comment on advance_n_u32 above.
953
fn advance_n_u64(x: u64) -> u64 {
954
if x < MIN_U64 + 110 {
955
return x + 1;
956
}
957
if x < MIN_U64 + 1700 {
958
return x + 23;
959
}
960
if x < MAX_U64 - 1700 {
961
let xd: f64 = (x as f64) * 1.06415927;
962
return if xd >= (MAX_U64 - 1700) as f64 {
963
MAX_U64 - 1700
964
} else {
965
xd as u64
966
};
967
}
968
if x < MAX_U64 - 110 {
969
return x + 23;
970
}
971
u64::wrapping_add(x, 1)
972
}
973
974
let magic: MU64 = magic_u64(d);
975
let mut n: u64 = MIN_U64;
976
loop {
977
*n_tests_done += 1;
978
let q = eval_magic_u64_div(n, &magic);
979
assert_eq!(q, div_u64(n, d));
980
981
n = advance_n_u64(n);
982
if n == MIN_U64 {
983
break;
984
}
985
}
986
}
987
988
// Compute the magic numbers for `d` and then use them to compute and
989
// check `n / d` for around 1000 values of `n`, using signed 64-bit
990
// division.
991
fn test_magic_s64_inner(d: i64, n_tests_done: &mut i32) {
992
// See comment on advance_n_u32 above.
993
fn advance_n_s64(x: i64) -> i64 {
994
if x >= 0 && x <= 29 {
995
return x + 1;
996
}
997
if x < MIN_S64 + 110 {
998
return x + 1;
999
}
1000
if x < MIN_S64 + 1700 {
1001
return x + 23;
1002
}
1003
if x < MAX_S64 - 1700 {
1004
let mut xd: f64 = x as f64;
1005
xd = if xd < 0.0 {
1006
xd / 1.06415927
1007
} else {
1008
xd * 1.06415927
1009
};
1010
return if xd >= (MAX_S64 - 1700) as f64 {
1011
MAX_S64 - 1700
1012
} else {
1013
xd as i64
1014
};
1015
}
1016
if x < MAX_S64 - 110 {
1017
return x + 23;
1018
}
1019
if x == MAX_S64 {
1020
return MIN_S64;
1021
}
1022
x + 1
1023
}
1024
1025
let magic: MS64 = magic_s64(d);
1026
let mut n: i64 = MIN_S64;
1027
loop {
1028
*n_tests_done += 1;
1029
let q = eval_magic_s64_div(n, d, &magic);
1030
assert_eq!(q, div_s64(n, d));
1031
1032
n = advance_n_s64(n);
1033
if n == MIN_S64 {
1034
break;
1035
}
1036
}
1037
}
1038
1039
// Using all the above support machinery, actually run the tests.
1040
1041
let mut n_tests_done: i32 = 0;
1042
1043
// u32 division tests
1044
{
1045
// 2 .. 3k
1046
let mut d: u32 = 2;
1047
for _ in 0..3 * 1000 {
1048
test_magic_u32_inner(d, &mut n_tests_done);
1049
d += 1;
1050
}
1051
1052
// across the midpoint: midpoint - 3k .. midpoint + 3k
1053
d = MAX_U32_HALF - 3 * 1000;
1054
for _ in 0..2 * 3 * 1000 {
1055
test_magic_u32_inner(d, &mut n_tests_done);
1056
d += 1;
1057
}
1058
1059
// MAX_U32 - 3k .. MAX_U32 (in reverse order)
1060
d = MAX_U32;
1061
for _ in 0..3 * 1000 {
1062
test_magic_u32_inner(d, &mut n_tests_done);
1063
d -= 1;
1064
}
1065
}
1066
1067
// s32 division tests
1068
{
1069
// MIN_S32 .. MIN_S32 + 3k
1070
let mut d: i32 = MIN_S32;
1071
for _ in 0..3 * 1000 {
1072
test_magic_s32_inner(d, &mut n_tests_done);
1073
d += 1;
1074
}
1075
1076
// -3k .. -2 (in reverse order)
1077
d = -2;
1078
for _ in 0..3 * 1000 {
1079
test_magic_s32_inner(d, &mut n_tests_done);
1080
d -= 1;
1081
}
1082
1083
// 2 .. 3k
1084
d = 2;
1085
for _ in 0..3 * 1000 {
1086
test_magic_s32_inner(d, &mut n_tests_done);
1087
d += 1;
1088
}
1089
1090
// MAX_S32 - 3k .. MAX_S32 (in reverse order)
1091
d = MAX_S32;
1092
for _ in 0..3 * 1000 {
1093
test_magic_s32_inner(d, &mut n_tests_done);
1094
d -= 1;
1095
}
1096
}
1097
1098
// u64 division tests
1099
{
1100
// 2 .. 3k
1101
let mut d: u64 = 2;
1102
for _ in 0..3 * 1000 {
1103
test_magic_u64_inner(d, &mut n_tests_done);
1104
d += 1;
1105
}
1106
1107
// across the midpoint: midpoint - 3k .. midpoint + 3k
1108
d = MAX_U64_HALF - 3 * 1000;
1109
for _ in 0..2 * 3 * 1000 {
1110
test_magic_u64_inner(d, &mut n_tests_done);
1111
d += 1;
1112
}
1113
1114
// mAX_U64 - 3000 .. mAX_U64 (in reverse order)
1115
d = MAX_U64;
1116
for _ in 0..3 * 1000 {
1117
test_magic_u64_inner(d, &mut n_tests_done);
1118
d -= 1;
1119
}
1120
}
1121
1122
// s64 division tests
1123
{
1124
// MIN_S64 .. MIN_S64 + 3k
1125
let mut d: i64 = MIN_S64;
1126
for _ in 0..3 * 1000 {
1127
test_magic_s64_inner(d, &mut n_tests_done);
1128
d += 1;
1129
}
1130
1131
// -3k .. -2 (in reverse order)
1132
d = -2;
1133
for _ in 0..3 * 1000 {
1134
test_magic_s64_inner(d, &mut n_tests_done);
1135
d -= 1;
1136
}
1137
1138
// 2 .. 3k
1139
d = 2;
1140
for _ in 0..3 * 1000 {
1141
test_magic_s64_inner(d, &mut n_tests_done);
1142
d += 1;
1143
}
1144
1145
// MAX_S64 - 3k .. MAX_S64 (in reverse order)
1146
d = MAX_S64;
1147
for _ in 0..3 * 1000 {
1148
test_magic_s64_inner(d, &mut n_tests_done);
1149
d -= 1;
1150
}
1151
}
1152
assert_eq!(n_tests_done, 50_148_000);
1153
}
1154
1155
proptest::proptest! {
1156
#[test]
1157
fn proptest_magic_u32(numerator in 0..u32::MAX, divisor in 1..u32::MAX) {
1158
let expected_div = numerator.wrapping_div(divisor);
1159
let expected_rem = numerator.wrapping_rem(divisor);
1160
1161
let magic = magic_u32(divisor);
1162
let actual_div = eval_magic_u32_div(numerator, &magic);
1163
let actual_rem = eval_magic_u32_rem(numerator, divisor, &magic);
1164
1165
assert_eq!(expected_div, actual_div);
1166
assert_eq!(expected_rem, actual_rem);
1167
}
1168
1169
#[test]
1170
fn proptest_magic_u64(numerator in 0..u64::MAX, divisor in 1..u64::MAX) {
1171
let expected_div = numerator.wrapping_div(divisor);
1172
let expected_rem = numerator.wrapping_rem(divisor);
1173
1174
let magic = magic_u64(divisor);
1175
let actual_div = eval_magic_u64_div(numerator, &magic);
1176
let actual_rem = eval_magic_u64_rem(numerator, divisor, &magic);
1177
1178
assert_eq!(expected_div, actual_div);
1179
assert_eq!(expected_rem, actual_rem);
1180
}
1181
1182
#[test]
1183
fn proptest_magic_s32(
1184
numerator in i32::MIN..i32::MAX,
1185
divisor in (i32::MIN..i32::MAX).prop_filter("avoid divide-by-zero", |d| *d != 0),
1186
) {
1187
let expected_div = numerator.wrapping_div(divisor);
1188
let expected_rem = numerator.wrapping_rem(divisor);
1189
1190
let magic = magic_s32(divisor);
1191
let actual_div = eval_magic_s32_div(numerator, divisor, &magic);
1192
let actual_rem = eval_magic_s32_rem(numerator, divisor, &magic);
1193
1194
assert_eq!(expected_div, actual_div);
1195
assert_eq!(expected_rem, actual_rem);
1196
}
1197
1198
#[test]
1199
fn proptest_magic_s64(
1200
numerator in i64::MIN..i64::MAX,
1201
divisor in (i64::MIN..i64::MAX).prop_filter("avoid divide-by-zero", |d| *d != 0),
1202
) {
1203
let expected_div = numerator.wrapping_div(divisor);
1204
let expected_rem = numerator.wrapping_rem(divisor);
1205
1206
let magic = magic_s64(divisor);
1207
let actual_div = eval_magic_s64_div(numerator, divisor, &magic);
1208
let actual_rem = eval_magic_s64_rem(numerator, divisor, &magic);
1209
1210
assert_eq!(expected_div, actual_div);
1211
assert_eq!(expected_rem, actual_rem);
1212
}
1213
}
1214
}
1215
1216