Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/util/fast_idiv_by_const.h
4545 views
1
/*
2
* Copyright © 2018 Advanced Micro Devices, Inc.
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#ifndef FAST_IDIV_BY_CONST_H
25
#define FAST_IDIV_BY_CONST_H
26
27
/* Imported from:
28
* https://raw.githubusercontent.com/ridiculousfish/libdivide/master/divide_by_constants_codegen_reference.c
29
*/
30
31
#include <inttypes.h>
32
#include <limits.h>
33
#include <assert.h>
34
35
#ifdef __cplusplus
36
extern "C" {
37
#endif
38
39
/* Computes "magic info" for performing signed division by a fixed integer D.
40
* The type 'sint_t' is assumed to be defined as a signed integer type large
41
* enough to hold both the dividend and the divisor.
42
* Here >> is arithmetic (signed) shift, and >>> is logical shift.
43
*
44
* To emit code for n/d, rounding towards zero, use the following sequence:
45
*
46
* m = util_compute_fast_sdiv_info(D, SINT_BITS)
47
* emit("result = (m.multiplier * n) >> SINT_BITS");
48
* if d > 0 and m.multiplier < 0: emit("result += n")
49
* if d < 0 and m.multiplier > 0: emit("result -= n")
50
* if m.shift > 0: emit("result >>= m.shift")
51
* emit("result += (result < 0)")
52
*
53
* The shifts by SINT_BITS may be "free" if the high half of the full multiply
54
* is put in a separate register.
55
*
56
* The final add can of course be implemented via the sign bit, e.g.
57
* result += (result >>> (SINT_BITS - 1))
58
* or
59
* result -= (result >> (SINT_BITS - 1))
60
*
61
* This code is heavily indebted to Hacker's Delight by Henry Warren.
62
* See http://www.hackersdelight.org/HDcode/magic.c.txt
63
* Used with permission from http://www.hackersdelight.org/permissions.htm
64
*/
65
66
/* Constants returned by util_compute_fast_sdiv_info(): together they replace
 * a signed division by a fixed divisor with a multiply followed by a shift.
 */
struct util_fast_sdiv_info {
   int64_t multiplier; /* the "magic number" multiplier */
   unsigned shift; /* shift for the dividend after multiplying */
};
70
71
/* Computes the multiplier and shift for signed division by the constant D.
 * SINT_BITS is the bit width of the signed type the division is emitted for.
 */
struct util_fast_sdiv_info
util_compute_fast_sdiv_info(int64_t D, unsigned SINT_BITS);
73
74
/* Computes "magic info" for performing unsigned division by a fixed positive
75
* integer D. UINT_BITS is the bit size at which the final "magic"
76
* calculation will be performed; it is assumed to be large enough to hold
77
* both the dividend and the divisor. num_bits can be set appropriately if n
78
* is known to fit in fewer than UINT_BITS bits; if this is not known then pass UINT_BITS
79
* for num_bits.
80
*
81
* Assume we have a hardware register of width UINT_BITS, a known constant D
82
* which is not zero and not a power of 2, and a variable n of width num_bits
83
* (which may be up to UINT_BITS). To emit code for n/d, use one of the two
84
* following sequences (here >>> refers to a logical bitshift):
85
*
86
* m = util_compute_fast_udiv_info(D, num_bits, UINT_BITS)
87
* if m.pre_shift > 0: emit("n >>>= m.pre_shift")
88
* if m.increment: emit("n = saturated_increment(n)")
89
* emit("result = (m.multiplier * n) >>> UINT_BITS")
90
* if m.post_shift > 0: emit("result >>>= m.post_shift")
91
*
92
* or
93
*
94
* m = util_compute_fast_udiv_info(D, num_bits, UINT_BITS)
95
* if m.pre_shift > 0: emit("n >>>= m.pre_shift")
96
* emit("result = m.multiplier * n")
97
* if m.increment: emit("result = result + m.multiplier")
98
* emit("result >>>= UINT_BITS")
99
* if m.post_shift > 0: emit("result >>>= m.post_shift")
100
*
101
* This second version works even if D is 1. The shifts by UINT_BITS may be
102
* "free" if the high half of the full multiply is put in a separate register.
103
*
104
* saturated_increment(n) means "increment n unless it would wrap to 0," i.e.
105
* if n == (1 << UINT_BITS)-1: result = n
106
* else: result = n+1
107
* A common way to implement this is with the carry bit. For example, on x86:
108
* add 1
109
* sbb 0
110
*
111
* Some invariants:
112
* 1: At least one of pre_shift and increment is zero
113
* 2: multiplier is never zero
114
*
115
* This code incorporates the "round down" optimization per ridiculous_fish.
116
*/
117
118
/* Constants returned by util_compute_fast_udiv_info(): together they replace
 * an unsigned division by a fixed divisor with shifts, an optional increment
 * and a multiply.
 */
struct util_fast_udiv_info {
   uint64_t multiplier; /* the "magic number" multiplier */
   unsigned pre_shift; /* shift for the dividend before multiplying */
   unsigned post_shift; /* shift for the dividend after multiplying */
   int increment; /* 0 or 1; if set then increment the numerator, using one of
                     the two strategies */
};
125
126
/* Computes the constants for unsigned division by the constant D.
 * num_bits is the width of the dividend and UINT_BITS is the width at which
 * the final calculation is performed.
 */
struct util_fast_udiv_info
util_compute_fast_udiv_info(uint64_t D, unsigned num_bits, unsigned UINT_BITS);
128
129
/* Below are possible options for dividing by a uniform in a shader where
130
* the divisor is constant but not known at compile time.
131
*/
132
133
/* Full version. */
134
static inline uint32_t
135
util_fast_udiv32(uint32_t n, struct util_fast_udiv_info info)
136
{
137
n = n >> info.pre_shift;
138
/* If the divisor is not 1, you can instead use a 32-bit ADD that clamps
139
* to UINT_MAX. Dividing by 1 needs the full 64-bit ADD.
140
*
141
* If you have unsigned 64-bit MAD with 32-bit inputs, you can do:
142
* increment = increment ? multiplier : 0; // on the CPU
143
* (n * multiplier + increment) // on the GPU using unsigned 64-bit MAD
144
*/
145
n = (((uint64_t)n + info.increment) * info.multiplier) >> 32;
146
n = n >> info.post_shift;
147
return n;
148
}
149
150
/* A little more efficient version if n != UINT_MAX, i.e. no unsigned
151
* wraparound in the computation.
152
*/
153
static inline uint32_t
154
util_fast_udiv32_nuw(uint32_t n, struct util_fast_udiv_info info)
155
{
156
assert(n != UINT32_MAX);
157
n = n >> info.pre_shift;
158
n = n + info.increment;
159
n = ((uint64_t)n * info.multiplier) >> 32;
160
n = n >> info.post_shift;
161
return n;
162
}
163
164
/* Even faster version but both operands must be 31-bit unsigned integers
165
* and the divisor must be greater than 1.
166
*
167
* info must be computed with num_bits == 31.
168
*/
169
static inline uint32_t
170
util_fast_udiv32_u31_d_not_one(uint32_t n, struct util_fast_udiv_info info)
171
{
172
assert(info.pre_shift == 0);
173
assert(info.increment == 0);
174
n = ((uint64_t)n * info.multiplier) >> 32;
175
n = n >> info.post_shift;
176
return n;
177
}
178
179
#ifdef __cplusplus
180
} /* extern C */
181
#endif
182
183
#endif /* FAST_IDIV_BY_CONST_H */
184
185