Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
4565 views
1
/**************************************************************************
2
*
3
* Copyright 2013 VMware, Inc.
4
* All Rights Reserved.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sub license, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
13
*
14
* The above copyright notice and this permission notice (including the
15
* next paragraph) shall be included in all copies or substantial portions
16
* of the Software.
17
*
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
*
26
**************************************************************************/
27
28
29
/**
30
* @file
31
* Format conversion code for srgb formats.
32
*
33
* Functions for converting from srgb to linear and vice versa.
34
* From http://www.opengl.org/registry/specs/EXT/texture_sRGB.txt:
35
*
36
* srgb->linear:
37
* cl = cs / 12.92, cs <= 0.04045
38
* cl = ((cs + 0.055)/1.055)^2.4, cs > 0.04045
39
*
40
* linear->srgb:
41
* if (isnan(cl)) {
42
* Map IEEE-754 Not-a-number to zero.
43
* cs = 0.0;
44
* } else if (cl > 1.0) {
45
* cs = 1.0;
46
* } else if (cl < 0.0) {
47
* cs = 0.0;
48
* } else if (cl < 0.0031308) {
49
* cs = 12.92 * cl;
50
* } else {
51
* cs = 1.055 * pow(cl, 0.41666) - 0.055;
52
* }
53
*
54
* This does not need to be accurate, however at least for d3d10
55
* (http://msdn.microsoft.com/en-us/library/windows/desktop/dd607323%28v=vs.85%29.aspx):
56
* 1) For srgb->linear, it is required that the error on the srgb side is
57
* not larger than 0.5f, which I interpret that if you map the value back
58
* to srgb from linear using the ideal conversion, it would not be off by
59
* more than 0.5f (that is, it would map to the same 8-bit integer value
60
* as it was before conversion to linear).
61
* 2) linear->srgb is permitted an error of up to 0.6f, which luckily is
* quite a generous tolerance.
63
* 3) Additionally, all srgb values converted to linear and back must result
64
* in the same value as they were originally.
65
*
66
* @author Roland Scheidegger <[email protected]>
67
*/
68
69
70
#include "util/u_debug.h"
71
#include "util/u_math.h"
72
73
#include "lp_bld_type.h"
74
#include "lp_bld_const.h"
75
#include "lp_bld_arit.h"
76
#include "lp_bld_bitarit.h"
77
#include "lp_bld_logic.h"
78
#include "lp_bld_format.h"
79
80
81
82
/**
83
* Convert srgb int values to linear float values.
84
* Several possibilities how to do this, e.g.
85
* - table
86
* - doing the pow() with int-to-float and float-to-int tricks
87
* (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
88
* - just using standard polynomial approximation
89
* (3rd order polynomial is required for crappy but just sufficient accuracy)
90
*
91
* @param src integer (vector) value(s) to convert
92
* (chan_bits bit values unpacked to 32 bit already).
93
*/
94
LLVMValueRef
95
lp_build_srgb_to_linear(struct gallivm_state *gallivm,
96
struct lp_type src_type,
97
unsigned chan_bits,
98
LLVMValueRef src)
99
{
100
struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
101
struct lp_build_context f32_bld;
102
LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh;
103
double coeffs[4] = {0.0023f,
104
0.0030f / 255.0f,
105
0.6935f / (255.0f * 255.0f),
106
0.3012f / (255.0f * 255.0f * 255.0f)
107
};
108
109
assert(src_type.width == 32);
110
/* Technically this would work with more bits too but would be inaccurate. */
111
assert(chan_bits <= 8);
112
113
lp_build_context_init(&f32_bld, gallivm, f32_type);
114
115
/*
116
* using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023)
117
* ( poly = 0.3012*x^3 + 0.6935*x^2 + 0.0030*x + 0.0023)
118
* (found with octave polyfit and some magic as I couldn't get the error
119
* function right). Using the above mentioned error function, the values stay
120
* within +-0.35, except for the lowest values - hence tweaking linear segment
121
* to cover the first 16 instead of the first 11 values (the error stays
122
* just about acceptable there too).
123
* Hence: lin = src > 15 ? poly : src / 12.6
124
* This function really only makes sense for vectors, should use LUT otherwise.
125
* All in all (including float conversion) 11 instructions (with sse4.1),
126
* 6 constants (polynomial could be done with 1 instruction less at the cost
127
* of slightly worse dependency chain, fma should also help).
128
*/
129
/* doing the 1/255 mul as part of the approximation */
130
srcf = lp_build_int_to_float(&f32_bld, src);
131
if (chan_bits != 8) {
132
/* could adjust all the constants instead */
133
LLVMValueRef rescale_const = lp_build_const_vec(gallivm, f32_type,
134
255.0f / ((1 << chan_bits) - 1));
135
srcf = lp_build_mul(&f32_bld, srcf, rescale_const);
136
}
137
lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
138
part_lin = lp_build_mul(&f32_bld, srcf, lin_const);
139
140
part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4);
141
142
lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f);
143
is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL, srcf, lin_thresh);
144
return lp_build_select(&f32_bld, is_linear, part_lin, part_pow);
145
}
146
147
148
/**
149
* Convert linear float values to srgb int values.
150
* Several possibilities how to do this, e.g.
151
* - use table (based on exponent/highest order mantissa bits) and do
152
* linear interpolation (https://gist.github.com/rygorous/2203834)
153
* - Chebyshev polynomial
154
* - Approximation using reciprocals
155
* - using int-to-float and float-to-int tricks for pow()
156
* (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
157
*
158
* @param src float (vector) value(s) to convert.
159
*/
160
static LLVMValueRef
161
lp_build_linear_to_srgb(struct gallivm_state *gallivm,
162
struct lp_type src_type,
163
unsigned chan_bits,
164
LLVMValueRef src)
165
{
166
LLVMBuilderRef builder = gallivm->builder;
167
struct lp_build_context f32_bld;
168
LLVMValueRef lin_thresh, lin, lin_const, is_linear, tmp, pow_final;
169
170
lp_build_context_init(&f32_bld, gallivm, src_type);
171
172
src = lp_build_clamp(&f32_bld, src, f32_bld.zero, f32_bld.one);
173
174
if (0) {
175
/*
176
* using int-to-float and float-to-int trick for pow().
177
* This is much more accurate than necessary thanks to the correction,
178
* but it most certainly makes no sense without rsqrt available.
179
* Bonus points if you understand how this works...
180
* All in all (including min/max clamp, conversion) 19 instructions.
181
*/
182
183
float exp_f = 2.0f / 3.0f;
184
/* some compilers can't do exp2f, so this is exp2f(127.0f/exp_f - 127.0f) */
185
float exp2f_c = 1.30438178253e+19f;
186
float coeff_f = 0.62996f;
187
LLVMValueRef pow_approx, coeff, x2, exponent, pow_1, pow_2;
188
struct lp_type int_type = lp_int_type(src_type);
189
190
/*
191
* First calculate approx x^8/12
192
*/
193
exponent = lp_build_const_vec(gallivm, src_type, exp_f);
194
coeff = lp_build_const_vec(gallivm, src_type,
195
exp2f_c * powf(coeff_f, 1.0f / exp_f));
196
197
/* premultiply src */
198
tmp = lp_build_mul(&f32_bld, coeff, src);
199
/* "log2" */
200
tmp = LLVMBuildBitCast(builder, tmp, lp_build_vec_type(gallivm, int_type), "");
201
tmp = lp_build_int_to_float(&f32_bld, tmp);
202
/* multiply for pow */
203
tmp = lp_build_mul(&f32_bld, tmp, exponent);
204
/* "exp2" */
205
pow_approx = lp_build_itrunc(&f32_bld, tmp);
206
pow_approx = LLVMBuildBitCast(builder, pow_approx,
207
lp_build_vec_type(gallivm, src_type), "");
208
209
/*
210
* Since that pow was inaccurate (like 3 bits, though each sqrt step would
211
* give another bit), compensate the error (which is why we chose another
212
* exponent in the first place).
213
*/
214
/* x * x^(8/12) = x^(20/12) */
215
pow_1 = lp_build_mul(&f32_bld, pow_approx, src);
216
217
/* x * x * x^(-4/12) = x^(20/12) */
218
/* Should avoid using rsqrt if it's not available, but
219
* using x * x^(4/12) * x^(4/12) instead will change error weight */
220
tmp = lp_build_fast_rsqrt(&f32_bld, pow_approx);
221
x2 = lp_build_mul(&f32_bld, src, src);
222
pow_2 = lp_build_mul(&f32_bld, x2, tmp);
223
224
/* average the values so the errors cancel out, compensate bias,
225
* we also squeeze the 1.055 mul of the srgb conversion plus the 255.0 mul
226
* for conversion to int in here */
227
tmp = lp_build_add(&f32_bld, pow_1, pow_2);
228
coeff = lp_build_const_vec(gallivm, src_type,
229
1.0f / (3.0f * coeff_f) * 0.999852f *
230
powf(1.055f * 255.0f, 4.0f));
231
pow_final = lp_build_mul(&f32_bld, tmp, coeff);
232
233
/* x^(5/12) = rsqrt(rsqrt(x^20/12)) */
234
if (lp_build_fast_rsqrt_available(src_type)) {
235
pow_final = lp_build_fast_rsqrt(&f32_bld,
236
lp_build_fast_rsqrt(&f32_bld, pow_final));
237
}
238
else {
239
pow_final = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, pow_final));
240
}
241
pow_final = lp_build_add(&f32_bld, pow_final,
242
lp_build_const_vec(gallivm, src_type, -0.055f * 255.0f));
243
}
244
245
else {
246
/*
247
* using "rational polynomial" approximation here.
248
* Essentially y = a*x^0.375 + b*x^0.5 + c, with also
249
* factoring in the 255.0 mul and the scaling mul.
250
* (a is closer to actual value so has higher weight than b.)
251
* Note: the constants are magic values. They were found empirically,
252
* possibly could be improved but good enough (be VERY careful with
253
* error metric if you'd want to tweak them, they also MUST fit with
254
* the crappy polynomial above for srgb->linear since it is required
255
* that each srgb value maps back to the same value).
256
* This function has an error of max +-0.17. Not sure this is actually
257
* enough, we require +-0.6 but that may include the +-0.5 from integer
258
* conversion. Seems to pass all relevant tests though...
259
* For the approximated srgb->linear values the error is naturally larger
260
* (+-0.42) but still accurate enough (required +-0.5 essentially).
261
* All in all (including min/max clamp, conversion) 15 instructions.
262
* FMA would help (minus 2 instructions).
263
*/
264
265
LLVMValueRef x05, x0375, a_const, b_const, c_const, tmp2;
266
267
if (lp_build_fast_rsqrt_available(src_type)) {
268
tmp = lp_build_fast_rsqrt(&f32_bld, src);
269
x05 = lp_build_mul(&f32_bld, src, tmp);
270
}
271
else {
272
/*
273
* I don't really expect this to be practical without rsqrt
274
* but there's no reason for triple punishment so at least
275
* save the otherwise resulting division and unnecessary mul...
276
*/
277
x05 = lp_build_sqrt(&f32_bld, src);
278
}
279
280
tmp = lp_build_mul(&f32_bld, x05, src);
281
if (lp_build_fast_rsqrt_available(src_type)) {
282
x0375 = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, tmp));
283
}
284
else {
285
x0375 = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, tmp));
286
}
287
288
a_const = lp_build_const_vec(gallivm, src_type, 0.675f * 1.0622 * 255.0f);
289
b_const = lp_build_const_vec(gallivm, src_type, 0.325f * 1.0622 * 255.0f);
290
c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f);
291
292
tmp = lp_build_mul(&f32_bld, a_const, x0375);
293
tmp2 = lp_build_mad(&f32_bld, b_const, x05, c_const);
294
pow_final = lp_build_add(&f32_bld, tmp, tmp2);
295
}
296
297
/* linear part is easy */
298
lin_const = lp_build_const_vec(gallivm, src_type, 12.92f * 255.0f);
299
lin = lp_build_mul(&f32_bld, src, lin_const);
300
301
lin_thresh = lp_build_const_vec(gallivm, src_type, 0.0031308f);
302
is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
303
tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);
304
305
if (chan_bits != 8) {
306
/* could adjust all the constants instead */
307
LLVMValueRef rescale_const = lp_build_const_vec(gallivm, src_type,
308
((1 << chan_bits) - 1) / 255.0f);
309
tmp = lp_build_mul(&f32_bld, tmp, rescale_const);
310
}
311
312
f32_bld.type.sign = 0;
313
return lp_build_iround(&f32_bld, tmp);
314
}
315
316
317
/**
318
* Convert linear float soa values to packed srgb AoS values.
319
* This only handles packed formats which are 4x8bit in size
320
* (rgba and rgbx plus swizzles), and 16bit 565-style formats
321
* with no alpha. (In the latter case the return values won't be
322
* fully packed, it will look like r5g6b5x16r5g6b5x16...)
323
*
324
* @param src float SoA (vector) values to convert.
325
*/
326
LLVMValueRef
327
lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
328
const struct util_format_description *dst_fmt,
329
struct lp_type src_type,
330
LLVMValueRef *src)
331
{
332
LLVMBuilderRef builder = gallivm->builder;
333
unsigned chan;
334
struct lp_build_context f32_bld;
335
struct lp_type int32_type = lp_int_type(src_type);
336
LLVMValueRef tmpsrgb[4], alpha, dst;
337
338
lp_build_context_init(&f32_bld, gallivm, src_type);
339
340
/* rgb is subject to linear->srgb conversion, alpha is not */
341
for (chan = 0; chan < 3; chan++) {
342
unsigned chan_bits = dst_fmt->channel[dst_fmt->swizzle[chan]].size;
343
tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, chan_bits, src[chan]);
344
}
345
/*
346
* can't use lp_build_conv since we want to keep values as 32bit
347
* here so we can interleave with rgb to go from SoA->AoS.
348
*/
349
alpha = lp_build_clamp_zero_one_nanzero(&f32_bld, src[3]);
350
alpha = lp_build_mul(&f32_bld, alpha,
351
lp_build_const_vec(gallivm, src_type, 255.0f));
352
tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);
353
354
dst = lp_build_zero(gallivm, int32_type);
355
for (chan = 0; chan < dst_fmt->nr_channels; chan++) {
356
if (dst_fmt->swizzle[chan] <= PIPE_SWIZZLE_W) {
357
unsigned ls;
358
LLVMValueRef shifted, shift_val;
359
ls = dst_fmt->channel[dst_fmt->swizzle[chan]].shift;
360
shift_val = lp_build_const_int_vec(gallivm, int32_type, ls);
361
shifted = LLVMBuildShl(builder, tmpsrgb[chan], shift_val, "");
362
dst = LLVMBuildOr(builder, dst, shifted, "");
363
}
364
}
365
return dst;
366
}
367
368