GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * AoS pixel format manipulation.
 *
 * @author Jose Fonseca <[email protected]>
 */


#include "util/format/u_format.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_pointer.h"
#include "util/u_string.h"
#include "util/u_cpu_detect.h"

#include "lp_bld_arit.h"
#include "lp_bld_init.h"
#include "lp_bld_type.h"
#include "lp_bld_flow.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_gather.h"
#include "lp_bld_debug.h"
#include "lp_bld_format.h"
#include "lp_bld_pack.h"
#include "lp_bld_intr.h"
#include "lp_bld_logic.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_misc.h"

/**
 * Basic swizzling. Rearrange the order of the unswizzled array elements
 * according to the format description. PIPE_SWIZZLE_0/ONE are supported
 * too.
 * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
 */
LLVMValueRef
lp_build_format_swizzle_aos(const struct util_format_description *desc,
                            struct lp_build_context *bld,
                            LLVMValueRef unswizzled)
{
   unsigned char swizzles[4];
   unsigned chan;

   assert(bld->type.length % 4 == 0);

   for (chan = 0; chan < 4; ++chan) {
      enum pipe_swizzle swizzle;

      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
         /*
          * For ZS formats do RGBA = ZZZ1
          */
         if (chan == 3) {
            swizzle = PIPE_SWIZZLE_1;
         } else if (desc->swizzle[0] == PIPE_SWIZZLE_NONE) {
            swizzle = PIPE_SWIZZLE_0;
         } else {
            swizzle = desc->swizzle[0];
         }
      } else {
         swizzle = desc->swizzle[chan];
      }
      swizzles[chan] = swizzle;
   }

   return lp_build_swizzle_aos(bld, unswizzled, swizzles);
}
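
/*
 * Illustrative example, assuming standard util_format descriptors: for
 * PIPE_FORMAT_B8G8R8A8_UNORM, desc->swizzle is {2, 1, 0, 3}, so an
 * unswizzled {B, G, R, A} vector comes back as {R, G, B, A}; for a ZS
 * format such as PIPE_FORMAT_Z24_UNORM_S8_UINT the loop above yields
 * {Z, Z, Z, 1}.
 */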


/**
 * Whether the format matches the vector type, apart from swizzles.
 */
static inline boolean
format_matches_type(const struct util_format_description *desc,
                    struct lp_type type)
{
   enum util_format_type chan_type;
   unsigned chan;

   assert(type.length % 4 == 0);

   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
       desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
       desc->block.width != 1 ||
       desc->block.height != 1) {
      return FALSE;
   }

   if (type.floating) {
      chan_type = UTIL_FORMAT_TYPE_FLOAT;
   } else if (type.fixed) {
      chan_type = UTIL_FORMAT_TYPE_FIXED;
   } else if (type.sign) {
      chan_type = UTIL_FORMAT_TYPE_SIGNED;
   } else {
      chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
   }

   for (chan = 0; chan < desc->nr_channels; ++chan) {
      if (desc->channel[chan].size != type.width) {
         return FALSE;
      }

      if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
         if (desc->channel[chan].type != chan_type ||
             desc->channel[chan].normalized != type.norm) {
            return FALSE;
         }
      }
   }

   return TRUE;
}
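
/*
 * Illustrative sketch of what matches, assuming standard util_format
 * descriptors: PIPE_FORMAT_R8G8B8A8_UNORM matches an 8-bit, normalized,
 * unsigned lp_type, and PIPE_FORMAT_R32G32B32A32_FLOAT matches a 32-bit
 * floating lp_type. PIPE_FORMAT_B5G6R5_UNORM matches nothing here, since
 * its channels differ in size and can never all equal type.width.
 */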

/*
 * Do rounding when converting small unorm values to larger ones.
 * Not quite 100% accurate, as it's done by appending MSBs, but
 * should be good enough.
 */

static inline LLVMValueRef
scale_bits_up(struct gallivm_state *gallivm,
              int src_bits,
              int dst_bits,
              LLVMValueRef src,
              struct lp_type src_type)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result = src;

   if (src_bits == 1 && dst_bits > 1) {
      /*
       * Useful for a1 - we'd need quite some repeated copies otherwise.
       */
      struct lp_build_context bld;
      LLVMValueRef dst_mask;
      lp_build_context_init(&bld, gallivm, src_type);
      dst_mask = lp_build_const_int_vec(gallivm, src_type,
                                        (1 << dst_bits) - 1);
      result = lp_build_cmp(&bld, PIPE_FUNC_EQUAL, src,
                            lp_build_const_int_vec(gallivm, src_type, 0));
      result = lp_build_andnot(&bld, dst_mask, result);
   }
   else if (dst_bits > src_bits) {
      /* Scale up bits */
      int db = dst_bits - src_bits;

      /* Shift left by difference in bits */
      result = LLVMBuildShl(builder,
                            src,
                            lp_build_const_int_vec(gallivm, src_type, db),
                            "");

      if (db <= src_bits) {
         /* Enough bits in src to fill the remainder */
         LLVMValueRef lower = LLVMBuildLShr(builder,
                                            src,
                                            lp_build_const_int_vec(gallivm, src_type,
                                                                   src_bits - db),
                                            "");

         result = LLVMBuildOr(builder, result, lower, "");
      } else if (db > src_bits) {
         /* Need to repeatedly copy src bits to fill remainder in dst */
         unsigned n;

         for (n = src_bits; n < dst_bits; n *= 2) {
            LLVMValueRef shuv = lp_build_const_int_vec(gallivm, src_type, n);

            result = LLVMBuildOr(builder,
                                 result,
                                 LLVMBuildLShr(builder, result, shuv, ""),
                                 "");
         }
      }
   } else {
      assert(dst_bits == src_bits);
   }

   return result;
}
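
/*
 * Worked example of the scaling above (illustrative numbers): going from
 * 5 to 8 bits computes (x << 3) | (x >> 2), so 0x1f -> 0xff and
 * 0x10 -> 0x84, close to the exact round(16 * 255.0 / 31.0) = 132 = 0x84.
 * Going from 2 to 8 bits takes the replication loop instead: starting from
 * x << 6, ORing in copies shifted right by 2 and then 4 fills all eight
 * bits, so 0x3 -> 0xff.
 */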

/**
 * Unpack a single pixel into its XYZW components.
 *
 * @param desc the pixel format for the packed pixel value
 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
 *
 * @return XYZW in a float[4] vector.
 */
static inline LLVMValueRef
lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
                               const struct util_format_description *desc,
                               LLVMValueRef packed)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shifted, casted, scaled, masked;
   LLVMValueRef shifts[4];
   LLVMValueRef masks[4];
   LLVMValueRef scales[4];
   LLVMTypeRef vec32_type;

   boolean normalized;
   boolean needs_uitofp;
   unsigned i;

   /* TODO: Support more formats */
   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(desc->block.width == 1);
   assert(desc->block.height == 1);
   assert(desc->block.bits <= 32);

   /* Do the intermediate integer computations with 32bit integers since it
    * matches floating point size */
   assert(LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));

   vec32_type = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);

   /* Broadcast the packed value to all four channels
    * before: packed = BGRA
    * after: packed = {BGRA, BGRA, BGRA, BGRA}
    */
   packed = LLVMBuildInsertElement(builder, LLVMGetUndef(vec32_type), packed,
                                   LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
                                   "");
   packed = LLVMBuildShuffleVector(builder, packed, LLVMGetUndef(vec32_type),
                                   LLVMConstNull(vec32_type),
                                   "");

   /* Initialize vector constants */
   normalized = FALSE;
   needs_uitofp = FALSE;

   /* Loop over 4 color components */
   for (i = 0; i < 4; ++i) {
      unsigned bits = desc->channel[i].size;
      unsigned shift = desc->channel[i].shift;

      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
         masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
         scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
      }
      else {
         unsigned long long mask = (1ULL << bits) - 1;

         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);

         if (bits == 32) {
            needs_uitofp = TRUE;
         }

         shifts[i] = lp_build_const_int32(gallivm, shift);
         masks[i] = lp_build_const_int32(gallivm, mask);

         if (desc->channel[i].normalized) {
            scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
            normalized = TRUE;
         }
         else
            scales[i] = lp_build_const_float(gallivm, 1.0);
      }
   }

   /* Ex: convert packed = {XYZW, XYZW, XYZW, XYZW}
    * into masked = {X, Y, Z, W}
    */
   if (desc->block.bits < 32 && normalized) {
      /*
       * Note: we cannot do the shift below on x86 natively until AVX2.
       *
       * Old llvm versions will resort to scalar extract/shift insert,
       * which is definitely terrible, new versions will just do
       * several vector shifts and shuffle/blend results together.
       * We could turn this into a variable left shift plus a constant
       * right shift, and llvm would then turn the variable left shift
       * into a mul for us (albeit without sse41 the mul needs emulation
       * too...). However, since we're going to do a float mul
       * anyway, we just adjust that mul instead (plus the mask), skipping
       * the shift completely.
       * We could also use an extra mul when the format isn't normalized and
       * we don't have AVX2 support, but don't bother for now. Unfortunately,
       * this strategy doesn't work for 32bit formats (such as rgb10a2 or even
       * rgba8 if it ends up here), as that would require UIToFP, albeit that
       * would be fixable with an easy 16bit shuffle (unless there are
       * channels crossing 16bit boundaries).
       */
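      /*
       * Worked example of folding the shift into the mul (illustrative
       * numbers): for the green channel of PIPE_FORMAT_B5G6R5_UNORM,
       * bits = 6 and shift = 5, so the mask becomes 0x7e0 and the scale
       * 1.0 / 0x7e0. (packed & 0x7e0) * (1.0 / 0x7e0) then equals
       * ((packed >> 5) & 0x3f) / 63.0, i.e. the normalized value, with no
       * integer shift emitted.
       */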
      for (i = 0; i < 4; ++i) {
         if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
            unsigned bits = desc->channel[i].size;
            unsigned shift = desc->channel[i].shift;
            unsigned long long mask = ((1ULL << bits) - 1) << shift;
            scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
            masks[i] = lp_build_const_int32(gallivm, mask);
         }
      }
      masked = LLVMBuildAnd(builder, packed, LLVMConstVector(masks, 4), "");
   } else {
      shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
      masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
   }

   if (!needs_uitofp) {
      /* UIToFP can't be expressed in SSE2 */
      casted = LLVMBuildSIToFP(builder, masked,
                               LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
   } else {
      casted = LLVMBuildUIToFP(builder, masked,
                               LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
   }

   /*
    * At this point 'casted' may be a vector of floats such as
    * {255.0, 255.0, 255.0, 255.0}. (Normalized values may be multiplied
    * by powers of two). Next, if the pixel values are normalized
    * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
    */

   if (normalized)
      scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
   else
      scaled = casted;

   return scaled;
}


/**
 * Pack a single pixel.
 *
 * @param rgba 4 float vector with the unpacked components.
 *
 * XXX: This is mostly for reference and testing -- operating on a single
 * pixel at a time is rarely if ever needed.
 */
LLVMValueRef
lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
                       const struct util_format_description *desc,
                       LLVMValueRef rgba)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef type;
   LLVMValueRef packed = NULL;
   LLVMValueRef swizzles[4];
   LLVMValueRef shifted, casted, scaled, unswizzled;
   LLVMValueRef shifts[4];
   LLVMValueRef scales[4];
   boolean normalized;
   unsigned i, j;

   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(desc->block.width == 1);
   assert(desc->block.height == 1);

   type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);

   /* Unswizzle the color components into the source vector. */
   for (i = 0; i < 4; ++i) {
      for (j = 0; j < 4; ++j) {
         if (desc->swizzle[j] == i)
            break;
      }
      if (j < 4)
         swizzles[i] = lp_build_const_int32(gallivm, j);
      else
         swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   }

   unswizzled = LLVMBuildShuffleVector(builder, rgba,
                                       LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),
                                       LLVMConstVector(swizzles, 4), "");

   normalized = FALSE;
   for (i = 0; i < 4; ++i) {
      unsigned bits = desc->channel[i].size;
      unsigned shift = desc->channel[i].shift;

      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
         scales[i] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));
      }
      else {
         unsigned mask = (1 << bits) - 1;

         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
         assert(bits < 32);

         shifts[i] = lp_build_const_int32(gallivm, shift);

         if (desc->channel[i].normalized) {
            scales[i] = lp_build_const_float(gallivm, mask);
            normalized = TRUE;
         }
         else
            scales[i] = lp_build_const_float(gallivm, 1.0);
      }
   }

   if (normalized)
      scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
   else
      scaled = unswizzled;

   casted = LLVMBuildFPToSI(builder, scaled,
                            LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");

   shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");

   /* Bitwise or all components */
   for (i = 0; i < 4; ++i) {
      if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
                                                          lp_build_const_int32(gallivm, i), "");
         if (packed)
            packed = LLVMBuildOr(builder, packed, component, "");
         else
            packed = component;
      }
   }

   if (!packed)
      packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));

   if (desc->block.bits < 32)
      packed = LLVMBuildTrunc(builder, packed, type, "");

   return packed;
}
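
/*
 * Worked example (illustrative numbers): packing rgba = {1.0, 0.5, 0.0, 1.0}
 * as PIPE_FORMAT_B8G8R8A8_UNORM unswizzles to {B, G, R, A} =
 * {0.0, 0.5, 1.0, 1.0}, scales by 255.0, truncates via FPToSI to
 * {0, 127, 255, 255}, shifts by {0, 8, 16, 24} and ORs the lanes into
 * 0xffff7f00. Note that FPToSI truncates rather than rounds, hence 127
 * and not 128 for the green channel.
 */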


/**
 * Fetch a pixel into a 4 float AoS.
 *
 * \param format_desc describes format of the image we're fetching from
 * \param aligned whether the data is guaranteed to be aligned
 * \param base_ptr, offset address of the pixel block (or the texel if
 *                  uncompressed)
 * \param i, j the sub-block pixel coordinates. For non-compressed formats
 *             these will always be (0, 0).
 * \param cache optional value pointing to a lp_build_format_cache structure
 * \return a 4 element vector with the pixel's RGBA values.
 */
LLVMValueRef
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        boolean aligned,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j,
                        LLVMValueRef cache)
{
   const struct util_format_unpack_description *unpack =
      util_format_unpack_description(format_desc->format);
   LLVMBuilderRef builder = gallivm->builder;
   unsigned num_pixels = type.length / 4;
   struct lp_build_context bld;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   assert(type.length % 4 == 0);

   lp_build_context_init(&bld, gallivm, type);

   /*
    * Trivial case
    *
    * The format matches the type (apart from a swizzle) so no need for
    * scaling or converting.
    */

   if (format_matches_type(format_desc, type) &&
       format_desc->block.bits <= type.width * 4 &&
       /* XXX this shouldn't be needed */
       util_is_power_of_two_or_zero(format_desc->block.bits)) {
      LLVMValueRef packed;
      LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type);
      struct lp_type fetch_type;
      unsigned vec_len = type.width * type.length;

      fetch_type = lp_type_uint(type.width*4);
      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, fetch_type,
                               aligned, base_ptr, offset, TRUE);

      assert(format_desc->block.bits <= vec_len);
      (void) vec_len; /* silence unused var warning for non-debug build */

      packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, "");
      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
   }
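
   /*
    * Illustrative sketch of the trivial path above (assuming a four-pixel
    * fetch): for PIPE_FORMAT_R8G8B8A8_UNORM into an 8-bit normalized type
    * with type.length == 16, lp_build_gather() loads four 32-bit texels,
    * the bitcast reinterprets them as <16 x i8>, and only the swizzle
    * remains to be done.
    */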

   /*
    * Bit arithmetic for converting small_unorm to unorm8.
    *
    * This misses some opportunities for optimizations (like skipping mask
    * for the highest channel for instance, or doing bit scaling in parallel
    * for channels with the same bit width) but it should be passable for
    * all arithmetic formats.
    */
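   /*
    * Illustrative sketch of this path (not exhaustive): fetching
    * PIPE_FORMAT_B5G6R5_UNORM into an unorm8 vector lands here; each
    * channel is shifted and masked out of the gathered words, widened with
    * scale_bits_up() (5 -> 8 and 6 -> 8 bits), and the swizzled channels
    * are then shifted into their byte lanes and ORed back together.
    */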
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
       util_format_fits_8unorm(format_desc) &&
       type.width == 8 && type.norm == 1 && type.sign == 0 &&
       type.fixed == 0 && type.floating == 0) {
      LLVMValueRef packed, res = NULL, chans[4], rgba[4];
      LLVMTypeRef dst_vec_type, conv_vec_type;
      struct lp_type fetch_type, conv_type;
      struct lp_build_context bld_conv;
      unsigned j;

      fetch_type = lp_type_uint(type.width*4);
      conv_type = lp_type_int_vec(type.width*4, type.width * type.length);
      dst_vec_type = lp_build_vec_type(gallivm, type);
      conv_vec_type = lp_build_vec_type(gallivm, conv_type);
      lp_build_context_init(&bld_conv, gallivm, conv_type);

      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, fetch_type,
                               aligned, base_ptr, offset, TRUE);

      assert(format_desc->block.bits * type.length / 4 <=
             type.width * type.length);

      packed = LLVMBuildBitCast(gallivm->builder, packed, conv_vec_type, "");

      for (j = 0; j < format_desc->nr_channels; ++j) {
         unsigned mask = 0;
         unsigned sa = format_desc->channel[j].shift;

         mask = (1 << format_desc->channel[j].size) - 1;

         /* Extract bits from source */
         chans[j] = LLVMBuildLShr(builder, packed,
                                  lp_build_const_int_vec(gallivm, conv_type, sa),
                                  "");

         chans[j] = LLVMBuildAnd(builder, chans[j],
                                 lp_build_const_int_vec(gallivm, conv_type, mask),
                                 "");

         /* Scale bits */
         if (type.norm) {
            chans[j] = scale_bits_up(gallivm, format_desc->channel[j].size,
                                     type.width, chans[j], conv_type);
         }
      }
      /*
       * This is a hacked lp_build_format_swizzle_soa() since we need a
       * normalized 1 but only 8 bits in a 32bit vector...
       */
      for (j = 0; j < 4; ++j) {
         enum pipe_swizzle swizzle = format_desc->swizzle[j];
         if (swizzle == PIPE_SWIZZLE_1) {
            rgba[j] = lp_build_const_int_vec(gallivm, conv_type, (1 << type.width) - 1);
         } else {
            rgba[j] = lp_build_swizzle_soa_channel(&bld_conv, chans, swizzle);
         }
         if (j == 0) {
            res = rgba[j];
         } else {
            rgba[j] = LLVMBuildShl(builder, rgba[j],
                                   lp_build_const_int_vec(gallivm, conv_type,
                                                          j * type.width), "");
            res = LLVMBuildOr(builder, res, rgba[j], "");
         }
      }
      res = LLVMBuildBitCast(gallivm->builder, res, dst_vec_type, "");

      return res;
   }

   /*
    * Bit arithmetic
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       /* XXX this shouldn't be needed */
       util_is_power_of_two_or_zero(format_desc->block.bits) &&
       format_desc->block.bits <= 32 &&
       format_desc->is_bitmask &&
       !format_desc->is_mixed &&
       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED) &&
       !format_desc->channel[0].pure_integer) {

      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res[LP_MAX_VECTOR_WIDTH / 128];
      struct lp_type conv_type;
      unsigned k, num_conv_src, num_conv_dst;

      /*
       * Note this path is generally terrible for fetching multiple pixels.
       * We should make sure we cannot hit this code path for anything but
       * single pixels.
       */

      /*
       * Unpack a pixel at a time into a <4 x float> RGBA vector
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef packed;

         packed = lp_build_gather_elem(gallivm, num_pixels,
                                       format_desc->block.bits, 32, aligned,
                                       base_ptr, offset, k, FALSE);

         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                                                  format_desc,
                                                  packed);
      }

      /*
       * Type conversion.
       *
       * TODO: We could avoid floating conversion for integer to
       * integer conversions.
       */

      if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
         debug_printf("%s: unpacking %s with floating point\n",
                      __FUNCTION__, format_desc->short_name);
      }

      conv_type = lp_float32_vec4_type();
      num_conv_src = num_pixels;
      num_conv_dst = 1;

      if (num_pixels % 8 == 0) {
         lp_build_concat_n(gallivm, lp_float32_vec4_type(),
                           tmps, num_pixels, tmps, num_pixels / 2);
         conv_type.length *= num_pixels / 4;
         num_conv_src = 4 * num_pixels / 8;
         if (type.width == 8 && type.floating == 0 && type.fixed == 0) {
            /*
             * FIXME: The fast float->unorm path (which is basically
             * skipping the MIN/MAX which are extremely pointless in any
             * case) requires that there are 2 destinations...
             * In any case, we really should make sure we don't hit this
             * code with multiple pixels for unorm8 dst types, it's
             * completely hopeless even if we do hit the right conversion.
             */
            type.length /= num_pixels / 4;
            num_conv_dst = num_pixels / 4;
         }
      }

      lp_build_conv(gallivm, conv_type, type,
                    tmps, num_conv_src, res, num_conv_dst);

      if (num_pixels % 8 == 0 &&
          (type.width == 8 && type.floating == 0 && type.fixed == 0)) {
         lp_build_concat_n(gallivm, type, res, num_conv_dst, res, 1);
      }

      return lp_build_format_swizzle_aos(format_desc, &bld, res[0]);
   }
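
   /*
    * Illustrative sketch (hypothetical input, not from the original code):
    * a single PIPE_FORMAT_R10G10B10A2_UNORM texel takes the path above; it
    * is unpacked by lp_build_unpack_arith_rgba_aos() into a <4 x float>
    * vector, converted by lp_build_conv() to the destination type, and
    * finally swizzled.
    */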

   /* If all channels are of the same type and we are not using half-floats */
   if (format_desc->is_array &&
       format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) {
      assert(!format_desc->is_mixed);
      return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type,
                                           base_ptr, offset);
   }

   /*
    * YUV / subsampled formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
                                               format_desc,
                                               num_pixels,
                                               base_ptr,
                                               offset,
                                               i, j);

      lp_build_conv(gallivm,
                    tmp_type, type,
                    &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * s3tc rgb formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_s3tc_rgba_aos(gallivm,
                                         format_desc,
                                         num_pixels,
                                         base_ptr,
                                         offset,
                                         i, j,
                                         cache);

      lp_build_conv(gallivm,
                    tmp_type, type,
                    &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * rgtc rgb formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;
      tmp_type.sign = (format_desc->format == PIPE_FORMAT_RGTC1_SNORM ||
                       format_desc->format == PIPE_FORMAT_RGTC2_SNORM ||
                       format_desc->format == PIPE_FORMAT_LATC1_SNORM ||
                       format_desc->format == PIPE_FORMAT_LATC2_SNORM);

      tmp = lp_build_fetch_rgtc_rgba_aos(gallivm,
                                         format_desc,
                                         num_pixels,
                                         base_ptr,
                                         offset,
                                         i, j,
                                         cache);

      lp_build_conv(gallivm,
                    tmp_type, type,
                    &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_8unorm().
    */

   if (unpack->fetch_rgba_8unorm &&
       !type.floating && type.width == 8 && !type.sign && type.norm) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_8unorm.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but it is convenient
       * for formats or scenarios for which there was no opportunity or
       * incentive to optimize.
       */

      LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
      LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmp;
      LLVMValueRef res;
      unsigned k;

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",
                      __FUNCTION__, format_desc->short_name);
      }

      /*
       * Declare and bind unpack->fetch_rgba_8unorm().
       */

      {
         /*
          * Function to call looks like:
          *    fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pi8t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          ARRAY_SIZE(arg_types), 0);

         if (gallivm->cache)
            gallivm->cache->dont_cache = true;
         /* make const pointer for the C fetch_rgba_8unorm function */
         function = lp_build_const_int_pointer(gallivm,
            func_to_pointer((func_pointer) unpack->fetch_rgba_8unorm));

         /* cast the callee pointer to the function's type */
         function = LLVMBuildBitCast(builder, function,
                                     LLVMPointerType(function_type, 0),
                                     "cast callee");
      }

      tmp_ptr = lp_build_alloca(gallivm, i32t, "");

      res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));

      /*
       * Invoke unpack->fetch_rgba_8unorm() for each pixel and insert the
       * result in the AoS vector.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");

         tmp = LLVMBuildLoad(builder, tmp_ptr, "");

         if (num_pixels == 1) {
            res = tmp;
         }
         else {
            res = LLVMBuildInsertElement(builder, res, tmp, index, "");
         }
      }

      /* Bitcast from <n x i32> to <4n x i8> */
      res = LLVMBuildBitCast(builder, res, bld.vec_type, "");

      return res;
   }

   /*
    * Fallback to fetch_rgba().
    */

   util_format_fetch_rgba_func_ptr fetch_rgba =
      util_format_fetch_rgba_func(format_desc->format);
   if (fetch_rgba) {
      /*
       * Fallback to calling the per-format fetch_rgba() function.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but it is convenient
       * for formats or scenarios for which there was no opportunity or
       * incentive to optimize.
       */
911
912
LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
913
LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
914
LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
915
LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
916
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
917
LLVMValueRef function;
918
LLVMValueRef tmp_ptr;
919
LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
920
LLVMValueRef res;
921
unsigned k;
922
923
if (gallivm_debug & GALLIVM_DEBUG_PERF) {
924
debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",
925
__FUNCTION__, format_desc->short_name);
926
}
927
928
/*
929
* Declare and bind unpack->fetch_rgba_float().
930
*/
931
932
{
933
/*
934
* Function to call looks like:
935
* fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
936
*/
937
LLVMTypeRef ret_type;
938
LLVMTypeRef arg_types[4];
939
940
ret_type = LLVMVoidTypeInContext(gallivm->context);
941
arg_types[0] = pf32t;
942
arg_types[1] = pi8t;
943
arg_types[2] = i32t;
944
arg_types[3] = i32t;
945
946
if (gallivm->cache)
947
gallivm->cache->dont_cache = true;
948
function = lp_build_const_func_pointer(gallivm,
949
func_to_pointer((func_pointer) fetch_rgba),
950
ret_type,
951
arg_types, ARRAY_SIZE(arg_types),
952
format_desc->short_name);
953
}
954
955
tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
956
957
/*
958
* Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
959
* in the SoA vectors.
960
*/
961
962
for (k = 0; k < num_pixels; ++k) {
963
LLVMValueRef args[4];
964
965
args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
966
args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
967
base_ptr, offset, k);
968
969
if (num_pixels == 1) {
970
args[2] = i;
971
args[3] = j;
972
}
973
else {
974
LLVMValueRef index = lp_build_const_int32(gallivm, k);
975
args[2] = LLVMBuildExtractElement(builder, i, index, "");
976
args[3] = LLVMBuildExtractElement(builder, j, index, "");
977
}
978
979
LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");
980
981
tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
982
}
983
984
lp_build_conv(gallivm,
985
lp_float32_vec4_type(),
986
type,
987
tmps, num_pixels, &res, 1);
988
989
return res;
990
}
991
992
assert(!util_format_is_pure_integer(format_desc->format));
993
994
assert(0);
995
return lp_build_undef(gallivm, type);
996
}
997
998