CoCalc -- lp_bld_format

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
⁴⁵⁶⁵ views
1
/**************************************************************************
2
 *
3
 * Copyright 2009 VMware, Inc.
4
 * All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the
8
 * "Software"), to deal in the Software without restriction, including
9
 * without limitation the rights to use, copy, modify, merge, publish,
10
 * distribute, sub license, and/or sell copies of the Software, and to
11
 * permit persons to whom the Software is furnished to do so, subject to
12
 * the following conditions:
13
 *
14
 * The above copyright notice and this permission notice (including the
15
 * next paragraph) shall be included in all copies or substantial portions
16
 * of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 *
26
 **************************************************************************/
27

28
/**
29
 * @file
30
 * AoS pixel format manipulation.
31
 *
32
 * @author Jose Fonseca <jfonseca@vmware.com>
33
 */
34

35

36
#include "util/format/u_format.h"
37
#include "util/u_memory.h"
38
#include "util/u_math.h"
39
#include "util/u_pointer.h"
40
#include "util/u_string.h"
41
#include "util/u_cpu_detect.h"
42

43
#include "lp_bld_arit.h"
44
#include "lp_bld_init.h"
45
#include "lp_bld_type.h"
46
#include "lp_bld_flow.h"
47
#include "lp_bld_const.h"
48
#include "lp_bld_conv.h"
49
#include "lp_bld_swizzle.h"
50
#include "lp_bld_gather.h"
51
#include "lp_bld_debug.h"
52
#include "lp_bld_format.h"
53
#include "lp_bld_pack.h"
54
#include "lp_bld_intr.h"
55
#include "lp_bld_logic.h"
56
#include "lp_bld_bitarit.h"
57
#include "lp_bld_misc.h"
58

59
/**
60
 * Basic swizzling.  Rearrange the order of the unswizzled array elements
61
 * according to the format description.  PIPE_SWIZZLE_0/ONE are supported
62
 * too.
63
 * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
64
 */
65
LLVMValueRef
66
lp_build_format_swizzle_aos(const struct util_format_description *desc,
67
                            struct lp_build_context *bld,
68
                            LLVMValueRef unswizzled)
69
{
70
   unsigned char swizzles[4];
71
   unsigned chan;
72

73
   assert(bld->type.length % 4 == 0);
74

75
   for (chan = 0; chan < 4; ++chan) {
76
      enum pipe_swizzle swizzle;
77

78
      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
79
         /*
80
          * For ZS formats do RGBA = ZZZ1
81
          */
82
         if (chan == 3) {
83
            swizzle = PIPE_SWIZZLE_1;
84
         } else if (desc->swizzle[0] == PIPE_SWIZZLE_NONE) {
85
            swizzle = PIPE_SWIZZLE_0;
86
         } else {
87
            swizzle = desc->swizzle[0];
88
         }
89
      } else {
90
         swizzle = desc->swizzle[chan];
91
      }
92
      swizzles[chan] = swizzle;
93
   }
94

95
   return lp_build_swizzle_aos(bld, unswizzled, swizzles);
96
}
97

98

99
/**
100
 * Whether the format matches the vector type, apart of swizzles.
101
 */
102
static inline boolean
103
format_matches_type(const struct util_format_description *desc,
104
                    struct lp_type type)
105
{
106
   enum util_format_type chan_type;
107
   unsigned chan;
108

109
   assert(type.length % 4 == 0);
110

111
   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
112
       desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
113
       desc->block.width != 1 ||
114
       desc->block.height != 1) {
115
      return FALSE;
116
   }
117

118
   if (type.floating) {
119
      chan_type = UTIL_FORMAT_TYPE_FLOAT;
120
   } else if (type.fixed) {
121
      chan_type = UTIL_FORMAT_TYPE_FIXED;
122
   } else if (type.sign) {
123
      chan_type = UTIL_FORMAT_TYPE_SIGNED;
124
   } else {
125
      chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
126
   }
127

128
   for (chan = 0; chan < desc->nr_channels; ++chan) {
129
      if (desc->channel[chan].size != type.width) {
130
         return FALSE;
131
      }
132

133
      if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
134
         if (desc->channel[chan].type != chan_type ||
135
             desc->channel[chan].normalized != type.norm) {
136
            return FALSE;
137
         }
138
      }
139
   }
140

141
   return TRUE;
142
}
143

144
/*
145
 * Do rounding when converting small unorm values to larger ones.
146
 * Not quite 100% accurate, as it's done by appending MSBs, but
147
 * should be good enough.
148
 */
149

150
static inline LLVMValueRef
151
scale_bits_up(struct gallivm_state *gallivm,
152
              int src_bits,
153
              int dst_bits,
154
              LLVMValueRef src,
155
              struct lp_type src_type)
156
{
157
   LLVMBuilderRef builder = gallivm->builder;
158
   LLVMValueRef result = src;
159

160
   if (src_bits == 1 && dst_bits > 1) {
161
      /*
162
       * Useful for a1 - we'd need quite some repeated copies otherwise.
163
       */
164
      struct lp_build_context bld;
165
      LLVMValueRef dst_mask;
166
      lp_build_context_init(&bld, gallivm, src_type);
167
      dst_mask = lp_build_const_int_vec(gallivm, src_type,
168
                                        (1 << dst_bits) - 1),
169
      result = lp_build_cmp(&bld, PIPE_FUNC_EQUAL, src,
170
                            lp_build_const_int_vec(gallivm, src_type, 0));
171
      result = lp_build_andnot(&bld, dst_mask, result);
172
   }
173
   else if (dst_bits > src_bits) {
174
      /* Scale up bits */
175
      int db = dst_bits - src_bits;
176

177
      /* Shift left by difference in bits */
178
      result = LLVMBuildShl(builder,
179
                            src,
180
                            lp_build_const_int_vec(gallivm, src_type, db),
181
                            "");
182

183
      if (db <= src_bits) {
184
         /* Enough bits in src to fill the remainder */
185
         LLVMValueRef lower = LLVMBuildLShr(builder,
186
                                            src,
187
                                            lp_build_const_int_vec(gallivm, src_type,
188
                                                                   src_bits - db),
189
                                            "");
190

191
         result = LLVMBuildOr(builder, result, lower, "");
192
      } else if (db > src_bits) {
193
         /* Need to repeatedly copy src bits to fill remainder in dst */
194
         unsigned n;
195

196
         for (n = src_bits; n < dst_bits; n *= 2) {
197
            LLVMValueRef shuv = lp_build_const_int_vec(gallivm, src_type, n);
198

199
            result = LLVMBuildOr(builder,
200
                                 result,
201
                                 LLVMBuildLShr(builder, result, shuv, ""),
202
                                 "");
203
         }
204
      }
205
   } else {
206
      assert (dst_bits == src_bits);
207
   }
208

209
   return result;
210
}
211

212
/**
213
 * Unpack a single pixel into its XYZW components.
214
 *
215
 * @param desc  the pixel format for the packed pixel value
216
 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
217
 *
218
 * @return XYZW in a float[4] or ubyte[4] or ushort[4] vector.
219
 */
220
static inline LLVMValueRef
221
lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
222
                               const struct util_format_description *desc,
223
                               LLVMValueRef packed)
224
{
225
   LLVMBuilderRef builder = gallivm->builder;
226
   LLVMValueRef shifted, casted, scaled, masked;
227
   LLVMValueRef shifts[4];
228
   LLVMValueRef masks[4];
229
   LLVMValueRef scales[4];
230
   LLVMTypeRef vec32_type;
231

232
   boolean normalized;
233
   boolean needs_uitofp;
234
   unsigned i;
235

236
   /* TODO: Support more formats */
237
   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
238
   assert(desc->block.width == 1);
239
   assert(desc->block.height == 1);
240
   assert(desc->block.bits <= 32);
241

242
   /* Do the intermediate integer computations with 32bit integers since it
243
    * matches floating point size */
244
   assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));
245

246
   vec32_type = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
247

248
   /* Broadcast the packed value to all four channels
249
    * before: packed = BGRA
250
    * after: packed = {BGRA, BGRA, BGRA, BGRA}
251
    */
252
   packed = LLVMBuildInsertElement(builder, LLVMGetUndef(vec32_type), packed,
253
                                   LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
254
                                   "");
255
   packed = LLVMBuildShuffleVector(builder, packed, LLVMGetUndef(vec32_type),
256
                                   LLVMConstNull(vec32_type),
257
                                   "");
258

259
   /* Initialize vector constants */
260
   normalized = FALSE;
261
   needs_uitofp = FALSE;
262

263
   /* Loop over 4 color components */
264
   for (i = 0; i < 4; ++i) {
265
      unsigned bits = desc->channel[i].size;
266
      unsigned shift = desc->channel[i].shift;
267

268
      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
269
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
270
         masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
271
         scales[i] =  LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
272
      }
273
      else {
274
         unsigned long long mask = (1ULL << bits) - 1;
275

276
         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
277

278
         if (bits == 32) {
279
            needs_uitofp = TRUE;
280
         }
281

282
         shifts[i] = lp_build_const_int32(gallivm, shift);
283
         masks[i] = lp_build_const_int32(gallivm, mask);
284

285
         if (desc->channel[i].normalized) {
286
            scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
287
            normalized = TRUE;
288
         }
289
         else
290
            scales[i] =  lp_build_const_float(gallivm, 1.0);
291
      }
292
   }
293

294
   /* Ex: convert packed = {XYZW, XYZW, XYZW, XYZW}
295
    * into masked = {X, Y, Z, W}
296
    */
297
   if (desc->block.bits < 32 && normalized) {
298
      /*
299
       * Note: we cannot do the shift below on x86 natively until AVX2.
300
       *
301
       * Old llvm versions will resort to scalar extract/shift insert,
302
       * which is definitely terrible, new versions will just do
303
       * several vector shifts and shuffle/blend results together.
304
       * We could turn this into a variable left shift plus a constant
305
       * right shift, and llvm would then turn the variable left shift
306
       * into a mul for us (albeit without sse41 the mul needs emulation
307
       * too...). However, since we're going to do a float mul
308
       * anyway, we just adjust that mul instead (plus the mask), skipping
309
       * the shift completely.
310
       * We could also use a extra mul when the format isn't normalized and
311
       * we don't have AVX2 support, but don't bother for now. Unfortunately,
312
       * this strategy doesn't work for 32bit formats (such as rgb10a2 or even
313
       * rgba8 if it ends up here), as that would require UIToFP, albeit that
314
       * would be fixable with easy 16bit shuffle (unless there's channels
315
       * crossing 16bit boundaries).
316
       */
317
      for (i = 0; i < 4; ++i) {
318
         if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
319
            unsigned bits = desc->channel[i].size;
320
            unsigned shift = desc->channel[i].shift;
321
            unsigned long long mask = ((1ULL << bits) - 1) << shift;
322
            scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
323
            masks[i] = lp_build_const_int32(gallivm, mask);
324
         }
325
      }
326
      masked = LLVMBuildAnd(builder, packed, LLVMConstVector(masks, 4), "");
327
   } else {
328
      shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
329
      masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
330
   }
331

332
   if (!needs_uitofp) {
333
      /* UIToFP can't be expressed in SSE2 */
334
      casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
335
   } else {
336
      casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
337
   }
338

339
   /*
340
    * At this point 'casted' may be a vector of floats such as
341
    * {255.0, 255.0, 255.0, 255.0}. (Normalized values may be multiplied
342
    * by powers of two). Next, if the pixel values are normalized
343
    * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
344
    */
345

346
   if (normalized)
347
      scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
348
   else
349
      scaled = casted;
350

351
   return scaled;
352
}
353

354

355
/**
356
 * Pack a single pixel.
357
 *
358
 * @param rgba 4 float vector with the unpacked components.
359
 *
360
 * XXX: This is mostly for reference and testing -- operating a single pixel at
361
 * a time is rarely if ever needed.
362
 */
363
LLVMValueRef
364
lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
365
                       const struct util_format_description *desc,
366
                       LLVMValueRef rgba)
367
{
368
   LLVMBuilderRef builder = gallivm->builder;
369
   LLVMTypeRef type;
370
   LLVMValueRef packed = NULL;
371
   LLVMValueRef swizzles[4];
372
   LLVMValueRef shifted, casted, scaled, unswizzled;
373
   LLVMValueRef shifts[4];
374
   LLVMValueRef scales[4];
375
   boolean normalized;
376
   unsigned i, j;
377

378
   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
379
   assert(desc->block.width == 1);
380
   assert(desc->block.height == 1);
381

382
   type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);
383

384
   /* Unswizzle the color components into the source vector. */
385
   for (i = 0; i < 4; ++i) {
386
      for (j = 0; j < 4; ++j) {
387
         if (desc->swizzle[j] == i)
388
            break;
389
      }
390
      if (j < 4)
391
         swizzles[i] = lp_build_const_int32(gallivm, j);
392
      else
393
         swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
394
   }
395

396
   unswizzled = LLVMBuildShuffleVector(builder, rgba,
397
                                       LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),
398
                                       LLVMConstVector(swizzles, 4), "");
399

400
   normalized = FALSE;
401
   for (i = 0; i < 4; ++i) {
402
      unsigned bits = desc->channel[i].size;
403
      unsigned shift = desc->channel[i].shift;
404

405
      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
406
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
407
         scales[i] =  LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));
408
      }
409
      else {
410
         unsigned mask = (1 << bits) - 1;
411

412
         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
413
         assert(bits < 32);
414

415
         shifts[i] = lp_build_const_int32(gallivm, shift);
416

417
         if (desc->channel[i].normalized) {
418
            scales[i] = lp_build_const_float(gallivm, mask);
419
            normalized = TRUE;
420
         }
421
         else
422
            scales[i] = lp_build_const_float(gallivm, 1.0);
423
      }
424
   }
425

426
   if (normalized)
427
      scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
428
   else
429
      scaled = unswizzled;
430

431
   casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");
432

433
   shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
434
   
435
   /* Bitwise or all components */
436
   for (i = 0; i < 4; ++i) {
437
      if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
438
         LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
439
                                               lp_build_const_int32(gallivm, i), "");
440
         if (packed)
441
            packed = LLVMBuildOr(builder, packed, component, "");
442
         else
443
            packed = component;
444
      }
445
   }
446

447
   if (!packed)
448
      packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
449

450
   if (desc->block.bits < 32)
451
      packed = LLVMBuildTrunc(builder, packed, type, "");
452

453
   return packed;
454
}
455

456

457

458

459
/**
460
 * Fetch a pixel into a 4 float AoS.
461
 *
462
 * \param format_desc  describes format of the image we're fetching from
463
 * \param aligned  whether the data is guaranteed to be aligned
464
 * \param ptr  address of the pixel block (or the texel if uncompressed)
465
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
466
 *              these will always be (0, 0).
467
 * \param cache  optional value pointing to a lp_build_format_cache structure
468
 * \return  a 4 element vector with the pixel's RGBA values.
469
 */
470
LLVMValueRef
471
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
472
                        const struct util_format_description *format_desc,
473
                        struct lp_type type,
474
                        boolean aligned,
475
                        LLVMValueRef base_ptr,
476
                        LLVMValueRef offset,
477
                        LLVMValueRef i,
478
                        LLVMValueRef j,
479
                        LLVMValueRef cache)
480
{
481
   const struct util_format_unpack_description *unpack =
482
      util_format_unpack_description(format_desc->format);
483
   LLVMBuilderRef builder = gallivm->builder;
484
   unsigned num_pixels = type.length / 4;
485
   struct lp_build_context bld;
486

487
   assert(type.length <= LP_MAX_VECTOR_LENGTH);
488
   assert(type.length % 4 == 0);
489

490
   lp_build_context_init(&bld, gallivm, type);
491

492
   /*
493
    * Trivial case
494
    *
495
    * The format matches the type (apart of a swizzle) so no need for
496
    * scaling or converting.
497
    */
498

499
   if (format_matches_type(format_desc, type) &&
500
       format_desc->block.bits <= type.width * 4 &&
501
       /* XXX this shouldn't be needed */
502
       util_is_power_of_two_or_zero(format_desc->block.bits)) {
503
      LLVMValueRef packed;
504
      LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type);
505
      struct lp_type fetch_type;
506
      unsigned vec_len = type.width * type.length;
507

508
      /*
509
       * The format matches the type (apart of a swizzle) so no need for
510
       * scaling or converting.
511
       */
512

513
      fetch_type = lp_type_uint(type.width*4);
514
      packed = lp_build_gather(gallivm, type.length/4,
515
                               format_desc->block.bits, fetch_type,
516
                               aligned, base_ptr, offset, TRUE);
517

518
      assert(format_desc->block.bits <= vec_len);
519
      (void) vec_len; /* silence unused var warning for non-debug build */
520

521
      packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, "");
522
      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
523
   }
524

525
   /*
526
    * Bit arithmetic for converting small_unorm to unorm8.
527
    *
528
    * This misses some opportunities for optimizations (like skipping mask
529
    * for the highest channel for instance, or doing bit scaling in parallel
530
    * for channels with the same bit width) but it should be passable for
531
    * all arithmetic formats.
532
    */
533
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
534
       format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
535
       util_format_fits_8unorm(format_desc) &&
536
       type.width == 8 && type.norm == 1 && type.sign == 0 &&
537
       type.fixed == 0 && type.floating == 0) {
538
      LLVMValueRef packed, res = NULL, chans[4], rgba[4];
539
      LLVMTypeRef dst_vec_type, conv_vec_type;
540
      struct lp_type fetch_type, conv_type;
541
      struct lp_build_context bld_conv;
542
      unsigned j;
543

544
      fetch_type = lp_type_uint(type.width*4);
545
      conv_type = lp_type_int_vec(type.width*4, type.width * type.length);
546
      dst_vec_type = lp_build_vec_type(gallivm, type);
547
      conv_vec_type = lp_build_vec_type(gallivm, conv_type);
548
      lp_build_context_init(&bld_conv, gallivm, conv_type);
549

550
      packed = lp_build_gather(gallivm, type.length/4,
551
                               format_desc->block.bits, fetch_type,
552
                               aligned, base_ptr, offset, TRUE);
553

554
      assert(format_desc->block.bits * type.length / 4 <=
555
             type.width * type.length);
556

557
      packed = LLVMBuildBitCast(gallivm->builder, packed, conv_vec_type, "");
558

559
      for (j = 0; j < format_desc->nr_channels; ++j) {
560
         unsigned mask = 0;
561
         unsigned sa = format_desc->channel[j].shift;
562

563
         mask = (1 << format_desc->channel[j].size) - 1;
564

565
         /* Extract bits from source */
566
         chans[j] = LLVMBuildLShr(builder, packed,
567
                                  lp_build_const_int_vec(gallivm, conv_type, sa),
568
                                  "");
569

570
         chans[j] = LLVMBuildAnd(builder, chans[j],
571
                                 lp_build_const_int_vec(gallivm, conv_type, mask),
572
                                 "");
573

574
         /* Scale bits */
575
         if (type.norm) {
576
            chans[j] = scale_bits_up(gallivm, format_desc->channel[j].size,
577
                                     type.width, chans[j], conv_type);
578
         }
579
      }
580
      /*
581
       * This is a hacked lp_build_format_swizzle_soa() since we need a
582
       * normalized 1 but only 8 bits in a 32bit vector...
583
       */
584
      for (j = 0; j < 4; ++j) {
585
         enum pipe_swizzle swizzle = format_desc->swizzle[j];
586
         if (swizzle == PIPE_SWIZZLE_1) {
587
            rgba[j] = lp_build_const_int_vec(gallivm, conv_type, (1 << type.width) - 1);
588
         } else {
589
            rgba[j] = lp_build_swizzle_soa_channel(&bld_conv, chans, swizzle);
590
         }
591
         if (j == 0) {
592
            res = rgba[j];
593
         } else {
594
            rgba[j] = LLVMBuildShl(builder, rgba[j],
595
                                   lp_build_const_int_vec(gallivm, conv_type,
596
                                                          j * type.width), "");
597
            res = LLVMBuildOr(builder, res, rgba[j], "");
598
         }
599
      }
600
      res = LLVMBuildBitCast(gallivm->builder, res, dst_vec_type, "");
601

602
      return res;
603
   }
604

605
   /*
606
    * Bit arithmetic
607
    */
608

609
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
610
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
611
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
612
       format_desc->block.width == 1 &&
613
       format_desc->block.height == 1 &&
614
       /* XXX this shouldn't be needed */
615
       util_is_power_of_two_or_zero(format_desc->block.bits) &&
616
       format_desc->block.bits <= 32 &&
617
       format_desc->is_bitmask &&
618
       !format_desc->is_mixed &&
619
       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
620
        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED) &&
621
       !format_desc->channel[0].pure_integer) {
622

623
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
624
      LLVMValueRef res[LP_MAX_VECTOR_WIDTH / 128];
625
      struct lp_type conv_type;
626
      unsigned k, num_conv_src, num_conv_dst;
627

628
      /*
629
       * Note this path is generally terrible for fetching multiple pixels.
630
       * We should make sure we cannot hit this code path for anything but
631
       * single pixels.
632
       */
633

634
      /*
635
       * Unpack a pixel at a time into a <4 x float> RGBA vector
636
       */
637

638
      for (k = 0; k < num_pixels; ++k) {
639
         LLVMValueRef packed;
640

641
         packed = lp_build_gather_elem(gallivm, num_pixels,
642
                                       format_desc->block.bits, 32, aligned,
643
                                       base_ptr, offset, k, FALSE);
644

645
         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
646
                                                  format_desc,
647
                                                  packed);
648
      }
649

650
      /*
651
       * Type conversion.
652
       *
653
       * TODO: We could avoid floating conversion for integer to
654
       * integer conversions.
655
       */
656

657
      if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
658
         debug_printf("%s: unpacking %s with floating point\n",
659
                      __FUNCTION__, format_desc->short_name);
660
      }
661

662
      conv_type = lp_float32_vec4_type();
663
      num_conv_src = num_pixels;
664
      num_conv_dst = 1;
665

666
      if (num_pixels % 8 == 0) {
667
         lp_build_concat_n(gallivm, lp_float32_vec4_type(),
668
                           tmps, num_pixels, tmps, num_pixels / 2);
669
         conv_type.length *= num_pixels / 4;
670
         num_conv_src = 4 * num_pixels / 8;
671
         if (type.width == 8 && type.floating == 0 && type.fixed == 0) {
672
            /*
673
             * FIXME: The fast float->unorm path (which is basically
674
             * skipping the MIN/MAX which are extremely pointless in any
675
             * case) requires that there's 2 destinations...
676
             * In any case, we really should make sure we don't hit this
677
             * code with multiple pixels for unorm8 dst types, it's
678
             * completely hopeless even if we do hit the right conversion.
679
             */
680
            type.length /= num_pixels / 4;
681
            num_conv_dst = num_pixels / 4;
682
         }
683
      }
684

685
      lp_build_conv(gallivm, conv_type, type,
686
                    tmps, num_conv_src, res, num_conv_dst);
687

688
      if (num_pixels % 8 == 0 &&
689
          (type.width == 8 && type.floating == 0 && type.fixed == 0)) {
690
         lp_build_concat_n(gallivm, type, res, num_conv_dst, res, 1);
691
      }
692

693
      return lp_build_format_swizzle_aos(format_desc, &bld, res[0]);
694
   }
695

696
   /* If all channels are of same type and we are not using half-floats */
697
   if (format_desc->is_array &&
698
       format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) {
699
      assert(!format_desc->is_mixed);
700
      return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset);
701
   }
702

703
   /*
704
    * YUV / subsampled formats
705
    */
706

707
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
708
      struct lp_type tmp_type;
709
      LLVMValueRef tmp;
710

711
      memset(&tmp_type, 0, sizeof tmp_type);
712
      tmp_type.width = 8;
713
      tmp_type.length = num_pixels * 4;
714
      tmp_type.norm = TRUE;
715

716
      tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
717
                                               format_desc,
718
                                               num_pixels,
719
                                               base_ptr,
720
                                               offset,
721
                                               i, j);
722

723
      lp_build_conv(gallivm,
724
                    tmp_type, type,
725
                    &tmp, 1, &tmp, 1);
726

727
      return tmp;
728
   }
729

730
   /*
731
    * s3tc rgb formats
732
    */
733

734
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
735
      struct lp_type tmp_type;
736
      LLVMValueRef tmp;
737

738
      memset(&tmp_type, 0, sizeof tmp_type);
739
      tmp_type.width = 8;
740
      tmp_type.length = num_pixels * 4;
741
      tmp_type.norm = TRUE;
742

743
      tmp = lp_build_fetch_s3tc_rgba_aos(gallivm,
744
                                         format_desc,
745
                                         num_pixels,
746
                                         base_ptr,
747
                                         offset,
748
                                         i, j,
749
                                         cache);
750

751
      lp_build_conv(gallivm,
752
                    tmp_type, type,
753
                    &tmp, 1, &tmp, 1);
754

755
       return tmp;
756
   }
757

758
   /*
759
    * rgtc rgb formats
760
    */
761

762
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
763
      struct lp_type tmp_type;
764
      LLVMValueRef tmp;
765

766
      memset(&tmp_type, 0, sizeof tmp_type);
767
      tmp_type.width = 8;
768
      tmp_type.length = num_pixels * 4;
769
      tmp_type.norm = TRUE;
770
      tmp_type.sign = (format_desc->format == PIPE_FORMAT_RGTC1_SNORM ||
771
                       format_desc->format == PIPE_FORMAT_RGTC2_SNORM ||
772
                       format_desc->format == PIPE_FORMAT_LATC1_SNORM ||
773
                       format_desc->format == PIPE_FORMAT_LATC2_SNORM);
774

775
      tmp = lp_build_fetch_rgtc_rgba_aos(gallivm,
776
                                         format_desc,
777
                                         num_pixels,
778
                                         base_ptr,
779
                                         offset,
780
                                         i, j,
781
                                         cache);
782

783
      lp_build_conv(gallivm,
784
                    tmp_type, type,
785
                    &tmp, 1, &tmp, 1);
786

787
       return tmp;
788
   }
789

790
   /*
791
    * Fallback to util_format_description::fetch_rgba_8unorm().
792
    */
793

794
   if (unpack->fetch_rgba_8unorm &&
795
       !type.floating && type.width == 8 && !type.sign && type.norm) {
796
      /*
797
       * Fallback to calling util_format_description::fetch_rgba_8unorm.
798
       *
799
       * This is definitely not the most efficient way of fetching pixels, as
800
       * we miss the opportunity to do vectorization, but it is
801
       * convenient for formats or scenarios for which there was no opportunity
802
       * or incentive to optimize.
803
       */
804

805
      LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
806
      LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
807
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
808
      LLVMValueRef function;
809
      LLVMValueRef tmp_ptr;
810
      LLVMValueRef tmp;
811
      LLVMValueRef res;
812
      unsigned k;
813

814
      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
815
         debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",
816
                      __FUNCTION__, format_desc->short_name);
817
      }
818

819
      /*
820
       * Declare and bind format_desc->fetch_rgba_8unorm().
821
       */
822

823
      {
824
         /*
825
          * Function to call looks like:
826
          *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
827
          */
828
         LLVMTypeRef ret_type;
829
         LLVMTypeRef arg_types[4];
830
         LLVMTypeRef function_type;
831

832
         ret_type = LLVMVoidTypeInContext(gallivm->context);
833
         arg_types[0] = pi8t;
834
         arg_types[1] = pi8t;
835
         arg_types[2] = i32t;
836
         arg_types[3] = i32t;
837
         function_type = LLVMFunctionType(ret_type, arg_types,
838
                                          ARRAY_SIZE(arg_types), 0);
839

840
         if (gallivm->cache)
841
            gallivm->cache->dont_cache = true;
842
         /* make const pointer for the C fetch_rgba_8unorm function */
843
         function = lp_build_const_int_pointer(gallivm,
844
            func_to_pointer((func_pointer) unpack->fetch_rgba_8unorm));
845

846
         /* cast the callee pointer to the function's type */
847
         function = LLVMBuildBitCast(builder, function,
848
                                     LLVMPointerType(function_type, 0),
849
                                     "cast callee");
850
      }
851

852
      tmp_ptr = lp_build_alloca(gallivm, i32t, "");
853

854
      res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
855

856
      /*
857
       * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
858
       * in the SoA vectors.
859
       */
860

861
      for (k = 0; k < num_pixels; ++k) {
862
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
863
         LLVMValueRef args[4];
864

865
         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
866
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
867
                                            base_ptr, offset, k);
868

869
         if (num_pixels == 1) {
870
            args[2] = i;
871
            args[3] = j;
872
         }
873
         else {
874
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
875
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
876
         }
877

878
         LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");
879

880
         tmp = LLVMBuildLoad(builder, tmp_ptr, "");
881

882
         if (num_pixels == 1) {
883
            res = tmp;
884
         }
885
         else {
886
            res = LLVMBuildInsertElement(builder, res, tmp, index, "");
887
         }
888
      }
889

890
      /* Bitcast from <n x i32> to <4n x i8> */
891
      res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
892

893
      return res;
894
   }
895

896
   /*
897
    * Fallback to fetch_rgba().
898
    */
899

900
   util_format_fetch_rgba_func_ptr fetch_rgba =
901
      util_format_fetch_rgba_func(format_desc->format);
902
   if (fetch_rgba) {
903
      /*
904
       * Fallback to calling the function returned by util_format_fetch_rgba_func().
905
       *
906
       * This is definitely not the most efficient way of fetching pixels, as
907
       * we miss the opportunity to do vectorization, but this is
908
       * convenient for formats or scenarios for which there was no opportunity
909
       * or incentive to optimize.
910
       */
911

912
      LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
913
      LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
914
      LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
915
      LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
916
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
917
      LLVMValueRef function;
918
      LLVMValueRef tmp_ptr;
919
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
920
      LLVMValueRef res;
921
      unsigned k;
922

923
      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
924
         debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",
925
                      __FUNCTION__, format_desc->short_name);
926
      }
927

928
      /*
929
       * Declare and bind unpack->fetch_rgba_float().
930
       */
931

932
      {
933
         /*
934
          * Function to call looks like:
935
          *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
936
          */
937
         LLVMTypeRef ret_type;
938
         LLVMTypeRef arg_types[4];
939

940
         ret_type = LLVMVoidTypeInContext(gallivm->context);
941
         arg_types[0] = pf32t;
942
         arg_types[1] = pi8t;
943
         arg_types[2] = i32t;
944
         arg_types[3] = i32t;
945

946
         if (gallivm->cache)
947
            gallivm->cache->dont_cache = true;
948
         function = lp_build_const_func_pointer(gallivm,
949
                                                func_to_pointer((func_pointer) fetch_rgba),
950
                                                ret_type,
951
                                                arg_types, ARRAY_SIZE(arg_types),
952
                                                format_desc->short_name);
953
      }
954

955
      tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
956

957
      /*
958
       * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
959
       * in the SoA vectors.
960
       */
961

962
      for (k = 0; k < num_pixels; ++k) {
963
         LLVMValueRef args[4];
964

965
         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
966
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
967
                                            base_ptr, offset, k);
968

969
         if (num_pixels == 1) {
970
            args[2] = i;
971
            args[3] = j;
972
         }
973
         else {
974
            LLVMValueRef index = lp_build_const_int32(gallivm, k);
975
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
976
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
977
         }
978

979
         LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");
980

981
         tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
982
      }
983

984
      lp_build_conv(gallivm,
985
                    lp_float32_vec4_type(),
986
                    type,
987
                    tmps, num_pixels, &res, 1);
988

989
      return res;
990
   }
991

992
   assert(!util_format_is_pure_integer(format_desc->format));
993

994
   assert(0);
995
   return lp_build_undef(gallivm, type);
996
}
997

998
Product

Resources

Company