Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/auxiliary/tgsi/tgsi_exec.c
4565 views
1
/**************************************************************************
2
*
3
* Copyright 2007-2008 VMware, Inc.
4
* All Rights Reserved.
5
* Copyright 2009-2010 VMware, Inc. All rights Reserved.
6
*
7
* Permission is hereby granted, free of charge, to any person obtaining a
8
* copy of this software and associated documentation files (the
9
* "Software"), to deal in the Software without restriction, including
10
* without limitation the rights to use, copy, modify, merge, publish,
11
* distribute, sub license, and/or sell copies of the Software, and to
12
* permit persons to whom the Software is furnished to do so, subject to
13
* the following conditions:
14
*
15
* The above copyright notice and this permission notice (including the
16
* next paragraph) shall be included in all copies or substantial portions
17
* of the Software.
18
*
19
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
*
27
**************************************************************************/
28
29
/**
30
* TGSI interpreter/executor.
31
*
32
* Flow control information:
33
*
34
* Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35
* flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36
* care since a condition may be true for some quad components but false
37
* for other components.
38
*
39
* We basically execute all statements (even if they're in the part of
40
* an IF/ELSE clause that's "not taken") and use a special mask to
41
* control writing to destination registers. This is the ExecMask.
42
* See store_dest().
43
*
44
* The ExecMask is computed from three other masks (CondMask, LoopMask and
45
* ContMask) which are controlled by the flow control instructions (namely:
46
* IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47
*
48
*
49
* Authors:
50
* Michal Krol
51
* Brian Paul
52
*/
53
54
#include "pipe/p_compiler.h"
55
#include "pipe/p_state.h"
56
#include "pipe/p_shader_tokens.h"
57
#include "tgsi/tgsi_dump.h"
58
#include "tgsi/tgsi_parse.h"
59
#include "tgsi/tgsi_util.h"
60
#include "tgsi_exec.h"
61
#include "util/compiler.h"
62
#include "util/half_float.h"
63
#include "util/u_memory.h"
64
#include "util/u_math.h"
65
#include "util/rounding.h"
66
67
68
/* Compile-time debug switch; presumably gates execution tracing elsewhere
 * in this file (its users are outside this section -- confirm before use).
 */
#define DEBUG_EXECUTION 0


/* Channel index of each pixel inside a quad.  The derivative helpers
 * (micro_ddx, micro_ddy and their _fine variants) index a channel's f[]
 * array with these values.
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3
75
76
union tgsi_double_channel {
77
double d[TGSI_QUAD_SIZE];
78
unsigned u[TGSI_QUAD_SIZE][2];
79
uint64_t u64[TGSI_QUAD_SIZE];
80
int64_t i64[TGSI_QUAD_SIZE];
81
} ALIGN16;
82
83
struct ALIGN16 tgsi_double_vector {
84
union tgsi_double_channel xy;
85
union tgsi_double_channel zw;
86
};
87
88
static void
89
micro_abs(union tgsi_exec_channel *dst,
90
const union tgsi_exec_channel *src)
91
{
92
dst->f[0] = fabsf(src->f[0]);
93
dst->f[1] = fabsf(src->f[1]);
94
dst->f[2] = fabsf(src->f[2]);
95
dst->f[3] = fabsf(src->f[3]);
96
}
97
98
static void
99
micro_arl(union tgsi_exec_channel *dst,
100
const union tgsi_exec_channel *src)
101
{
102
dst->i[0] = (int)floorf(src->f[0]);
103
dst->i[1] = (int)floorf(src->f[1]);
104
dst->i[2] = (int)floorf(src->f[2]);
105
dst->i[3] = (int)floorf(src->f[3]);
106
}
107
108
static void
109
micro_arr(union tgsi_exec_channel *dst,
110
const union tgsi_exec_channel *src)
111
{
112
dst->i[0] = (int)floorf(src->f[0] + 0.5f);
113
dst->i[1] = (int)floorf(src->f[1] + 0.5f);
114
dst->i[2] = (int)floorf(src->f[2] + 0.5f);
115
dst->i[3] = (int)floorf(src->f[3] + 0.5f);
116
}
117
118
static void
119
micro_ceil(union tgsi_exec_channel *dst,
120
const union tgsi_exec_channel *src)
121
{
122
dst->f[0] = ceilf(src->f[0]);
123
dst->f[1] = ceilf(src->f[1]);
124
dst->f[2] = ceilf(src->f[2]);
125
dst->f[3] = ceilf(src->f[3]);
126
}
127
128
static void
129
micro_cmp(union tgsi_exec_channel *dst,
130
const union tgsi_exec_channel *src0,
131
const union tgsi_exec_channel *src1,
132
const union tgsi_exec_channel *src2)
133
{
134
dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
135
dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
136
dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
137
dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
138
}
139
140
static void
141
micro_cos(union tgsi_exec_channel *dst,
142
const union tgsi_exec_channel *src)
143
{
144
dst->f[0] = cosf(src->f[0]);
145
dst->f[1] = cosf(src->f[1]);
146
dst->f[2] = cosf(src->f[2]);
147
dst->f[3] = cosf(src->f[3]);
148
}
149
150
static void
151
micro_d2f(union tgsi_exec_channel *dst,
152
const union tgsi_double_channel *src)
153
{
154
dst->f[0] = (float)src->d[0];
155
dst->f[1] = (float)src->d[1];
156
dst->f[2] = (float)src->d[2];
157
dst->f[3] = (float)src->d[3];
158
}
159
160
static void
161
micro_d2i(union tgsi_exec_channel *dst,
162
const union tgsi_double_channel *src)
163
{
164
dst->i[0] = (int)src->d[0];
165
dst->i[1] = (int)src->d[1];
166
dst->i[2] = (int)src->d[2];
167
dst->i[3] = (int)src->d[3];
168
}
169
170
static void
171
micro_d2u(union tgsi_exec_channel *dst,
172
const union tgsi_double_channel *src)
173
{
174
dst->u[0] = (unsigned)src->d[0];
175
dst->u[1] = (unsigned)src->d[1];
176
dst->u[2] = (unsigned)src->d[2];
177
dst->u[3] = (unsigned)src->d[3];
178
}
179
static void
180
micro_dabs(union tgsi_double_channel *dst,
181
const union tgsi_double_channel *src)
182
{
183
dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
184
dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
185
dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
186
dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
187
}
188
189
static void
190
micro_dadd(union tgsi_double_channel *dst,
191
const union tgsi_double_channel *src)
192
{
193
dst->d[0] = src[0].d[0] + src[1].d[0];
194
dst->d[1] = src[0].d[1] + src[1].d[1];
195
dst->d[2] = src[0].d[2] + src[1].d[2];
196
dst->d[3] = src[0].d[3] + src[1].d[3];
197
}
198
199
static void
200
micro_ddiv(union tgsi_double_channel *dst,
201
const union tgsi_double_channel *src)
202
{
203
dst->d[0] = src[0].d[0] / src[1].d[0];
204
dst->d[1] = src[0].d[1] / src[1].d[1];
205
dst->d[2] = src[0].d[2] / src[1].d[2];
206
dst->d[3] = src[0].d[3] / src[1].d[3];
207
}
208
209
static void
210
micro_ddx(union tgsi_exec_channel *dst,
211
const union tgsi_exec_channel *src)
212
{
213
dst->f[0] =
214
dst->f[1] =
215
dst->f[2] =
216
dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
217
}
218
219
static void
220
micro_ddx_fine(union tgsi_exec_channel *dst,
221
const union tgsi_exec_channel *src)
222
{
223
dst->f[0] =
224
dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT];
225
dst->f[2] =
226
dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
227
}
228
229
230
static void
231
micro_ddy(union tgsi_exec_channel *dst,
232
const union tgsi_exec_channel *src)
233
{
234
dst->f[0] =
235
dst->f[1] =
236
dst->f[2] =
237
dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
238
}
239
240
static void
241
micro_ddy_fine(union tgsi_exec_channel *dst,
242
const union tgsi_exec_channel *src)
243
{
244
dst->f[0] =
245
dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
246
dst->f[1] =
247
dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT];
248
}
249
250
static void
251
micro_dmul(union tgsi_double_channel *dst,
252
const union tgsi_double_channel *src)
253
{
254
dst->d[0] = src[0].d[0] * src[1].d[0];
255
dst->d[1] = src[0].d[1] * src[1].d[1];
256
dst->d[2] = src[0].d[2] * src[1].d[2];
257
dst->d[3] = src[0].d[3] * src[1].d[3];
258
}
259
260
static void
261
micro_dmax(union tgsi_double_channel *dst,
262
const union tgsi_double_channel *src)
263
{
264
dst->d[0] = fmax(src[0].d[0], src[1].d[0]);
265
dst->d[1] = fmax(src[0].d[1], src[1].d[1]);
266
dst->d[2] = fmax(src[0].d[2], src[1].d[2]);
267
dst->d[3] = fmax(src[0].d[3], src[1].d[3]);
268
}
269
270
static void
271
micro_dmin(union tgsi_double_channel *dst,
272
const union tgsi_double_channel *src)
273
{
274
dst->d[0] = fmin(src[0].d[0], src[1].d[0]);
275
dst->d[1] = fmin(src[0].d[1], src[1].d[1]);
276
dst->d[2] = fmin(src[0].d[2], src[1].d[2]);
277
dst->d[3] = fmin(src[0].d[3], src[1].d[3]);
278
}
279
280
static void
281
micro_dneg(union tgsi_double_channel *dst,
282
const union tgsi_double_channel *src)
283
{
284
dst->d[0] = -src->d[0];
285
dst->d[1] = -src->d[1];
286
dst->d[2] = -src->d[2];
287
dst->d[3] = -src->d[3];
288
}
289
290
static void
291
micro_dslt(union tgsi_double_channel *dst,
292
const union tgsi_double_channel *src)
293
{
294
dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
295
dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
296
dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
297
dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
298
}
299
300
static void
301
micro_dsne(union tgsi_double_channel *dst,
302
const union tgsi_double_channel *src)
303
{
304
dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
305
dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
306
dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
307
dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
308
}
309
310
static void
311
micro_dsge(union tgsi_double_channel *dst,
312
const union tgsi_double_channel *src)
313
{
314
dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
315
dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
316
dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
317
dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
318
}
319
320
static void
321
micro_dseq(union tgsi_double_channel *dst,
322
const union tgsi_double_channel *src)
323
{
324
dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
325
dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
326
dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
327
dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
328
}
329
330
static void
331
micro_drcp(union tgsi_double_channel *dst,
332
const union tgsi_double_channel *src)
333
{
334
dst->d[0] = 1.0 / src->d[0];
335
dst->d[1] = 1.0 / src->d[1];
336
dst->d[2] = 1.0 / src->d[2];
337
dst->d[3] = 1.0 / src->d[3];
338
}
339
340
static void
341
micro_dsqrt(union tgsi_double_channel *dst,
342
const union tgsi_double_channel *src)
343
{
344
dst->d[0] = sqrt(src->d[0]);
345
dst->d[1] = sqrt(src->d[1]);
346
dst->d[2] = sqrt(src->d[2]);
347
dst->d[3] = sqrt(src->d[3]);
348
}
349
350
static void
351
micro_drsq(union tgsi_double_channel *dst,
352
const union tgsi_double_channel *src)
353
{
354
dst->d[0] = 1.0 / sqrt(src->d[0]);
355
dst->d[1] = 1.0 / sqrt(src->d[1]);
356
dst->d[2] = 1.0 / sqrt(src->d[2]);
357
dst->d[3] = 1.0 / sqrt(src->d[3]);
358
}
359
360
static void
361
micro_dmad(union tgsi_double_channel *dst,
362
const union tgsi_double_channel *src)
363
{
364
dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
365
dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
366
dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
367
dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
368
}
369
370
static void
371
micro_dfrac(union tgsi_double_channel *dst,
372
const union tgsi_double_channel *src)
373
{
374
dst->d[0] = src->d[0] - floor(src->d[0]);
375
dst->d[1] = src->d[1] - floor(src->d[1]);
376
dst->d[2] = src->d[2] - floor(src->d[2]);
377
dst->d[3] = src->d[3] - floor(src->d[3]);
378
}
379
380
static void
381
micro_dflr(union tgsi_double_channel *dst,
382
const union tgsi_double_channel *src)
383
{
384
dst->d[0] = floor(src->d[0]);
385
dst->d[1] = floor(src->d[1]);
386
dst->d[2] = floor(src->d[2]);
387
dst->d[3] = floor(src->d[3]);
388
}
389
390
static void
391
micro_dldexp(union tgsi_double_channel *dst,
392
const union tgsi_double_channel *src0,
393
union tgsi_exec_channel *src1)
394
{
395
dst->d[0] = ldexp(src0->d[0], src1->i[0]);
396
dst->d[1] = ldexp(src0->d[1], src1->i[1]);
397
dst->d[2] = ldexp(src0->d[2], src1->i[2]);
398
dst->d[3] = ldexp(src0->d[3], src1->i[3]);
399
}
400
401
static void
402
micro_dfracexp(union tgsi_double_channel *dst,
403
union tgsi_exec_channel *dst_exp,
404
const union tgsi_double_channel *src)
405
{
406
dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
407
dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
408
dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
409
dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
410
}
411
412
static void
413
micro_exp2(union tgsi_exec_channel *dst,
414
const union tgsi_exec_channel *src)
415
{
416
#if DEBUG
417
/* Inf is okay for this instruction, so clamp it to silence assertions. */
418
uint i;
419
union tgsi_exec_channel clamped;
420
421
for (i = 0; i < 4; i++) {
422
if (src->f[i] > 127.99999f) {
423
clamped.f[i] = 127.99999f;
424
} else if (src->f[i] < -126.99999f) {
425
clamped.f[i] = -126.99999f;
426
} else {
427
clamped.f[i] = src->f[i];
428
}
429
}
430
src = &clamped;
431
#endif /* DEBUG */
432
433
dst->f[0] = powf(2.0f, src->f[0]);
434
dst->f[1] = powf(2.0f, src->f[1]);
435
dst->f[2] = powf(2.0f, src->f[2]);
436
dst->f[3] = powf(2.0f, src->f[3]);
437
}
438
439
static void
440
micro_f2d(union tgsi_double_channel *dst,
441
const union tgsi_exec_channel *src)
442
{
443
dst->d[0] = (double)src->f[0];
444
dst->d[1] = (double)src->f[1];
445
dst->d[2] = (double)src->f[2];
446
dst->d[3] = (double)src->f[3];
447
}
448
449
static void
450
micro_flr(union tgsi_exec_channel *dst,
451
const union tgsi_exec_channel *src)
452
{
453
dst->f[0] = floorf(src->f[0]);
454
dst->f[1] = floorf(src->f[1]);
455
dst->f[2] = floorf(src->f[2]);
456
dst->f[3] = floorf(src->f[3]);
457
}
458
459
static void
460
micro_frc(union tgsi_exec_channel *dst,
461
const union tgsi_exec_channel *src)
462
{
463
dst->f[0] = src->f[0] - floorf(src->f[0]);
464
dst->f[1] = src->f[1] - floorf(src->f[1]);
465
dst->f[2] = src->f[2] - floorf(src->f[2]);
466
dst->f[3] = src->f[3] - floorf(src->f[3]);
467
}
468
469
static void
470
micro_i2d(union tgsi_double_channel *dst,
471
const union tgsi_exec_channel *src)
472
{
473
dst->d[0] = (double)src->i[0];
474
dst->d[1] = (double)src->i[1];
475
dst->d[2] = (double)src->i[2];
476
dst->d[3] = (double)src->i[3];
477
}
478
479
static void
480
micro_iabs(union tgsi_exec_channel *dst,
481
const union tgsi_exec_channel *src)
482
{
483
dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
484
dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
485
dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
486
dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
487
}
488
489
static void
490
micro_ineg(union tgsi_exec_channel *dst,
491
const union tgsi_exec_channel *src)
492
{
493
dst->i[0] = -src->i[0];
494
dst->i[1] = -src->i[1];
495
dst->i[2] = -src->i[2];
496
dst->i[3] = -src->i[3];
497
}
498
499
static void
500
micro_lg2(union tgsi_exec_channel *dst,
501
const union tgsi_exec_channel *src)
502
{
503
dst->f[0] = logf(src->f[0]) * 1.442695f;
504
dst->f[1] = logf(src->f[1]) * 1.442695f;
505
dst->f[2] = logf(src->f[2]) * 1.442695f;
506
dst->f[3] = logf(src->f[3]) * 1.442695f;
507
}
508
509
static void
510
micro_lrp(union tgsi_exec_channel *dst,
511
const union tgsi_exec_channel *src0,
512
const union tgsi_exec_channel *src1,
513
const union tgsi_exec_channel *src2)
514
{
515
dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
516
dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
517
dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
518
dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
519
}
520
521
static void
522
micro_mad(union tgsi_exec_channel *dst,
523
const union tgsi_exec_channel *src0,
524
const union tgsi_exec_channel *src1,
525
const union tgsi_exec_channel *src2)
526
{
527
dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
528
dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
529
dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
530
dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
531
}
532
533
static void
534
micro_mov(union tgsi_exec_channel *dst,
535
const union tgsi_exec_channel *src)
536
{
537
dst->u[0] = src->u[0];
538
dst->u[1] = src->u[1];
539
dst->u[2] = src->u[2];
540
dst->u[3] = src->u[3];
541
}
542
543
static void
544
micro_rcp(union tgsi_exec_channel *dst,
545
const union tgsi_exec_channel *src)
546
{
547
#if 0 /* for debugging */
548
assert(src->f[0] != 0.0f);
549
assert(src->f[1] != 0.0f);
550
assert(src->f[2] != 0.0f);
551
assert(src->f[3] != 0.0f);
552
#endif
553
dst->f[0] = 1.0f / src->f[0];
554
dst->f[1] = 1.0f / src->f[1];
555
dst->f[2] = 1.0f / src->f[2];
556
dst->f[3] = 1.0f / src->f[3];
557
}
558
559
static void
560
micro_rnd(union tgsi_exec_channel *dst,
561
const union tgsi_exec_channel *src)
562
{
563
dst->f[0] = _mesa_roundevenf(src->f[0]);
564
dst->f[1] = _mesa_roundevenf(src->f[1]);
565
dst->f[2] = _mesa_roundevenf(src->f[2]);
566
dst->f[3] = _mesa_roundevenf(src->f[3]);
567
}
568
569
static void
570
micro_rsq(union tgsi_exec_channel *dst,
571
const union tgsi_exec_channel *src)
572
{
573
#if 0 /* for debugging */
574
assert(src->f[0] != 0.0f);
575
assert(src->f[1] != 0.0f);
576
assert(src->f[2] != 0.0f);
577
assert(src->f[3] != 0.0f);
578
#endif
579
dst->f[0] = 1.0f / sqrtf(src->f[0]);
580
dst->f[1] = 1.0f / sqrtf(src->f[1]);
581
dst->f[2] = 1.0f / sqrtf(src->f[2]);
582
dst->f[3] = 1.0f / sqrtf(src->f[3]);
583
}
584
585
static void
586
micro_sqrt(union tgsi_exec_channel *dst,
587
const union tgsi_exec_channel *src)
588
{
589
dst->f[0] = sqrtf(src->f[0]);
590
dst->f[1] = sqrtf(src->f[1]);
591
dst->f[2] = sqrtf(src->f[2]);
592
dst->f[3] = sqrtf(src->f[3]);
593
}
594
595
static void
596
micro_seq(union tgsi_exec_channel *dst,
597
const union tgsi_exec_channel *src0,
598
const union tgsi_exec_channel *src1)
599
{
600
dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
601
dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
602
dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
603
dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
604
}
605
606
static void
607
micro_sge(union tgsi_exec_channel *dst,
608
const union tgsi_exec_channel *src0,
609
const union tgsi_exec_channel *src1)
610
{
611
dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
612
dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
613
dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
614
dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
615
}
616
617
static void
618
micro_sgn(union tgsi_exec_channel *dst,
619
const union tgsi_exec_channel *src)
620
{
621
dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
622
dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
623
dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
624
dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
625
}
626
627
static void
628
micro_isgn(union tgsi_exec_channel *dst,
629
const union tgsi_exec_channel *src)
630
{
631
dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
632
dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
633
dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
634
dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
635
}
636
637
static void
638
micro_sgt(union tgsi_exec_channel *dst,
639
const union tgsi_exec_channel *src0,
640
const union tgsi_exec_channel *src1)
641
{
642
dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
643
dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
644
dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
645
dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
646
}
647
648
static void
649
micro_sin(union tgsi_exec_channel *dst,
650
const union tgsi_exec_channel *src)
651
{
652
dst->f[0] = sinf(src->f[0]);
653
dst->f[1] = sinf(src->f[1]);
654
dst->f[2] = sinf(src->f[2]);
655
dst->f[3] = sinf(src->f[3]);
656
}
657
658
static void
659
micro_sle(union tgsi_exec_channel *dst,
660
const union tgsi_exec_channel *src0,
661
const union tgsi_exec_channel *src1)
662
{
663
dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
664
dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
665
dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
666
dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
667
}
668
669
static void
670
micro_slt(union tgsi_exec_channel *dst,
671
const union tgsi_exec_channel *src0,
672
const union tgsi_exec_channel *src1)
673
{
674
dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
675
dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
676
dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
677
dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
678
}
679
680
static void
681
micro_sne(union tgsi_exec_channel *dst,
682
const union tgsi_exec_channel *src0,
683
const union tgsi_exec_channel *src1)
684
{
685
dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
686
dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
687
dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
688
dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
689
}
690
691
static void
692
micro_trunc(union tgsi_exec_channel *dst,
693
const union tgsi_exec_channel *src)
694
{
695
dst->f[0] = truncf(src->f[0]);
696
dst->f[1] = truncf(src->f[1]);
697
dst->f[2] = truncf(src->f[2]);
698
dst->f[3] = truncf(src->f[3]);
699
}
700
701
static void
702
micro_u2d(union tgsi_double_channel *dst,
703
const union tgsi_exec_channel *src)
704
{
705
dst->d[0] = (double)src->u[0];
706
dst->d[1] = (double)src->u[1];
707
dst->d[2] = (double)src->u[2];
708
dst->d[3] = (double)src->u[3];
709
}
710
711
static void
712
micro_i64abs(union tgsi_double_channel *dst,
713
const union tgsi_double_channel *src)
714
{
715
dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
716
dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
717
dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
718
dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
719
}
720
721
static void
722
micro_i64sgn(union tgsi_double_channel *dst,
723
const union tgsi_double_channel *src)
724
{
725
dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
726
dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
727
dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
728
dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
729
}
730
731
static void
732
micro_i64neg(union tgsi_double_channel *dst,
733
const union tgsi_double_channel *src)
734
{
735
dst->i64[0] = -src->i64[0];
736
dst->i64[1] = -src->i64[1];
737
dst->i64[2] = -src->i64[2];
738
dst->i64[3] = -src->i64[3];
739
}
740
741
static void
742
micro_u64seq(union tgsi_double_channel *dst,
743
const union tgsi_double_channel *src)
744
{
745
dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
746
dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
747
dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
748
dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
749
}
750
751
static void
752
micro_u64sne(union tgsi_double_channel *dst,
753
const union tgsi_double_channel *src)
754
{
755
dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
756
dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
757
dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
758
dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
759
}
760
761
static void
762
micro_i64slt(union tgsi_double_channel *dst,
763
const union tgsi_double_channel *src)
764
{
765
dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
766
dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
767
dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
768
dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
769
}
770
771
static void
772
micro_u64slt(union tgsi_double_channel *dst,
773
const union tgsi_double_channel *src)
774
{
775
dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
776
dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
777
dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
778
dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
779
}
780
781
static void
782
micro_i64sge(union tgsi_double_channel *dst,
783
const union tgsi_double_channel *src)
784
{
785
dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
786
dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
787
dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
788
dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
789
}
790
791
static void
792
micro_u64sge(union tgsi_double_channel *dst,
793
const union tgsi_double_channel *src)
794
{
795
dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
796
dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
797
dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
798
dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
799
}
800
801
static void
802
micro_u64max(union tgsi_double_channel *dst,
803
const union tgsi_double_channel *src)
804
{
805
dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
806
dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
807
dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
808
dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
809
}
810
811
static void
812
micro_i64max(union tgsi_double_channel *dst,
813
const union tgsi_double_channel *src)
814
{
815
dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
816
dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
817
dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
818
dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
819
}
820
821
static void
822
micro_u64min(union tgsi_double_channel *dst,
823
const union tgsi_double_channel *src)
824
{
825
dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
826
dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
827
dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
828
dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
829
}
830
831
static void
832
micro_i64min(union tgsi_double_channel *dst,
833
const union tgsi_double_channel *src)
834
{
835
dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
836
dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
837
dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
838
dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
839
}
840
841
static void
842
micro_u64add(union tgsi_double_channel *dst,
843
const union tgsi_double_channel *src)
844
{
845
dst->u64[0] = src[0].u64[0] + src[1].u64[0];
846
dst->u64[1] = src[0].u64[1] + src[1].u64[1];
847
dst->u64[2] = src[0].u64[2] + src[1].u64[2];
848
dst->u64[3] = src[0].u64[3] + src[1].u64[3];
849
}
850
851
static void
852
micro_u64mul(union tgsi_double_channel *dst,
853
const union tgsi_double_channel *src)
854
{
855
dst->u64[0] = src[0].u64[0] * src[1].u64[0];
856
dst->u64[1] = src[0].u64[1] * src[1].u64[1];
857
dst->u64[2] = src[0].u64[2] * src[1].u64[2];
858
dst->u64[3] = src[0].u64[3] * src[1].u64[3];
859
}
860
861
static void
862
micro_u64div(union tgsi_double_channel *dst,
863
const union tgsi_double_channel *src)
864
{
865
dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
866
dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
867
dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
868
dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
869
}
870
871
static void
872
micro_i64div(union tgsi_double_channel *dst,
873
const union tgsi_double_channel *src)
874
{
875
dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
876
dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
877
dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
878
dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
879
}
880
881
static void
882
micro_u64mod(union tgsi_double_channel *dst,
883
const union tgsi_double_channel *src)
884
{
885
dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
886
dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
887
dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
888
dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
889
}
890
891
static void
892
micro_i64mod(union tgsi_double_channel *dst,
893
const union tgsi_double_channel *src)
894
{
895
dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
896
dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
897
dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
898
dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
899
}
900
901
static void
902
micro_u64shl(union tgsi_double_channel *dst,
903
const union tgsi_double_channel *src0,
904
union tgsi_exec_channel *src1)
905
{
906
unsigned masked_count;
907
masked_count = src1->u[0] & 0x3f;
908
dst->u64[0] = src0->u64[0] << masked_count;
909
masked_count = src1->u[1] & 0x3f;
910
dst->u64[1] = src0->u64[1] << masked_count;
911
masked_count = src1->u[2] & 0x3f;
912
dst->u64[2] = src0->u64[2] << masked_count;
913
masked_count = src1->u[3] & 0x3f;
914
dst->u64[3] = src0->u64[3] << masked_count;
915
}
916
917
static void
918
micro_i64shr(union tgsi_double_channel *dst,
919
const union tgsi_double_channel *src0,
920
union tgsi_exec_channel *src1)
921
{
922
unsigned masked_count;
923
masked_count = src1->u[0] & 0x3f;
924
dst->i64[0] = src0->i64[0] >> masked_count;
925
masked_count = src1->u[1] & 0x3f;
926
dst->i64[1] = src0->i64[1] >> masked_count;
927
masked_count = src1->u[2] & 0x3f;
928
dst->i64[2] = src0->i64[2] >> masked_count;
929
masked_count = src1->u[3] & 0x3f;
930
dst->i64[3] = src0->i64[3] >> masked_count;
931
}
932
933
static void
934
micro_u64shr(union tgsi_double_channel *dst,
935
const union tgsi_double_channel *src0,
936
union tgsi_exec_channel *src1)
937
{
938
unsigned masked_count;
939
masked_count = src1->u[0] & 0x3f;
940
dst->u64[0] = src0->u64[0] >> masked_count;
941
masked_count = src1->u[1] & 0x3f;
942
dst->u64[1] = src0->u64[1] >> masked_count;
943
masked_count = src1->u[2] & 0x3f;
944
dst->u64[2] = src0->u64[2] >> masked_count;
945
masked_count = src1->u[3] & 0x3f;
946
dst->u64[3] = src0->u64[3] >> masked_count;
947
}
948
949
/** Data type tags for channel contents used by the executor. */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT,   /* float lanes */
   TGSI_EXEC_DATA_INT,     /* signed int lanes */
   TGSI_EXEC_DATA_UINT,    /* unsigned int lanes */
   TGSI_EXEC_DATA_DOUBLE,  /* double lanes */
   TGSI_EXEC_DATA_INT64,   /* signed 64-bit lanes */
   TGSI_EXEC_DATA_UINT64,  /* unsigned 64-bit lanes */
};
957
958
/**
 * Recompute the execution mask of MACH.
 *
 * ExecMask is the AND of all five control-flow masks: CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask.
 *
 * MACH is parenthesized so any pointer expression (e.g. &machine) can be
 * passed, and the expansion is a single parenthesized expression.
 * MACH is evaluated more than once, so it must have no side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
961
962
963
static const union tgsi_exec_channel ZeroVec =
964
{ { 0.0, 0.0, 0.0, 0.0 } };
965
966
static const union tgsi_exec_channel OneVec = {
967
{1.0f, 1.0f, 1.0f, 1.0f}
968
};
969
970
static const union tgsi_exec_channel P128Vec = {
971
{128.0f, 128.0f, 128.0f, 128.0f}
972
};
973
974
static const union tgsi_exec_channel M128Vec = {
975
{-128.0f, -128.0f, -128.0f, -128.0f}
976
};
977
978
979
/**
980
* Assert that none of the float values in 'chan' are infinite or NaN.
981
* NaN and Inf may occur normally during program execution and should
982
* not lead to crashes, etc. But when debugging, it's helpful to catch
983
* them.
984
*/
985
static inline void
986
check_inf_or_nan(const union tgsi_exec_channel *chan)
987
{
988
assert(!util_is_inf_or_nan((chan)->f[0]));
989
assert(!util_is_inf_or_nan((chan)->f[1]));
990
assert(!util_is_inf_or_nan((chan)->f[2]));
991
assert(!util_is_inf_or_nan((chan)->f[3]));
992
}
993
994
995
#ifdef DEBUG
/** Debug helper: print the four float lanes of 'chan', labelled with 'msg'. */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lane = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lane[0], lane[1], lane[2], lane[3]);
}
#endif
1003
1004
1005
#ifdef DEBUG
/**
 * Debug helper: print temporary register 'index', one line per X/Y/Z/W
 * component with its four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char component_name[] = "XYZW";
   const struct tgsi_exec_vector *tmp = &mach->Temps[index];
   int comp;

   debug_printf("Temp[%u] =\n", index);

   for (comp = 0; comp < 4; comp++) {
      const float *lane = tmp->xyzw[comp].f;

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   component_name[comp],
                   lane[0], lane[1], lane[2], lane[3]);
   }
}
#endif
1022
1023
1024
void
1025
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
1026
unsigned num_bufs,
1027
const void **bufs,
1028
const unsigned *buf_sizes)
1029
{
1030
unsigned i;
1031
1032
for (i = 0; i < num_bufs; i++) {
1033
mach->Consts[i] = bufs[i];
1034
mach->ConstsSize[i] = buf_sizes[i];
1035
}
1036
}
1037
1038
/**
1039
* Initialize machine state by expanding tokens to full instructions,
1040
* allocating temporary storage, setting up constants, etc.
1041
* After this, we can call tgsi_exec_machine_run() many times.
1042
*/
1043
void
1044
tgsi_exec_machine_bind_shader(
1045
struct tgsi_exec_machine *mach,
1046
const struct tgsi_token *tokens,
1047
struct tgsi_sampler *sampler,
1048
struct tgsi_image *image,
1049
struct tgsi_buffer *buffer)
1050
{
1051
uint k;
1052
struct tgsi_parse_context parse;
1053
struct tgsi_full_instruction *instructions;
1054
struct tgsi_full_declaration *declarations;
1055
uint maxInstructions = 10, numInstructions = 0;
1056
uint maxDeclarations = 10, numDeclarations = 0;
1057
1058
#if 0
1059
tgsi_dump(tokens, 0);
1060
#endif
1061
1062
mach->Tokens = tokens;
1063
mach->Sampler = sampler;
1064
mach->Image = image;
1065
mach->Buffer = buffer;
1066
1067
if (!tokens) {
1068
/* unbind and free all */
1069
FREE(mach->Declarations);
1070
mach->Declarations = NULL;
1071
mach->NumDeclarations = 0;
1072
1073
FREE(mach->Instructions);
1074
mach->Instructions = NULL;
1075
mach->NumInstructions = 0;
1076
1077
return;
1078
}
1079
1080
k = tgsi_parse_init (&parse, mach->Tokens);
1081
if (k != TGSI_PARSE_OK) {
1082
debug_printf( "Problem parsing!\n" );
1083
return;
1084
}
1085
1086
mach->ImmLimit = 0;
1087
mach->NumOutputs = 0;
1088
1089
for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
1090
mach->SysSemanticToIndex[k] = -1;
1091
1092
if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
1093
!mach->UsedGeometryShader) {
1094
struct tgsi_exec_vector *inputs;
1095
struct tgsi_exec_vector *outputs;
1096
1097
inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1098
TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1099
16);
1100
1101
if (!inputs)
1102
return;
1103
1104
outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1105
TGSI_MAX_TOTAL_VERTICES, 16);
1106
1107
if (!outputs) {
1108
align_free(inputs);
1109
return;
1110
}
1111
1112
align_free(mach->Inputs);
1113
align_free(mach->Outputs);
1114
1115
mach->Inputs = inputs;
1116
mach->Outputs = outputs;
1117
mach->UsedGeometryShader = TRUE;
1118
}
1119
1120
declarations = (struct tgsi_full_declaration *)
1121
MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
1122
1123
if (!declarations) {
1124
return;
1125
}
1126
1127
instructions = (struct tgsi_full_instruction *)
1128
MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
1129
1130
if (!instructions) {
1131
FREE( declarations );
1132
return;
1133
}
1134
1135
while( !tgsi_parse_end_of_tokens( &parse ) ) {
1136
uint i;
1137
1138
tgsi_parse_token( &parse );
1139
switch( parse.FullToken.Token.Type ) {
1140
case TGSI_TOKEN_TYPE_DECLARATION:
1141
/* save expanded declaration */
1142
if (numDeclarations == maxDeclarations) {
1143
declarations = REALLOC(declarations,
1144
maxDeclarations
1145
* sizeof(struct tgsi_full_declaration),
1146
(maxDeclarations + 10)
1147
* sizeof(struct tgsi_full_declaration));
1148
maxDeclarations += 10;
1149
}
1150
if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT)
1151
mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1);
1152
else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1153
const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1154
mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
1155
}
1156
1157
memcpy(declarations + numDeclarations,
1158
&parse.FullToken.FullDeclaration,
1159
sizeof(declarations[0]));
1160
numDeclarations++;
1161
break;
1162
1163
case TGSI_TOKEN_TYPE_IMMEDIATE:
1164
{
1165
uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1166
assert( size <= 4 );
1167
if (mach->ImmLimit >= mach->ImmsReserved) {
1168
unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128;
1169
float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4));
1170
if (imms) {
1171
mach->ImmsReserved = newReserved;
1172
mach->Imms = imms;
1173
} else {
1174
debug_printf("Unable to (re)allocate space for immidiate constants\n");
1175
break;
1176
}
1177
}
1178
1179
for( i = 0; i < size; i++ ) {
1180
mach->Imms[mach->ImmLimit][i] =
1181
parse.FullToken.FullImmediate.u[i].Float;
1182
}
1183
mach->ImmLimit += 1;
1184
}
1185
break;
1186
1187
case TGSI_TOKEN_TYPE_INSTRUCTION:
1188
1189
/* save expanded instruction */
1190
if (numInstructions == maxInstructions) {
1191
instructions = REALLOC(instructions,
1192
maxInstructions
1193
* sizeof(struct tgsi_full_instruction),
1194
(maxInstructions + 10)
1195
* sizeof(struct tgsi_full_instruction));
1196
maxInstructions += 10;
1197
}
1198
1199
memcpy(instructions + numInstructions,
1200
&parse.FullToken.FullInstruction,
1201
sizeof(instructions[0]));
1202
1203
numInstructions++;
1204
break;
1205
1206
case TGSI_TOKEN_TYPE_PROPERTY:
1207
if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1208
if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1209
mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1210
}
1211
}
1212
break;
1213
1214
default:
1215
assert( 0 );
1216
}
1217
}
1218
tgsi_parse_free (&parse);
1219
1220
FREE(mach->Declarations);
1221
mach->Declarations = declarations;
1222
mach->NumDeclarations = numDeclarations;
1223
1224
FREE(mach->Instructions);
1225
mach->Instructions = instructions;
1226
mach->NumInstructions = numInstructions;
1227
}
1228
1229
1230
struct tgsi_exec_machine *
1231
tgsi_exec_machine_create(enum pipe_shader_type shader_type)
1232
{
1233
struct tgsi_exec_machine *mach;
1234
1235
mach = align_malloc( sizeof *mach, 16 );
1236
if (!mach)
1237
goto fail;
1238
1239
memset(mach, 0, sizeof(*mach));
1240
1241
mach->ShaderType = shader_type;
1242
1243
if (shader_type != PIPE_SHADER_COMPUTE) {
1244
mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
1245
mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
1246
if (!mach->Inputs || !mach->Outputs)
1247
goto fail;
1248
}
1249
1250
if (shader_type == PIPE_SHADER_FRAGMENT) {
1251
mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16);
1252
if (!mach->InputSampleOffsetApply)
1253
goto fail;
1254
}
1255
1256
#ifdef DEBUG
1257
/* silence warnings */
1258
(void) print_chan;
1259
(void) print_temp;
1260
#endif
1261
1262
return mach;
1263
1264
fail:
1265
if (mach) {
1266
align_free(mach->InputSampleOffsetApply);
1267
align_free(mach->Inputs);
1268
align_free(mach->Outputs);
1269
align_free(mach);
1270
}
1271
return NULL;
1272
}
1273
1274
1275
void
1276
tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
1277
{
1278
if (mach) {
1279
FREE(mach->Instructions);
1280
FREE(mach->Declarations);
1281
FREE(mach->Imms);
1282
1283
align_free(mach->InputSampleOffsetApply);
1284
align_free(mach->Inputs);
1285
align_free(mach->Outputs);
1286
1287
align_free(mach);
1288
}
1289
}
1290
1291
static void
1292
micro_add(union tgsi_exec_channel *dst,
1293
const union tgsi_exec_channel *src0,
1294
const union tgsi_exec_channel *src1)
1295
{
1296
dst->f[0] = src0->f[0] + src1->f[0];
1297
dst->f[1] = src0->f[1] + src1->f[1];
1298
dst->f[2] = src0->f[2] + src1->f[2];
1299
dst->f[3] = src0->f[3] + src1->f[3];
1300
}
1301
1302
static void
1303
micro_div(
1304
union tgsi_exec_channel *dst,
1305
const union tgsi_exec_channel *src0,
1306
const union tgsi_exec_channel *src1 )
1307
{
1308
if (src1->f[0] != 0) {
1309
dst->f[0] = src0->f[0] / src1->f[0];
1310
}
1311
if (src1->f[1] != 0) {
1312
dst->f[1] = src0->f[1] / src1->f[1];
1313
}
1314
if (src1->f[2] != 0) {
1315
dst->f[2] = src0->f[2] / src1->f[2];
1316
}
1317
if (src1->f[3] != 0) {
1318
dst->f[3] = src0->f[3] / src1->f[3];
1319
}
1320
}
1321
1322
static void
1323
micro_lt(
1324
union tgsi_exec_channel *dst,
1325
const union tgsi_exec_channel *src0,
1326
const union tgsi_exec_channel *src1,
1327
const union tgsi_exec_channel *src2,
1328
const union tgsi_exec_channel *src3 )
1329
{
1330
dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1331
dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1332
dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1333
dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
1334
}
1335
1336
static void
1337
micro_max(union tgsi_exec_channel *dst,
1338
const union tgsi_exec_channel *src0,
1339
const union tgsi_exec_channel *src1)
1340
{
1341
dst->f[0] = fmaxf(src0->f[0], src1->f[0]);
1342
dst->f[1] = fmaxf(src0->f[1], src1->f[1]);
1343
dst->f[2] = fmaxf(src0->f[2], src1->f[2]);
1344
dst->f[3] = fmaxf(src0->f[3], src1->f[3]);
1345
}
1346
1347
static void
1348
micro_min(union tgsi_exec_channel *dst,
1349
const union tgsi_exec_channel *src0,
1350
const union tgsi_exec_channel *src1)
1351
{
1352
dst->f[0] = fminf(src0->f[0], src1->f[0]);
1353
dst->f[1] = fminf(src0->f[1], src1->f[1]);
1354
dst->f[2] = fminf(src0->f[2], src1->f[2]);
1355
dst->f[3] = fminf(src0->f[3], src1->f[3]);
1356
}
1357
1358
static void
1359
micro_mul(union tgsi_exec_channel *dst,
1360
const union tgsi_exec_channel *src0,
1361
const union tgsi_exec_channel *src1)
1362
{
1363
dst->f[0] = src0->f[0] * src1->f[0];
1364
dst->f[1] = src0->f[1] * src1->f[1];
1365
dst->f[2] = src0->f[2] * src1->f[2];
1366
dst->f[3] = src0->f[3] * src1->f[3];
1367
}
1368
1369
static void
1370
micro_neg(
1371
union tgsi_exec_channel *dst,
1372
const union tgsi_exec_channel *src )
1373
{
1374
dst->f[0] = -src->f[0];
1375
dst->f[1] = -src->f[1];
1376
dst->f[2] = -src->f[2];
1377
dst->f[3] = -src->f[3];
1378
}
1379
1380
static void
1381
micro_pow(
1382
union tgsi_exec_channel *dst,
1383
const union tgsi_exec_channel *src0,
1384
const union tgsi_exec_channel *src1 )
1385
{
1386
dst->f[0] = powf( src0->f[0], src1->f[0] );
1387
dst->f[1] = powf( src0->f[1], src1->f[1] );
1388
dst->f[2] = powf( src0->f[2], src1->f[2] );
1389
dst->f[3] = powf( src0->f[3], src1->f[3] );
1390
}
1391
1392
static void
1393
micro_ldexp(union tgsi_exec_channel *dst,
1394
const union tgsi_exec_channel *src0,
1395
const union tgsi_exec_channel *src1)
1396
{
1397
dst->f[0] = ldexpf(src0->f[0], src1->i[0]);
1398
dst->f[1] = ldexpf(src0->f[1], src1->i[1]);
1399
dst->f[2] = ldexpf(src0->f[2], src1->i[2]);
1400
dst->f[3] = ldexpf(src0->f[3], src1->i[3]);
1401
}
1402
1403
static void
1404
micro_sub(union tgsi_exec_channel *dst,
1405
const union tgsi_exec_channel *src0,
1406
const union tgsi_exec_channel *src1)
1407
{
1408
dst->f[0] = src0->f[0] - src1->f[0];
1409
dst->f[1] = src0->f[1] - src1->f[1];
1410
dst->f[2] = src0->f[2] - src1->f[2];
1411
dst->f[3] = src0->f[3] - src1->f[3];
1412
}
1413
1414
static void
1415
fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1416
const uint file,
1417
const uint swizzle,
1418
const union tgsi_exec_channel *index,
1419
const union tgsi_exec_channel *index2D,
1420
union tgsi_exec_channel *chan)
1421
{
1422
uint i;
1423
1424
assert(swizzle < 4);
1425
1426
switch (file) {
1427
case TGSI_FILE_CONSTANT:
1428
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1429
/* NOTE: copying the const value as a uint instead of float */
1430
const uint constbuf = index2D->i[i];
1431
const unsigned pos = index->i[i] * 4 + swizzle;
1432
/* const buffer bounds check */
1433
if (pos >= mach->ConstsSize[constbuf] / 4) {
1434
if (0) {
1435
/* Debug: print warning */
1436
static int count = 0;
1437
if (count++ < 100)
1438
debug_printf("TGSI Exec: const buffer index %d"
1439
" out of bounds\n", pos);
1440
}
1441
chan->u[i] = 0;
1442
} else {
1443
const uint *buf = (const uint *)mach->Consts[constbuf];
1444
chan->u[i] = buf[pos];
1445
}
1446
}
1447
break;
1448
1449
case TGSI_FILE_INPUT:
1450
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1451
/*
1452
if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1453
debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1454
index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1455
index2D->i[i], index->i[i]);
1456
}*/
1457
int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1458
assert(pos >= 0);
1459
assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1460
chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1461
}
1462
break;
1463
1464
case TGSI_FILE_SYSTEM_VALUE:
1465
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1466
chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1467
}
1468
break;
1469
1470
case TGSI_FILE_TEMPORARY:
1471
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1472
assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1473
assert(index2D->i[i] == 0);
1474
1475
chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1476
}
1477
break;
1478
1479
case TGSI_FILE_IMMEDIATE:
1480
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1481
assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1482
assert(index2D->i[i] == 0);
1483
1484
chan->f[i] = mach->Imms[index->i[i]][swizzle];
1485
}
1486
break;
1487
1488
case TGSI_FILE_ADDRESS:
1489
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1490
assert(index->i[i] >= 0 && index->i[i] < ARRAY_SIZE(mach->Addrs));
1491
assert(index2D->i[i] == 0);
1492
1493
chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1494
}
1495
break;
1496
1497
case TGSI_FILE_OUTPUT:
1498
/* vertex/fragment output vars can be read too */
1499
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1500
assert(index->i[i] >= 0);
1501
assert(index2D->i[i] == 0);
1502
1503
chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1504
}
1505
break;
1506
1507
default:
1508
assert(0);
1509
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1510
chan->u[i] = 0;
1511
}
1512
}
1513
}
1514
1515
static void
1516
get_index_registers(const struct tgsi_exec_machine *mach,
1517
const struct tgsi_full_src_register *reg,
1518
union tgsi_exec_channel *index,
1519
union tgsi_exec_channel *index2D)
1520
{
1521
uint swizzle;
1522
1523
/* We start with a direct index into a register file.
1524
*
1525
* file[1],
1526
* where:
1527
* file = Register.File
1528
* [1] = Register.Index
1529
*/
1530
index->i[0] =
1531
index->i[1] =
1532
index->i[2] =
1533
index->i[3] = reg->Register.Index;
1534
1535
/* There is an extra source register that indirectly subscripts
1536
* a register file. The direct index now becomes an offset
1537
* that is being added to the indirect register.
1538
*
1539
* file[ind[2].x+1],
1540
* where:
1541
* ind = Indirect.File
1542
* [2] = Indirect.Index
1543
* .x = Indirect.SwizzleX
1544
*/
1545
if (reg->Register.Indirect) {
1546
union tgsi_exec_channel index2;
1547
union tgsi_exec_channel indir_index;
1548
const uint execmask = mach->ExecMask;
1549
uint i;
1550
1551
/* which address register (always zero now) */
1552
index2.i[0] =
1553
index2.i[1] =
1554
index2.i[2] =
1555
index2.i[3] = reg->Indirect.Index;
1556
/* get current value of address register[swizzle] */
1557
swizzle = reg->Indirect.Swizzle;
1558
fetch_src_file_channel(mach,
1559
reg->Indirect.File,
1560
swizzle,
1561
&index2,
1562
&ZeroVec,
1563
&indir_index);
1564
1565
/* add value of address register to the offset */
1566
index->i[0] += indir_index.i[0];
1567
index->i[1] += indir_index.i[1];
1568
index->i[2] += indir_index.i[2];
1569
index->i[3] += indir_index.i[3];
1570
1571
/* for disabled execution channels, zero-out the index to
1572
* avoid using a potential garbage value.
1573
*/
1574
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1575
if ((execmask & (1 << i)) == 0)
1576
index->i[i] = 0;
1577
}
1578
}
1579
1580
/* There is an extra source register that is a second
1581
* subscript to a register file. Effectively it means that
1582
* the register file is actually a 2D array of registers.
1583
*
1584
* file[3][1],
1585
* where:
1586
* [3] = Dimension.Index
1587
*/
1588
if (reg->Register.Dimension) {
1589
index2D->i[0] =
1590
index2D->i[1] =
1591
index2D->i[2] =
1592
index2D->i[3] = reg->Dimension.Index;
1593
1594
/* Again, the second subscript index can be addressed indirectly
1595
* identically to the first one.
1596
* Nothing stops us from indirectly addressing the indirect register,
1597
* but there is no need for that, so we won't exercise it.
1598
*
1599
* file[ind[4].y+3][1],
1600
* where:
1601
* ind = DimIndirect.File
1602
* [4] = DimIndirect.Index
1603
* .y = DimIndirect.SwizzleX
1604
*/
1605
if (reg->Dimension.Indirect) {
1606
union tgsi_exec_channel index2;
1607
union tgsi_exec_channel indir_index;
1608
const uint execmask = mach->ExecMask;
1609
uint i;
1610
1611
index2.i[0] =
1612
index2.i[1] =
1613
index2.i[2] =
1614
index2.i[3] = reg->DimIndirect.Index;
1615
1616
swizzle = reg->DimIndirect.Swizzle;
1617
fetch_src_file_channel(mach,
1618
reg->DimIndirect.File,
1619
swizzle,
1620
&index2,
1621
&ZeroVec,
1622
&indir_index);
1623
1624
index2D->i[0] += indir_index.i[0];
1625
index2D->i[1] += indir_index.i[1];
1626
index2D->i[2] += indir_index.i[2];
1627
index2D->i[3] += indir_index.i[3];
1628
1629
/* for disabled execution channels, zero-out the index to
1630
* avoid using a potential garbage value.
1631
*/
1632
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1633
if ((execmask & (1 << i)) == 0) {
1634
index2D->i[i] = 0;
1635
}
1636
}
1637
}
1638
1639
/* If by any chance there was a need for a 3D array of register
1640
* files, we would have to check whether Dimension is followed
1641
* by a dimension register and continue the saga.
1642
*/
1643
} else {
1644
index2D->i[0] =
1645
index2D->i[1] =
1646
index2D->i[2] =
1647
index2D->i[3] = 0;
1648
}
1649
}
1650
1651
1652
static void
1653
fetch_source_d(const struct tgsi_exec_machine *mach,
1654
union tgsi_exec_channel *chan,
1655
const struct tgsi_full_src_register *reg,
1656
const uint chan_index)
1657
{
1658
union tgsi_exec_channel index;
1659
union tgsi_exec_channel index2D;
1660
uint swizzle;
1661
1662
get_index_registers(mach, reg, &index, &index2D);
1663
1664
1665
swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1666
fetch_src_file_channel(mach,
1667
reg->Register.File,
1668
swizzle,
1669
&index,
1670
&index2D,
1671
chan);
1672
}
1673
1674
static void
1675
fetch_source(const struct tgsi_exec_machine *mach,
1676
union tgsi_exec_channel *chan,
1677
const struct tgsi_full_src_register *reg,
1678
const uint chan_index,
1679
enum tgsi_exec_datatype src_datatype)
1680
{
1681
fetch_source_d(mach, chan, reg, chan_index);
1682
1683
if (reg->Register.Absolute) {
1684
assert(src_datatype == TGSI_EXEC_DATA_FLOAT);
1685
micro_abs(chan, chan);
1686
}
1687
1688
if (reg->Register.Negate) {
1689
if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1690
micro_neg(chan, chan);
1691
} else {
1692
micro_ineg(chan, chan);
1693
}
1694
}
1695
}
1696
1697
static union tgsi_exec_channel *
1698
store_dest_dstret(struct tgsi_exec_machine *mach,
1699
const union tgsi_exec_channel *chan,
1700
const struct tgsi_full_dst_register *reg,
1701
uint chan_index)
1702
{
1703
static union tgsi_exec_channel null;
1704
union tgsi_exec_channel *dst;
1705
int offset = 0; /* indirection offset */
1706
int index;
1707
1708
1709
/* There is an extra source register that indirectly subscripts
1710
* a register file. The direct index now becomes an offset
1711
* that is being added to the indirect register.
1712
*
1713
* file[ind[2].x+1],
1714
* where:
1715
* ind = Indirect.File
1716
* [2] = Indirect.Index
1717
* .x = Indirect.SwizzleX
1718
*/
1719
if (reg->Register.Indirect) {
1720
union tgsi_exec_channel index;
1721
union tgsi_exec_channel indir_index;
1722
uint swizzle;
1723
1724
/* which address register (always zero for now) */
1725
index.i[0] =
1726
index.i[1] =
1727
index.i[2] =
1728
index.i[3] = reg->Indirect.Index;
1729
1730
/* get current value of address register[swizzle] */
1731
swizzle = reg->Indirect.Swizzle;
1732
1733
/* fetch values from the address/indirection register */
1734
fetch_src_file_channel(mach,
1735
reg->Indirect.File,
1736
swizzle,
1737
&index,
1738
&ZeroVec,
1739
&indir_index);
1740
1741
/* save indirection offset */
1742
offset = indir_index.i[0];
1743
}
1744
1745
switch (reg->Register.File) {
1746
case TGSI_FILE_NULL:
1747
dst = &null;
1748
break;
1749
1750
case TGSI_FILE_OUTPUT:
1751
index = mach->OutputVertexOffset + reg->Register.Index;
1752
dst = &mach->Outputs[offset + index].xyzw[chan_index];
1753
#if 0
1754
debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1755
mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1756
reg->Register.Index);
1757
if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1758
debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1759
for (i = 0; i < TGSI_QUAD_SIZE; i++)
1760
if (execmask & (1 << i))
1761
debug_printf("%f, ", chan->f[i]);
1762
debug_printf(")\n");
1763
}
1764
#endif
1765
break;
1766
1767
case TGSI_FILE_TEMPORARY:
1768
index = reg->Register.Index;
1769
assert( index < TGSI_EXEC_NUM_TEMPS );
1770
dst = &mach->Temps[offset + index].xyzw[chan_index];
1771
break;
1772
1773
case TGSI_FILE_ADDRESS:
1774
index = reg->Register.Index;
1775
assert(index >= 0 && index < ARRAY_SIZE(mach->Addrs));
1776
dst = &mach->Addrs[index].xyzw[chan_index];
1777
break;
1778
1779
default:
1780
unreachable("Bad destination file");
1781
}
1782
1783
return dst;
1784
}
1785
1786
static void
1787
store_dest_double(struct tgsi_exec_machine *mach,
1788
const union tgsi_exec_channel *chan,
1789
const struct tgsi_full_dst_register *reg,
1790
uint chan_index)
1791
{
1792
union tgsi_exec_channel *dst;
1793
const uint execmask = mach->ExecMask;
1794
int i;
1795
1796
dst = store_dest_dstret(mach, chan, reg, chan_index);
1797
if (!dst)
1798
return;
1799
1800
/* doubles path */
1801
for (i = 0; i < TGSI_QUAD_SIZE; i++)
1802
if (execmask & (1 << i))
1803
dst->i[i] = chan->i[i];
1804
}
1805
1806
static void
1807
store_dest(struct tgsi_exec_machine *mach,
1808
const union tgsi_exec_channel *chan,
1809
const struct tgsi_full_dst_register *reg,
1810
const struct tgsi_full_instruction *inst,
1811
uint chan_index)
1812
{
1813
union tgsi_exec_channel *dst;
1814
const uint execmask = mach->ExecMask;
1815
int i;
1816
1817
dst = store_dest_dstret(mach, chan, reg, chan_index);
1818
if (!dst)
1819
return;
1820
1821
if (!inst->Instruction.Saturate) {
1822
for (i = 0; i < TGSI_QUAD_SIZE; i++)
1823
if (execmask & (1 << i))
1824
dst->i[i] = chan->i[i];
1825
}
1826
else {
1827
for (i = 0; i < TGSI_QUAD_SIZE; i++)
1828
if (execmask & (1 << i))
1829
dst->f[i] = fminf(fmaxf(chan->f[i], 0.0f), 1.0f);
1830
}
1831
}
1832
1833
/*
 * Shorthands for fetching channel CHAN of source operand INDEX of the
 * current instruction into VAL.  Both expect variables named 'mach' and
 * 'inst' to be in scope at the point of use.
 */

/* fetch with float modifier semantics */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[INDEX], (CHAN), TGSI_EXEC_DATA_FLOAT)

/* fetch with integer modifier semantics */
#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[INDEX], (CHAN), TGSI_EXEC_DATA_INT)
1838
1839
1840
/**
1841
* Execute ARB-style KIL which is predicated by a src register.
1842
* Kill fragment if any of the four values is less than zero.
1843
*/
1844
static void
1845
exec_kill_if(struct tgsi_exec_machine *mach,
1846
const struct tgsi_full_instruction *inst)
1847
{
1848
uint uniquemask;
1849
uint chan_index;
1850
uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1851
union tgsi_exec_channel r[1];
1852
1853
/* This mask stores component bits that were already tested. */
1854
uniquemask = 0;
1855
1856
for (chan_index = 0; chan_index < 4; chan_index++)
1857
{
1858
uint swizzle;
1859
uint i;
1860
1861
/* unswizzle channel */
1862
swizzle = tgsi_util_get_full_src_register_swizzle (
1863
&inst->Src[0],
1864
chan_index);
1865
1866
/* check if the component has not been already tested */
1867
if (uniquemask & (1 << swizzle))
1868
continue;
1869
uniquemask |= 1 << swizzle;
1870
1871
FETCH(&r[0], 0, chan_index);
1872
for (i = 0; i < 4; i++)
1873
if (r[0].f[i] < 0.0f)
1874
kilmask |= 1 << i;
1875
}
1876
1877
/* restrict to fragments currently executing */
1878
kilmask &= mach->ExecMask;
1879
1880
mach->KillMask |= kilmask;
1881
}
1882
1883
/**
1884
* Unconditional fragment kill/discard.
1885
*/
1886
static void
1887
exec_kill(struct tgsi_exec_machine *mach)
1888
{
1889
/* kill fragment for all fragments currently executing.
1890
* bit 0 = pixel 0, bit 1 = pixel 1, etc.
1891
*/
1892
mach->KillMask |= mach->ExecMask;
1893
}
1894
1895
static void
1896
emit_vertex(struct tgsi_exec_machine *mach,
1897
const struct tgsi_full_instruction *inst)
1898
{
1899
union tgsi_exec_channel r[1];
1900
unsigned stream_id;
1901
unsigned prim_count;
1902
/* FIXME: check for exec mask correctly
1903
unsigned i;
1904
for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1905
if ((mach->ExecMask & (1 << i)))
1906
*/
1907
IFETCH(&r[0], 0, TGSI_CHAN_X);
1908
stream_id = r[0].u[0];
1909
prim_count = mach->OutputPrimCount[stream_id];
1910
if (mach->ExecMask) {
1911
if (mach->Primitives[stream_id][prim_count] >= mach->MaxOutputVertices)
1912
return;
1913
1914
if (mach->Primitives[stream_id][prim_count] == 0)
1915
mach->PrimitiveOffsets[stream_id][prim_count] = mach->OutputVertexOffset;
1916
mach->OutputVertexOffset += mach->NumOutputs;
1917
mach->Primitives[stream_id][prim_count]++;
1918
}
1919
}
1920
1921
static void
1922
emit_primitive(struct tgsi_exec_machine *mach,
1923
const struct tgsi_full_instruction *inst)
1924
{
1925
unsigned *prim_count;
1926
union tgsi_exec_channel r[1];
1927
unsigned stream_id = 0;
1928
/* FIXME: check for exec mask correctly
1929
unsigned i;
1930
for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1931
if ((mach->ExecMask & (1 << i)))
1932
*/
1933
if (inst) {
1934
IFETCH(&r[0], 0, TGSI_CHAN_X);
1935
stream_id = r[0].u[0];
1936
}
1937
prim_count = &mach->OutputPrimCount[stream_id];
1938
if (mach->ExecMask) {
1939
++(*prim_count);
1940
debug_assert((*prim_count * mach->NumOutputs) < TGSI_MAX_TOTAL_VERTICES);
1941
mach->Primitives[stream_id][*prim_count] = 0;
1942
}
1943
}
1944
1945
static void
1946
conditional_emit_primitive(struct tgsi_exec_machine *mach)
1947
{
1948
if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1949
int emitted_verts = mach->Primitives[0][mach->OutputPrimCount[0]];
1950
if (emitted_verts) {
1951
emit_primitive(mach, NULL);
1952
}
1953
}
1954
}
1955
1956
1957
/*
1958
* Fetch four texture samples using STR texture coordinates.
1959
*/
1960
static void
1961
fetch_texel( struct tgsi_sampler *sampler,
1962
const unsigned sview_idx,
1963
const unsigned sampler_idx,
1964
const union tgsi_exec_channel *s,
1965
const union tgsi_exec_channel *t,
1966
const union tgsi_exec_channel *p,
1967
const union tgsi_exec_channel *c0,
1968
const union tgsi_exec_channel *c1,
1969
float derivs[3][2][TGSI_QUAD_SIZE],
1970
const int8_t offset[3],
1971
enum tgsi_sampler_control control,
1972
union tgsi_exec_channel *r,
1973
union tgsi_exec_channel *g,
1974
union tgsi_exec_channel *b,
1975
union tgsi_exec_channel *a )
1976
{
1977
uint j;
1978
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1979
1980
/* FIXME: handle explicit derivs, offsets */
1981
sampler->get_samples(sampler, sview_idx, sampler_idx,
1982
s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
1983
1984
for (j = 0; j < 4; j++) {
1985
r->f[j] = rgba[0][j];
1986
g->f[j] = rgba[1][j];
1987
b->f[j] = rgba[2][j];
1988
a->f[j] = rgba[3][j];
1989
}
1990
}
1991
1992
1993
/*
 * Texture instruction variants, passed as the 'modifier' argument of the
 * texture execution helpers.
 */
#define TEX_MODIFIER_NONE          0  /* no extra modifier operand */
#define TEX_MODIFIER_PROJECTED     1  /* coordinates are divided by the modifier */
#define TEX_MODIFIER_LOD_BIAS      2  /* modifier is a LOD bias */
#define TEX_MODIFIER_EXPLICIT_LOD  3  /* modifier is an explicit LOD */
#define TEX_MODIFIER_LEVEL_ZERO    4  /* not accepted by exec_tex() */
#define TEX_MODIFIER_GATHER        5  /* texture gather */
1999
2000
/*
2001
* Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2002
*/
2003
static void
2004
fetch_texel_offsets(struct tgsi_exec_machine *mach,
2005
const struct tgsi_full_instruction *inst,
2006
int8_t offsets[3])
2007
{
2008
if (inst->Texture.NumOffsets == 1) {
2009
union tgsi_exec_channel index;
2010
union tgsi_exec_channel offset[3];
2011
index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2012
fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2013
inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2014
fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2015
inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2016
fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2017
inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2018
offsets[0] = offset[0].i[0];
2019
offsets[1] = offset[1].i[0];
2020
offsets[2] = offset[2].i[0];
2021
} else {
2022
assert(inst->Texture.NumOffsets == 0);
2023
offsets[0] = offsets[1] = offsets[2] = 0;
2024
}
2025
}
2026
2027
2028
/*
2029
* Fetch dx and dy values for one channel (s, t or r).
2030
* Put dx values into one float array, dy values into another.
2031
*/
2032
static void
2033
fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
2034
const struct tgsi_full_instruction *inst,
2035
unsigned regdsrcx,
2036
unsigned chan,
2037
float derivs[2][TGSI_QUAD_SIZE])
2038
{
2039
union tgsi_exec_channel d;
2040
FETCH(&d, regdsrcx, chan);
2041
derivs[0][0] = d.f[0];
2042
derivs[0][1] = d.f[1];
2043
derivs[0][2] = d.f[2];
2044
derivs[0][3] = d.f[3];
2045
FETCH(&d, regdsrcx + 1, chan);
2046
derivs[1][0] = d.f[0];
2047
derivs[1][1] = d.f[1];
2048
derivs[1][2] = d.f[2];
2049
derivs[1][3] = d.f[3];
2050
}
2051
2052
static uint
2053
fetch_sampler_unit(struct tgsi_exec_machine *mach,
2054
const struct tgsi_full_instruction *inst,
2055
uint sampler)
2056
{
2057
uint unit = 0;
2058
int i;
2059
if (inst->Src[sampler].Register.Indirect) {
2060
const struct tgsi_full_src_register *reg = &inst->Src[sampler];
2061
union tgsi_exec_channel indir_index, index2;
2062
const uint execmask = mach->ExecMask;
2063
index2.i[0] =
2064
index2.i[1] =
2065
index2.i[2] =
2066
index2.i[3] = reg->Indirect.Index;
2067
2068
fetch_src_file_channel(mach,
2069
reg->Indirect.File,
2070
reg->Indirect.Swizzle,
2071
&index2,
2072
&ZeroVec,
2073
&indir_index);
2074
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2075
if (execmask & (1 << i)) {
2076
unit = inst->Src[sampler].Register.Index + indir_index.i[i];
2077
break;
2078
}
2079
}
2080
2081
} else {
2082
unit = inst->Src[sampler].Register.Index;
2083
}
2084
return unit;
2085
}
2086
2087
/*
2088
* execute a texture instruction.
2089
*
2090
* modifier is used to control the channel routing for the
2091
* instruction variants like proj, lod, and texture with lod bias.
2092
* sampler indicates which src register the sampler is contained in.
2093
*/
2094
static void
2095
exec_tex(struct tgsi_exec_machine *mach,
2096
const struct tgsi_full_instruction *inst,
2097
uint modifier, uint sampler)
2098
{
2099
const union tgsi_exec_channel *args[5], *proj = NULL;
2100
union tgsi_exec_channel r[5];
2101
enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2102
uint chan;
2103
uint unit;
2104
int8_t offsets[3];
2105
int dim, shadow_ref, i;
2106
2107
unit = fetch_sampler_unit(mach, inst, sampler);
2108
/* always fetch all 3 offsets, overkill but keeps code simple */
2109
fetch_texel_offsets(mach, inst, offsets);
2110
2111
assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2112
assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2113
2114
dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2115
shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2116
2117
assert(dim <= 4);
2118
if (shadow_ref >= 0)
2119
assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args));
2120
2121
/* fetch modifier to the last argument */
2122
if (modifier != TEX_MODIFIER_NONE) {
2123
const int last = ARRAY_SIZE(args) - 1;
2124
2125
/* fetch modifier from src0.w or src1.x */
2126
if (sampler == 1) {
2127
assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2128
FETCH(&r[last], 0, TGSI_CHAN_W);
2129
}
2130
else {
2131
FETCH(&r[last], 1, TGSI_CHAN_X);
2132
}
2133
2134
if (modifier != TEX_MODIFIER_PROJECTED) {
2135
args[last] = &r[last];
2136
}
2137
else {
2138
proj = &r[last];
2139
args[last] = &ZeroVec;
2140
}
2141
2142
/* point unused arguments to zero vector */
2143
for (i = dim; i < last; i++)
2144
args[i] = &ZeroVec;
2145
2146
if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
2147
control = TGSI_SAMPLER_LOD_EXPLICIT;
2148
else if (modifier == TEX_MODIFIER_LOD_BIAS)
2149
control = TGSI_SAMPLER_LOD_BIAS;
2150
else if (modifier == TEX_MODIFIER_GATHER)
2151
control = TGSI_SAMPLER_GATHER;
2152
}
2153
else {
2154
for (i = dim; i < (int)ARRAY_SIZE(args); i++)
2155
args[i] = &ZeroVec;
2156
}
2157
2158
/* fetch coordinates */
2159
for (i = 0; i < dim; i++) {
2160
FETCH(&r[i], 0, TGSI_CHAN_X + i);
2161
2162
if (proj)
2163
micro_div(&r[i], &r[i], proj);
2164
2165
args[i] = &r[i];
2166
}
2167
2168
/* fetch reference value */
2169
if (shadow_ref >= 0) {
2170
FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2171
2172
if (proj)
2173
micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2174
2175
args[shadow_ref] = &r[shadow_ref];
2176
}
2177
2178
fetch_texel(mach->Sampler, unit, unit,
2179
args[0], args[1], args[2], args[3], args[4],
2180
NULL, offsets, control,
2181
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2182
2183
#if 0
2184
debug_printf("fetch r: %g %g %g %g\n",
2185
r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2186
debug_printf("fetch g: %g %g %g %g\n",
2187
r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2188
debug_printf("fetch b: %g %g %g %g\n",
2189
r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2190
debug_printf("fetch a: %g %g %g %g\n",
2191
r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2192
#endif
2193
2194
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2195
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2196
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2197
}
2198
}
2199
}
2200
2201
static void
2202
exec_lodq(struct tgsi_exec_machine *mach,
2203
const struct tgsi_full_instruction *inst)
2204
{
2205
uint resource_unit, sampler_unit;
2206
unsigned dim;
2207
unsigned i;
2208
union tgsi_exec_channel coords[4];
2209
const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
2210
union tgsi_exec_channel r[2];
2211
2212
resource_unit = fetch_sampler_unit(mach, inst, 1);
2213
if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2214
uint target = mach->SamplerViews[resource_unit].Resource;
2215
dim = tgsi_util_get_texture_coord_dim(target);
2216
sampler_unit = fetch_sampler_unit(mach, inst, 2);
2217
} else {
2218
dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2219
sampler_unit = resource_unit;
2220
}
2221
assert(dim <= ARRAY_SIZE(coords));
2222
/* fetch coordinates */
2223
for (i = 0; i < dim; i++) {
2224
FETCH(&coords[i], 0, TGSI_CHAN_X + i);
2225
args[i] = &coords[i];
2226
}
2227
for (i = dim; i < ARRAY_SIZE(coords); i++) {
2228
args[i] = &ZeroVec;
2229
}
2230
mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
2231
args[0]->f,
2232
args[1]->f,
2233
args[2]->f,
2234
args[3]->f,
2235
TGSI_SAMPLER_LOD_NONE,
2236
r[0].f,
2237
r[1].f);
2238
2239
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2240
store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
2241
}
2242
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2243
store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y);
2244
}
2245
if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2246
unsigned char swizzles[4];
2247
unsigned chan;
2248
swizzles[0] = inst->Src[1].Register.SwizzleX;
2249
swizzles[1] = inst->Src[1].Register.SwizzleY;
2250
swizzles[2] = inst->Src[1].Register.SwizzleZ;
2251
swizzles[3] = inst->Src[1].Register.SwizzleW;
2252
2253
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2254
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2255
if (swizzles[chan] >= 2) {
2256
store_dest(mach, &ZeroVec,
2257
&inst->Dst[0], inst, chan);
2258
} else {
2259
store_dest(mach, &r[swizzles[chan]],
2260
&inst->Dst[0], inst, chan);
2261
}
2262
}
2263
}
2264
} else {
2265
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2266
store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
2267
}
2268
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2269
store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y);
2270
}
2271
}
2272
}
2273
2274
static void
2275
exec_txd(struct tgsi_exec_machine *mach,
2276
const struct tgsi_full_instruction *inst)
2277
{
2278
union tgsi_exec_channel r[4];
2279
float derivs[3][2][TGSI_QUAD_SIZE];
2280
uint chan;
2281
uint unit;
2282
int8_t offsets[3];
2283
2284
unit = fetch_sampler_unit(mach, inst, 3);
2285
/* always fetch all 3 offsets, overkill but keeps code simple */
2286
fetch_texel_offsets(mach, inst, offsets);
2287
2288
switch (inst->Texture.Texture) {
2289
case TGSI_TEXTURE_1D:
2290
FETCH(&r[0], 0, TGSI_CHAN_X);
2291
2292
fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2293
2294
fetch_texel(mach->Sampler, unit, unit,
2295
&r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2296
derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2297
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2298
break;
2299
2300
case TGSI_TEXTURE_SHADOW1D:
2301
case TGSI_TEXTURE_1D_ARRAY:
2302
case TGSI_TEXTURE_SHADOW1D_ARRAY:
2303
/* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2304
FETCH(&r[0], 0, TGSI_CHAN_X);
2305
FETCH(&r[1], 0, TGSI_CHAN_Y);
2306
FETCH(&r[2], 0, TGSI_CHAN_Z);
2307
2308
fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2309
2310
fetch_texel(mach->Sampler, unit, unit,
2311
&r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2312
derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2313
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2314
break;
2315
2316
case TGSI_TEXTURE_2D:
2317
case TGSI_TEXTURE_RECT:
2318
FETCH(&r[0], 0, TGSI_CHAN_X);
2319
FETCH(&r[1], 0, TGSI_CHAN_Y);
2320
2321
fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2322
fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2323
2324
fetch_texel(mach->Sampler, unit, unit,
2325
&r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2326
derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2327
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2328
break;
2329
2330
2331
case TGSI_TEXTURE_SHADOW2D:
2332
case TGSI_TEXTURE_SHADOWRECT:
2333
case TGSI_TEXTURE_2D_ARRAY:
2334
case TGSI_TEXTURE_SHADOW2D_ARRAY:
2335
/* only SHADOW2D_ARRAY actually needs W */
2336
FETCH(&r[0], 0, TGSI_CHAN_X);
2337
FETCH(&r[1], 0, TGSI_CHAN_Y);
2338
FETCH(&r[2], 0, TGSI_CHAN_Z);
2339
FETCH(&r[3], 0, TGSI_CHAN_W);
2340
2341
fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2342
fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2343
2344
fetch_texel(mach->Sampler, unit, unit,
2345
&r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2346
derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2347
&r[0], &r[1], &r[2], &r[3]); /* outputs */
2348
break;
2349
2350
case TGSI_TEXTURE_3D:
2351
case TGSI_TEXTURE_CUBE:
2352
case TGSI_TEXTURE_CUBE_ARRAY:
2353
case TGSI_TEXTURE_SHADOWCUBE:
2354
/* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2355
FETCH(&r[0], 0, TGSI_CHAN_X);
2356
FETCH(&r[1], 0, TGSI_CHAN_Y);
2357
FETCH(&r[2], 0, TGSI_CHAN_Z);
2358
FETCH(&r[3], 0, TGSI_CHAN_W);
2359
2360
fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2361
fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2362
fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2363
2364
fetch_texel(mach->Sampler, unit, unit,
2365
&r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2366
derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2367
&r[0], &r[1], &r[2], &r[3]); /* outputs */
2368
break;
2369
2370
default:
2371
assert(0);
2372
}
2373
2374
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2375
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2376
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2377
}
2378
}
2379
}
2380
2381
2382
static void
2383
exec_txf(struct tgsi_exec_machine *mach,
2384
const struct tgsi_full_instruction *inst)
2385
{
2386
union tgsi_exec_channel r[4];
2387
uint chan;
2388
uint unit;
2389
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2390
int j;
2391
int8_t offsets[3];
2392
unsigned target;
2393
2394
unit = fetch_sampler_unit(mach, inst, 1);
2395
/* always fetch all 3 offsets, overkill but keeps code simple */
2396
fetch_texel_offsets(mach, inst, offsets);
2397
2398
IFETCH(&r[3], 0, TGSI_CHAN_W);
2399
2400
if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2401
inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2402
target = mach->SamplerViews[unit].Resource;
2403
}
2404
else {
2405
target = inst->Texture.Texture;
2406
}
2407
switch(target) {
2408
case TGSI_TEXTURE_3D:
2409
case TGSI_TEXTURE_2D_ARRAY:
2410
case TGSI_TEXTURE_SHADOW2D_ARRAY:
2411
case TGSI_TEXTURE_2D_ARRAY_MSAA:
2412
IFETCH(&r[2], 0, TGSI_CHAN_Z);
2413
FALLTHROUGH;
2414
case TGSI_TEXTURE_2D:
2415
case TGSI_TEXTURE_RECT:
2416
case TGSI_TEXTURE_SHADOW1D_ARRAY:
2417
case TGSI_TEXTURE_SHADOW2D:
2418
case TGSI_TEXTURE_SHADOWRECT:
2419
case TGSI_TEXTURE_1D_ARRAY:
2420
case TGSI_TEXTURE_2D_MSAA:
2421
IFETCH(&r[1], 0, TGSI_CHAN_Y);
2422
FALLTHROUGH;
2423
case TGSI_TEXTURE_BUFFER:
2424
case TGSI_TEXTURE_1D:
2425
case TGSI_TEXTURE_SHADOW1D:
2426
IFETCH(&r[0], 0, TGSI_CHAN_X);
2427
break;
2428
default:
2429
assert(0);
2430
break;
2431
}
2432
2433
mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2434
offsets, rgba);
2435
2436
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2437
r[0].f[j] = rgba[0][j];
2438
r[1].f[j] = rgba[1][j];
2439
r[2].f[j] = rgba[2][j];
2440
r[3].f[j] = rgba[3][j];
2441
}
2442
2443
if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2444
inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2445
unsigned char swizzles[4];
2446
swizzles[0] = inst->Src[1].Register.SwizzleX;
2447
swizzles[1] = inst->Src[1].Register.SwizzleY;
2448
swizzles[2] = inst->Src[1].Register.SwizzleZ;
2449
swizzles[3] = inst->Src[1].Register.SwizzleW;
2450
2451
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2452
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2453
store_dest(mach, &r[swizzles[chan]],
2454
&inst->Dst[0], inst, chan);
2455
}
2456
}
2457
}
2458
else {
2459
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2460
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2461
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2462
}
2463
}
2464
}
2465
}
2466
2467
static void
2468
exec_txq(struct tgsi_exec_machine *mach,
2469
const struct tgsi_full_instruction *inst)
2470
{
2471
int result[4];
2472
union tgsi_exec_channel r[4], src;
2473
uint chan;
2474
uint unit;
2475
int i,j;
2476
2477
unit = fetch_sampler_unit(mach, inst, 1);
2478
2479
fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2480
2481
/* XXX: This interface can't return per-pixel values */
2482
mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2483
2484
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2485
for (j = 0; j < 4; j++) {
2486
r[j].i[i] = result[j];
2487
}
2488
}
2489
2490
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2491
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2492
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2493
}
2494
}
2495
}
2496
2497
static void
2498
exec_sample(struct tgsi_exec_machine *mach,
2499
const struct tgsi_full_instruction *inst,
2500
uint modifier, boolean compare)
2501
{
2502
const uint resource_unit = inst->Src[1].Register.Index;
2503
const uint sampler_unit = inst->Src[2].Register.Index;
2504
union tgsi_exec_channel r[5], c1;
2505
const union tgsi_exec_channel *lod = &ZeroVec;
2506
enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2507
uint chan;
2508
unsigned char swizzles[4];
2509
int8_t offsets[3];
2510
2511
/* always fetch all 3 offsets, overkill but keeps code simple */
2512
fetch_texel_offsets(mach, inst, offsets);
2513
2514
assert(modifier != TEX_MODIFIER_PROJECTED);
2515
2516
if (modifier != TEX_MODIFIER_NONE) {
2517
if (modifier == TEX_MODIFIER_LOD_BIAS) {
2518
FETCH(&c1, 3, TGSI_CHAN_X);
2519
lod = &c1;
2520
control = TGSI_SAMPLER_LOD_BIAS;
2521
}
2522
else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2523
FETCH(&c1, 3, TGSI_CHAN_X);
2524
lod = &c1;
2525
control = TGSI_SAMPLER_LOD_EXPLICIT;
2526
}
2527
else if (modifier == TEX_MODIFIER_GATHER) {
2528
control = TGSI_SAMPLER_GATHER;
2529
}
2530
else {
2531
assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2532
control = TGSI_SAMPLER_LOD_ZERO;
2533
}
2534
}
2535
2536
FETCH(&r[0], 0, TGSI_CHAN_X);
2537
2538
switch (mach->SamplerViews[resource_unit].Resource) {
2539
case TGSI_TEXTURE_1D:
2540
if (compare) {
2541
FETCH(&r[2], 3, TGSI_CHAN_X);
2542
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2543
&r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2544
NULL, offsets, control,
2545
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2546
}
2547
else {
2548
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2549
&r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2550
NULL, offsets, control,
2551
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2552
}
2553
break;
2554
2555
case TGSI_TEXTURE_1D_ARRAY:
2556
case TGSI_TEXTURE_2D:
2557
case TGSI_TEXTURE_RECT:
2558
FETCH(&r[1], 0, TGSI_CHAN_Y);
2559
if (compare) {
2560
FETCH(&r[2], 3, TGSI_CHAN_X);
2561
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2562
&r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2563
NULL, offsets, control,
2564
&r[0], &r[1], &r[2], &r[3]); /* outputs */
2565
}
2566
else {
2567
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2568
&r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2569
NULL, offsets, control,
2570
&r[0], &r[1], &r[2], &r[3]); /* outputs */
2571
}
2572
break;
2573
2574
case TGSI_TEXTURE_2D_ARRAY:
2575
case TGSI_TEXTURE_3D:
2576
case TGSI_TEXTURE_CUBE:
2577
FETCH(&r[1], 0, TGSI_CHAN_Y);
2578
FETCH(&r[2], 0, TGSI_CHAN_Z);
2579
if(compare) {
2580
FETCH(&r[3], 3, TGSI_CHAN_X);
2581
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2582
&r[0], &r[1], &r[2], &r[3], lod,
2583
NULL, offsets, control,
2584
&r[0], &r[1], &r[2], &r[3]);
2585
}
2586
else {
2587
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2588
&r[0], &r[1], &r[2], &ZeroVec, lod,
2589
NULL, offsets, control,
2590
&r[0], &r[1], &r[2], &r[3]);
2591
}
2592
break;
2593
2594
case TGSI_TEXTURE_CUBE_ARRAY:
2595
FETCH(&r[1], 0, TGSI_CHAN_Y);
2596
FETCH(&r[2], 0, TGSI_CHAN_Z);
2597
FETCH(&r[3], 0, TGSI_CHAN_W);
2598
if(compare) {
2599
FETCH(&r[4], 3, TGSI_CHAN_X);
2600
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2601
&r[0], &r[1], &r[2], &r[3], &r[4],
2602
NULL, offsets, control,
2603
&r[0], &r[1], &r[2], &r[3]);
2604
}
2605
else {
2606
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2607
&r[0], &r[1], &r[2], &r[3], lod,
2608
NULL, offsets, control,
2609
&r[0], &r[1], &r[2], &r[3]);
2610
}
2611
break;
2612
2613
2614
default:
2615
assert(0);
2616
}
2617
2618
swizzles[0] = inst->Src[1].Register.SwizzleX;
2619
swizzles[1] = inst->Src[1].Register.SwizzleY;
2620
swizzles[2] = inst->Src[1].Register.SwizzleZ;
2621
swizzles[3] = inst->Src[1].Register.SwizzleW;
2622
2623
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2624
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2625
store_dest(mach, &r[swizzles[chan]],
2626
&inst->Dst[0], inst, chan);
2627
}
2628
}
2629
}
2630
2631
static void
2632
exec_sample_d(struct tgsi_exec_machine *mach,
2633
const struct tgsi_full_instruction *inst)
2634
{
2635
const uint resource_unit = inst->Src[1].Register.Index;
2636
const uint sampler_unit = inst->Src[2].Register.Index;
2637
union tgsi_exec_channel r[4];
2638
float derivs[3][2][TGSI_QUAD_SIZE];
2639
uint chan;
2640
unsigned char swizzles[4];
2641
int8_t offsets[3];
2642
2643
/* always fetch all 3 offsets, overkill but keeps code simple */
2644
fetch_texel_offsets(mach, inst, offsets);
2645
2646
FETCH(&r[0], 0, TGSI_CHAN_X);
2647
2648
switch (mach->SamplerViews[resource_unit].Resource) {
2649
case TGSI_TEXTURE_1D:
2650
case TGSI_TEXTURE_1D_ARRAY:
2651
/* only 1D array actually needs Y */
2652
FETCH(&r[1], 0, TGSI_CHAN_Y);
2653
2654
fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2655
2656
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2657
&r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2658
derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2659
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2660
break;
2661
2662
case TGSI_TEXTURE_2D:
2663
case TGSI_TEXTURE_RECT:
2664
case TGSI_TEXTURE_2D_ARRAY:
2665
/* only 2D array actually needs Z */
2666
FETCH(&r[1], 0, TGSI_CHAN_Y);
2667
FETCH(&r[2], 0, TGSI_CHAN_Z);
2668
2669
fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2670
fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2671
2672
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2673
&r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */
2674
derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2675
&r[0], &r[1], &r[2], &r[3]); /* outputs */
2676
break;
2677
2678
case TGSI_TEXTURE_3D:
2679
case TGSI_TEXTURE_CUBE:
2680
case TGSI_TEXTURE_CUBE_ARRAY:
2681
/* only cube array actually needs W */
2682
FETCH(&r[1], 0, TGSI_CHAN_Y);
2683
FETCH(&r[2], 0, TGSI_CHAN_Z);
2684
FETCH(&r[3], 0, TGSI_CHAN_W);
2685
2686
fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2687
fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2688
fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2689
2690
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2691
&r[0], &r[1], &r[2], &r[3], &ZeroVec,
2692
derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2693
&r[0], &r[1], &r[2], &r[3]);
2694
break;
2695
2696
default:
2697
assert(0);
2698
}
2699
2700
swizzles[0] = inst->Src[1].Register.SwizzleX;
2701
swizzles[1] = inst->Src[1].Register.SwizzleY;
2702
swizzles[2] = inst->Src[1].Register.SwizzleZ;
2703
swizzles[3] = inst->Src[1].Register.SwizzleW;
2704
2705
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2706
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2707
store_dest(mach, &r[swizzles[chan]],
2708
&inst->Dst[0], inst, chan);
2709
}
2710
}
2711
}
2712
2713
2714
/**
2715
* Evaluate a constant-valued coefficient at the position of the
2716
* current quad.
2717
*/
2718
static void
2719
eval_constant_coef(
2720
struct tgsi_exec_machine *mach,
2721
unsigned attrib,
2722
unsigned chan )
2723
{
2724
unsigned i;
2725
2726
for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2727
mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2728
}
2729
}
2730
2731
static void
2732
interp_constant_offset(
2733
UNUSED const struct tgsi_exec_machine *mach,
2734
UNUSED unsigned attrib,
2735
UNUSED unsigned chan,
2736
UNUSED float ofs_x,
2737
UNUSED float ofs_y,
2738
UNUSED union tgsi_exec_channel *out_chan)
2739
{
2740
}
2741
2742
/**
2743
* Evaluate a linear-valued coefficient at the position of the
2744
* current quad.
2745
*/
2746
static void
2747
interp_linear_offset(
2748
const struct tgsi_exec_machine *mach,
2749
unsigned attrib,
2750
unsigned chan,
2751
float ofs_x,
2752
float ofs_y,
2753
union tgsi_exec_channel *out_chan)
2754
{
2755
const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2756
const float dady = mach->InterpCoefs[attrib].dady[chan];
2757
const float delta = ofs_x * dadx + ofs_y * dady;
2758
out_chan->f[0] += delta;
2759
out_chan->f[1] += delta;
2760
out_chan->f[2] += delta;
2761
out_chan->f[3] += delta;
2762
}
2763
2764
static void
2765
eval_linear_coef(struct tgsi_exec_machine *mach,
2766
unsigned attrib,
2767
unsigned chan)
2768
{
2769
const float x = mach->QuadPos.xyzw[0].f[0];
2770
const float y = mach->QuadPos.xyzw[1].f[0];
2771
const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2772
const float dady = mach->InterpCoefs[attrib].dady[chan];
2773
const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2774
2775
mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2776
mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2777
mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2778
mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2779
}
2780
2781
/**
2782
* Evaluate a perspective-valued coefficient at the position of the
2783
* current quad.
2784
*/
2785
2786
static void
2787
interp_perspective_offset(
2788
const struct tgsi_exec_machine *mach,
2789
unsigned attrib,
2790
unsigned chan,
2791
float ofs_x,
2792
float ofs_y,
2793
union tgsi_exec_channel *out_chan)
2794
{
2795
const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2796
const float dady = mach->InterpCoefs[attrib].dady[chan];
2797
const float *w = mach->QuadPos.xyzw[3].f;
2798
const float delta = ofs_x * dadx + ofs_y * dady;
2799
out_chan->f[0] += delta / w[0];
2800
out_chan->f[1] += delta / w[1];
2801
out_chan->f[2] += delta / w[2];
2802
out_chan->f[3] += delta / w[3];
2803
}
2804
2805
static void
2806
eval_perspective_coef(
2807
struct tgsi_exec_machine *mach,
2808
unsigned attrib,
2809
unsigned chan )
2810
{
2811
const float x = mach->QuadPos.xyzw[0].f[0];
2812
const float y = mach->QuadPos.xyzw[1].f[0];
2813
const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2814
const float dady = mach->InterpCoefs[attrib].dady[chan];
2815
const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2816
const float *w = mach->QuadPos.xyzw[3].f;
2817
/* divide by W here */
2818
mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2819
mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2820
mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2821
mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2822
}
2823
2824
2825
/**
 * Signature shared by the eval_*_coef functions: evaluate one channel of
 * one input attribute for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2829
2830
static void
2831
exec_declaration(struct tgsi_exec_machine *mach,
2832
const struct tgsi_full_declaration *decl)
2833
{
2834
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2835
mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2836
return;
2837
}
2838
2839
if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
2840
if (decl->Declaration.File == TGSI_FILE_INPUT) {
2841
uint first, last, mask;
2842
2843
first = decl->Range.First;
2844
last = decl->Range.Last;
2845
mask = decl->Declaration.UsageMask;
2846
2847
/* XXX we could remove this special-case code since
2848
* mach->InterpCoefs[first].a0 should already have the
2849
* front/back-face value. But we should first update the
2850
* ureg code to emit the right UsageMask value (WRITEMASK_X).
2851
* Then, we could remove the tgsi_exec_machine::Face field.
2852
*/
2853
/* XXX make FACE a system value */
2854
if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2855
uint i;
2856
2857
assert(decl->Semantic.Index == 0);
2858
assert(first == last);
2859
2860
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2861
mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2862
}
2863
} else {
2864
eval_coef_func eval;
2865
apply_sample_offset_func interp;
2866
uint i, j;
2867
2868
switch (decl->Interp.Interpolate) {
2869
case TGSI_INTERPOLATE_CONSTANT:
2870
eval = eval_constant_coef;
2871
interp = interp_constant_offset;
2872
break;
2873
2874
case TGSI_INTERPOLATE_LINEAR:
2875
eval = eval_linear_coef;
2876
interp = interp_linear_offset;
2877
break;
2878
2879
case TGSI_INTERPOLATE_PERSPECTIVE:
2880
eval = eval_perspective_coef;
2881
interp = interp_perspective_offset;
2882
break;
2883
2884
case TGSI_INTERPOLATE_COLOR:
2885
eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
2886
interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset;
2887
break;
2888
2889
default:
2890
assert(0);
2891
return;
2892
}
2893
2894
for (i = first; i <= last; i++)
2895
mach->InputSampleOffsetApply[i] = interp;
2896
2897
for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2898
if (mask & (1 << j)) {
2899
for (i = first; i <= last; i++) {
2900
eval(mach, i, j);
2901
}
2902
}
2903
}
2904
}
2905
2906
if (DEBUG_EXECUTION) {
2907
uint i, j;
2908
for (i = first; i <= last; ++i) {
2909
debug_printf("IN[%2u] = ", i);
2910
for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2911
if (j > 0) {
2912
debug_printf(" ");
2913
}
2914
debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
2915
mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
2916
mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
2917
mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
2918
mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
2919
}
2920
}
2921
}
2922
}
2923
}
2924
2925
}
2926
2927
/** Per-channel operation with one source operand. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
2929
2930
static void
2931
exec_scalar_unary(struct tgsi_exec_machine *mach,
2932
const struct tgsi_full_instruction *inst,
2933
micro_unary_op op,
2934
enum tgsi_exec_datatype src_datatype)
2935
{
2936
unsigned int chan;
2937
union tgsi_exec_channel src;
2938
union tgsi_exec_channel dst;
2939
2940
fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
2941
op(&dst, &src);
2942
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2943
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2944
store_dest(mach, &dst, &inst->Dst[0], inst, chan);
2945
}
2946
}
2947
}
2948
2949
static void
2950
exec_vector_unary(struct tgsi_exec_machine *mach,
2951
const struct tgsi_full_instruction *inst,
2952
micro_unary_op op,
2953
enum tgsi_exec_datatype src_datatype)
2954
{
2955
unsigned int chan;
2956
struct tgsi_exec_vector dst;
2957
2958
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2959
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2960
union tgsi_exec_channel src;
2961
2962
fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
2963
op(&dst.xyzw[chan], &src);
2964
}
2965
}
2966
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2967
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2968
store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
2969
}
2970
}
2971
}
2972
2973
/** Per-channel operation with two source operands. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
2976
2977
static void
2978
exec_scalar_binary(struct tgsi_exec_machine *mach,
2979
const struct tgsi_full_instruction *inst,
2980
micro_binary_op op,
2981
enum tgsi_exec_datatype src_datatype)
2982
{
2983
unsigned int chan;
2984
union tgsi_exec_channel src[2];
2985
union tgsi_exec_channel dst;
2986
2987
fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
2988
fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
2989
op(&dst, &src[0], &src[1]);
2990
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2991
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2992
store_dest(mach, &dst, &inst->Dst[0], inst, chan);
2993
}
2994
}
2995
}
2996
2997
static void
2998
exec_vector_binary(struct tgsi_exec_machine *mach,
2999
const struct tgsi_full_instruction *inst,
3000
micro_binary_op op,
3001
enum tgsi_exec_datatype src_datatype)
3002
{
3003
unsigned int chan;
3004
struct tgsi_exec_vector dst;
3005
3006
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3007
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3008
union tgsi_exec_channel src[2];
3009
3010
fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3011
fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3012
op(&dst.xyzw[chan], &src[0], &src[1]);
3013
}
3014
}
3015
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3016
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3017
store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3018
}
3019
}
3020
}
3021
3022
/** Per-channel operation with three source operands. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
3026
3027
static void
3028
exec_vector_trinary(struct tgsi_exec_machine *mach,
3029
const struct tgsi_full_instruction *inst,
3030
micro_trinary_op op,
3031
enum tgsi_exec_datatype src_datatype)
3032
{
3033
unsigned int chan;
3034
struct tgsi_exec_vector dst;
3035
3036
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3037
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3038
union tgsi_exec_channel src[3];
3039
3040
fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3041
fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3042
fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3043
op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3044
}
3045
}
3046
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3047
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3048
store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3049
}
3050
}
3051
}
3052
3053
/** Per-channel operation with four source operands. */
typedef void (*micro_quaternary_op)(union tgsi_exec_channel *dst,
                                    const union tgsi_exec_channel *src0,
                                    const union tgsi_exec_channel *src1,
                                    const union tgsi_exec_channel *src2,
                                    const union tgsi_exec_channel *src3);
3058
3059
static void
3060
exec_vector_quaternary(struct tgsi_exec_machine *mach,
3061
const struct tgsi_full_instruction *inst,
3062
micro_quaternary_op op,
3063
enum tgsi_exec_datatype src_datatype)
3064
{
3065
unsigned int chan;
3066
struct tgsi_exec_vector dst;
3067
3068
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3069
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3070
union tgsi_exec_channel src[4];
3071
3072
fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3073
fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3074
fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3075
fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
3076
op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
3077
}
3078
}
3079
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3080
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3081
store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3082
}
3083
}
3084
}
3085
3086
static void
3087
exec_dp3(struct tgsi_exec_machine *mach,
3088
const struct tgsi_full_instruction *inst)
3089
{
3090
unsigned int chan;
3091
union tgsi_exec_channel arg[3];
3092
3093
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3094
fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3095
micro_mul(&arg[2], &arg[0], &arg[1]);
3096
3097
for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
3098
fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3099
fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3100
micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3101
}
3102
3103
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3104
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3105
store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3106
}
3107
}
3108
}
3109
3110
static void
3111
exec_dp4(struct tgsi_exec_machine *mach,
3112
const struct tgsi_full_instruction *inst)
3113
{
3114
unsigned int chan;
3115
union tgsi_exec_channel arg[3];
3116
3117
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3118
fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3119
micro_mul(&arg[2], &arg[0], &arg[1]);
3120
3121
for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
3122
fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3123
fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3124
micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3125
}
3126
3127
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3128
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3129
store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3130
}
3131
}
3132
}
3133
3134
static void
3135
exec_dp2(struct tgsi_exec_machine *mach,
3136
const struct tgsi_full_instruction *inst)
3137
{
3138
unsigned int chan;
3139
union tgsi_exec_channel arg[3];
3140
3141
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3142
fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3143
micro_mul(&arg[2], &arg[0], &arg[1]);
3144
3145
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3146
fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3147
micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3148
3149
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3150
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3151
store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3152
}
3153
}
3154
}
3155
3156
static void
3157
exec_pk2h(struct tgsi_exec_machine *mach,
3158
const struct tgsi_full_instruction *inst)
3159
{
3160
unsigned chan;
3161
union tgsi_exec_channel arg[2], dst;
3162
3163
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3164
fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3165
for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3166
dst.u[chan] = _mesa_float_to_half(arg[0].f[chan]) |
3167
(_mesa_float_to_half(arg[1].f[chan]) << 16);
3168
}
3169
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3170
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3171
store_dest(mach, &dst, &inst->Dst[0], inst, chan);
3172
}
3173
}
3174
}
3175
3176
static void
3177
exec_up2h(struct tgsi_exec_machine *mach,
3178
const struct tgsi_full_instruction *inst)
3179
{
3180
unsigned chan;
3181
union tgsi_exec_channel arg, dst[2];
3182
3183
fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3184
for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3185
dst[0].f[chan] = _mesa_half_to_float(arg.u[chan] & 0xffff);
3186
dst[1].f[chan] = _mesa_half_to_float(arg.u[chan] >> 16);
3187
}
3188
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3189
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3190
store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan);
3191
}
3192
}
3193
}
3194
3195
static void
3196
micro_ucmp(union tgsi_exec_channel *dst,
3197
const union tgsi_exec_channel *src0,
3198
const union tgsi_exec_channel *src1,
3199
const union tgsi_exec_channel *src2)
3200
{
3201
dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
3202
dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
3203
dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
3204
dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
3205
}
3206
3207
static void
3208
exec_ucmp(struct tgsi_exec_machine *mach,
3209
const struct tgsi_full_instruction *inst)
3210
{
3211
unsigned int chan;
3212
struct tgsi_exec_vector dst;
3213
3214
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3215
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3216
union tgsi_exec_channel src[3];
3217
3218
fetch_source(mach, &src[0], &inst->Src[0], chan,
3219
TGSI_EXEC_DATA_UINT);
3220
fetch_source(mach, &src[1], &inst->Src[1], chan,
3221
TGSI_EXEC_DATA_FLOAT);
3222
fetch_source(mach, &src[2], &inst->Src[2], chan,
3223
TGSI_EXEC_DATA_FLOAT);
3224
micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3225
}
3226
}
3227
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3228
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3229
store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3230
}
3231
}
3232
}
3233
3234
static void
3235
exec_dst(struct tgsi_exec_machine *mach,
3236
const struct tgsi_full_instruction *inst)
3237
{
3238
union tgsi_exec_channel r[2];
3239
union tgsi_exec_channel d[4];
3240
3241
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3242
fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3243
fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3244
micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
3245
}
3246
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3247
fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3248
}
3249
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3250
fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3251
}
3252
3253
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3254
store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X);
3255
}
3256
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3257
store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y);
3258
}
3259
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3260
store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z);
3261
}
3262
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3263
store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W);
3264
}
3265
}
3266
3267
static void
3268
exec_log(struct tgsi_exec_machine *mach,
3269
const struct tgsi_full_instruction *inst)
3270
{
3271
union tgsi_exec_channel r[3];
3272
3273
fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3274
micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
3275
micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
3276
micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
3277
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3278
store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
3279
}
3280
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3281
micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
3282
micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
3283
store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y);
3284
}
3285
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3286
store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z);
3287
}
3288
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3289
store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
3290
}
3291
}
3292
3293
static void
3294
exec_exp(struct tgsi_exec_machine *mach,
3295
const struct tgsi_full_instruction *inst)
3296
{
3297
union tgsi_exec_channel r[3];
3298
3299
fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3300
micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
3301
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3302
micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
3303
store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X);
3304
}
3305
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3306
micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3307
store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y);
3308
}
3309
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3310
micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
3311
store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z);
3312
}
3313
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3314
store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
3315
}
3316
}
3317
3318
static void
3319
exec_lit(struct tgsi_exec_machine *mach,
3320
const struct tgsi_full_instruction *inst)
3321
{
3322
union tgsi_exec_channel r[3];
3323
union tgsi_exec_channel d[3];
3324
3325
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3326
fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3327
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3328
fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3329
micro_max(&r[1], &r[1], &ZeroVec);
3330
3331
fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3332
micro_min(&r[2], &r[2], &P128Vec);
3333
micro_max(&r[2], &r[2], &M128Vec);
3334
micro_pow(&r[1], &r[1], &r[2]);
3335
micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3336
store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z);
3337
}
3338
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3339
micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3340
store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y);
3341
}
3342
}
3343
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3344
store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X);
3345
}
3346
3347
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3348
store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
3349
}
3350
}
3351
3352
static void
3353
exec_break(struct tgsi_exec_machine *mach)
3354
{
3355
if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3356
/* turn off loop channels for each enabled exec channel */
3357
mach->LoopMask &= ~mach->ExecMask;
3358
/* Todo: if mach->LoopMask == 0, jump to end of loop */
3359
UPDATE_EXEC_MASK(mach);
3360
} else {
3361
assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3362
3363
mach->Switch.mask = 0x0;
3364
3365
UPDATE_EXEC_MASK(mach);
3366
}
3367
}
3368
3369
static void
3370
exec_switch(struct tgsi_exec_machine *mach,
3371
const struct tgsi_full_instruction *inst)
3372
{
3373
assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3374
assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3375
3376
mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3377
fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3378
mach->Switch.mask = 0x0;
3379
mach->Switch.defaultMask = 0x0;
3380
3381
mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3382
mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3383
3384
UPDATE_EXEC_MASK(mach);
3385
}
3386
3387
static void
3388
exec_case(struct tgsi_exec_machine *mach,
3389
const struct tgsi_full_instruction *inst)
3390
{
3391
uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3392
union tgsi_exec_channel src;
3393
uint mask = 0;
3394
3395
fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3396
3397
if (mach->Switch.selector.u[0] == src.u[0]) {
3398
mask |= 0x1;
3399
}
3400
if (mach->Switch.selector.u[1] == src.u[1]) {
3401
mask |= 0x2;
3402
}
3403
if (mach->Switch.selector.u[2] == src.u[2]) {
3404
mask |= 0x4;
3405
}
3406
if (mach->Switch.selector.u[3] == src.u[3]) {
3407
mask |= 0x8;
3408
}
3409
3410
mach->Switch.defaultMask |= mask;
3411
3412
mach->Switch.mask |= mask & prevMask;
3413
3414
UPDATE_EXEC_MASK(mach);
3415
}
3416
3417
/* FIXME: this will only work if default is last */
3418
static void
3419
exec_default(struct tgsi_exec_machine *mach)
3420
{
3421
uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3422
3423
mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3424
3425
UPDATE_EXEC_MASK(mach);
3426
}
3427
3428
static void
3429
exec_endswitch(struct tgsi_exec_machine *mach)
3430
{
3431
mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3432
mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3433
3434
UPDATE_EXEC_MASK(mach);
3435
}
3436
3437
/**
 * Micro-op with 64-bit source(s) and a 64-bit result.  The multi-operand
 * executors (exec_double_binary/exec_double_trinary) pass an array of
 * operands through the single \p in pointer.
 */
typedef void (* micro_dop)(union tgsi_double_channel *out,
                           const union tgsi_double_channel *in);

/** Micro-op with a 64-bit first operand, a 32-bit second operand and a 64-bit result. */
typedef void (* micro_dop_sop)(union tgsi_double_channel *out,
                               const union tgsi_double_channel *in0,
                               union tgsi_exec_channel *in1);

/** Micro-op with a 32-bit source and a 64-bit result. */
typedef void (* micro_dop_s)(union tgsi_double_channel *out,
                             const union tgsi_exec_channel *in);

/** Micro-op with a 64-bit source and a 32-bit result. */
typedef void (* micro_sop_d)(union tgsi_exec_channel *out,
                             const union tgsi_double_channel *in);
3449
3450
static void
3451
fetch_double_channel(struct tgsi_exec_machine *mach,
3452
union tgsi_double_channel *chan,
3453
const struct tgsi_full_src_register *reg,
3454
uint chan_0,
3455
uint chan_1)
3456
{
3457
union tgsi_exec_channel src[2];
3458
uint i;
3459
3460
fetch_source_d(mach, &src[0], reg, chan_0);
3461
fetch_source_d(mach, &src[1], reg, chan_1);
3462
3463
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3464
chan->u[i][0] = src[0].u[i];
3465
chan->u[i][1] = src[1].u[i];
3466
}
3467
assert(!reg->Register.Absolute);
3468
assert(!reg->Register.Negate);
3469
}
3470
3471
static void
3472
store_double_channel(struct tgsi_exec_machine *mach,
3473
const union tgsi_double_channel *chan,
3474
const struct tgsi_full_dst_register *reg,
3475
const struct tgsi_full_instruction *inst,
3476
uint chan_0,
3477
uint chan_1)
3478
{
3479
union tgsi_exec_channel dst[2];
3480
uint i;
3481
union tgsi_double_channel temp;
3482
const uint execmask = mach->ExecMask;
3483
3484
if (!inst->Instruction.Saturate) {
3485
for (i = 0; i < TGSI_QUAD_SIZE; i++)
3486
if (execmask & (1 << i)) {
3487
dst[0].u[i] = chan->u[i][0];
3488
dst[1].u[i] = chan->u[i][1];
3489
}
3490
}
3491
else {
3492
for (i = 0; i < TGSI_QUAD_SIZE; i++)
3493
if (execmask & (1 << i)) {
3494
if (chan->d[i] < 0.0 || isnan(chan->d[i]))
3495
temp.d[i] = 0.0;
3496
else if (chan->d[i] > 1.0)
3497
temp.d[i] = 1.0;
3498
else
3499
temp.d[i] = chan->d[i];
3500
3501
dst[0].u[i] = temp.u[i][0];
3502
dst[1].u[i] = temp.u[i][1];
3503
}
3504
}
3505
3506
store_dest_double(mach, &dst[0], reg, chan_0);
3507
if (chan_1 != (unsigned)-1)
3508
store_dest_double(mach, &dst[1], reg, chan_1);
3509
}
3510
3511
static void
3512
exec_double_unary(struct tgsi_exec_machine *mach,
3513
const struct tgsi_full_instruction *inst,
3514
micro_dop op)
3515
{
3516
union tgsi_double_channel src;
3517
union tgsi_double_channel dst;
3518
3519
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3520
fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3521
op(&dst, &src);
3522
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3523
}
3524
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3525
fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3526
op(&dst, &src);
3527
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3528
}
3529
}
3530
3531
static void
3532
exec_double_binary(struct tgsi_exec_machine *mach,
3533
const struct tgsi_full_instruction *inst,
3534
micro_dop op,
3535
enum tgsi_exec_datatype dst_datatype)
3536
{
3537
union tgsi_double_channel src[2];
3538
union tgsi_double_channel dst;
3539
int first_dest_chan, second_dest_chan;
3540
int wmask;
3541
3542
wmask = inst->Dst[0].Register.WriteMask;
3543
/* these are & because of the way DSLT etc store their destinations */
3544
if (wmask & TGSI_WRITEMASK_XY) {
3545
first_dest_chan = TGSI_CHAN_X;
3546
second_dest_chan = TGSI_CHAN_Y;
3547
if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3548
first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
3549
second_dest_chan = -1;
3550
}
3551
3552
fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3553
fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3554
op(&dst, src);
3555
store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3556
}
3557
3558
if (wmask & TGSI_WRITEMASK_ZW) {
3559
first_dest_chan = TGSI_CHAN_Z;
3560
second_dest_chan = TGSI_CHAN_W;
3561
if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3562
first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
3563
second_dest_chan = -1;
3564
}
3565
3566
fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3567
fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3568
op(&dst, src);
3569
store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3570
}
3571
}
3572
3573
static void
3574
exec_double_trinary(struct tgsi_exec_machine *mach,
3575
const struct tgsi_full_instruction *inst,
3576
micro_dop op)
3577
{
3578
union tgsi_double_channel src[3];
3579
union tgsi_double_channel dst;
3580
3581
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3582
fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3583
fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3584
fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
3585
op(&dst, src);
3586
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3587
}
3588
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3589
fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3590
fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3591
fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
3592
op(&dst, src);
3593
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3594
}
3595
}
3596
3597
static void
3598
exec_dldexp(struct tgsi_exec_machine *mach,
3599
const struct tgsi_full_instruction *inst)
3600
{
3601
union tgsi_double_channel src0;
3602
union tgsi_exec_channel src1;
3603
union tgsi_double_channel dst;
3604
int wmask;
3605
3606
wmask = inst->Dst[0].Register.WriteMask;
3607
if (wmask & TGSI_WRITEMASK_XY) {
3608
fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3609
fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3610
micro_dldexp(&dst, &src0, &src1);
3611
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3612
}
3613
3614
if (wmask & TGSI_WRITEMASK_ZW) {
3615
fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3616
fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3617
micro_dldexp(&dst, &src0, &src1);
3618
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3619
}
3620
}
3621
3622
static void
3623
exec_dfracexp(struct tgsi_exec_machine *mach,
3624
const struct tgsi_full_instruction *inst)
3625
{
3626
union tgsi_double_channel src;
3627
union tgsi_double_channel dst;
3628
union tgsi_exec_channel dst_exp;
3629
3630
fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3631
micro_dfracexp(&dst, &dst_exp, &src);
3632
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)
3633
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3634
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)
3635
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3636
for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3637
if (inst->Dst[1].Register.WriteMask & (1 << chan))
3638
store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan);
3639
}
3640
}
3641
3642
static void
3643
exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
3644
const struct tgsi_full_instruction *inst,
3645
micro_dop_sop op)
3646
{
3647
union tgsi_double_channel src0;
3648
union tgsi_exec_channel src1;
3649
union tgsi_double_channel dst;
3650
int wmask;
3651
3652
wmask = inst->Dst[0].Register.WriteMask;
3653
if (wmask & TGSI_WRITEMASK_XY) {
3654
fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3655
fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3656
op(&dst, &src0, &src1);
3657
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3658
}
3659
3660
if (wmask & TGSI_WRITEMASK_ZW) {
3661
fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3662
fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3663
op(&dst, &src0, &src1);
3664
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3665
}
3666
}
3667
3668
static int
3669
get_image_coord_dim(unsigned tgsi_tex)
3670
{
3671
int dim;
3672
switch (tgsi_tex) {
3673
case TGSI_TEXTURE_BUFFER:
3674
case TGSI_TEXTURE_1D:
3675
dim = 1;
3676
break;
3677
case TGSI_TEXTURE_2D:
3678
case TGSI_TEXTURE_RECT:
3679
case TGSI_TEXTURE_1D_ARRAY:
3680
case TGSI_TEXTURE_2D_MSAA:
3681
dim = 2;
3682
break;
3683
case TGSI_TEXTURE_3D:
3684
case TGSI_TEXTURE_CUBE:
3685
case TGSI_TEXTURE_2D_ARRAY:
3686
case TGSI_TEXTURE_2D_ARRAY_MSAA:
3687
case TGSI_TEXTURE_CUBE_ARRAY:
3688
dim = 3;
3689
break;
3690
default:
3691
assert(!"unknown texture target");
3692
dim = 0;
3693
break;
3694
}
3695
3696
return dim;
3697
}
3698
3699
static int
3700
get_image_coord_sample(unsigned tgsi_tex)
3701
{
3702
int sample = 0;
3703
switch (tgsi_tex) {
3704
case TGSI_TEXTURE_2D_MSAA:
3705
sample = 3;
3706
break;
3707
case TGSI_TEXTURE_2D_ARRAY_MSAA:
3708
sample = 4;
3709
break;
3710
default:
3711
break;
3712
}
3713
return sample;
3714
}
3715
3716
static void
3717
exec_load_img(struct tgsi_exec_machine *mach,
3718
const struct tgsi_full_instruction *inst)
3719
{
3720
union tgsi_exec_channel r[4], sample_r;
3721
uint unit;
3722
int sample;
3723
int i, j;
3724
int dim;
3725
uint chan;
3726
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3727
struct tgsi_image_params params;
3728
3729
unit = fetch_sampler_unit(mach, inst, 0);
3730
dim = get_image_coord_dim(inst->Memory.Texture);
3731
sample = get_image_coord_sample(inst->Memory.Texture);
3732
assert(dim <= 3);
3733
3734
params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3735
params.unit = unit;
3736
params.tgsi_tex_instr = inst->Memory.Texture;
3737
params.format = inst->Memory.Format;
3738
3739
for (i = 0; i < dim; i++) {
3740
IFETCH(&r[i], 1, TGSI_CHAN_X + i);
3741
}
3742
3743
if (sample)
3744
IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
3745
3746
mach->Image->load(mach->Image, &params,
3747
r[0].i, r[1].i, r[2].i, sample_r.i,
3748
rgba);
3749
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3750
r[0].f[j] = rgba[0][j];
3751
r[1].f[j] = rgba[1][j];
3752
r[2].f[j] = rgba[2][j];
3753
r[3].f[j] = rgba[3][j];
3754
}
3755
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3756
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3757
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
3758
}
3759
}
3760
}
3761
3762
static void
3763
exec_load_membuf(struct tgsi_exec_machine *mach,
3764
const struct tgsi_full_instruction *inst)
3765
{
3766
uint32_t unit = fetch_sampler_unit(mach, inst, 0);
3767
3768
uint32_t size;
3769
const char *ptr;
3770
switch (inst->Src[0].Register.File) {
3771
case TGSI_FILE_MEMORY:
3772
ptr = mach->LocalMem;
3773
size = mach->LocalMemSize;
3774
break;
3775
3776
case TGSI_FILE_BUFFER:
3777
ptr = mach->Buffer->lookup(mach->Buffer, unit, &size);
3778
break;
3779
3780
case TGSI_FILE_CONSTANT:
3781
if (unit < ARRAY_SIZE(mach->Consts)) {
3782
ptr = mach->Consts[unit];
3783
size = mach->ConstsSize[unit];
3784
} else {
3785
ptr = NULL;
3786
size = 0;
3787
}
3788
break;
3789
3790
default:
3791
unreachable("unsupported TGSI_OPCODE_LOAD file");
3792
}
3793
3794
union tgsi_exec_channel offset;
3795
IFETCH(&offset, 1, TGSI_CHAN_X);
3796
3797
assert(inst->Dst[0].Register.WriteMask);
3798
uint32_t load_size = util_last_bit(inst->Dst[0].Register.WriteMask) * 4;
3799
3800
union tgsi_exec_channel rgba[TGSI_NUM_CHANNELS];
3801
memset(&rgba, 0, sizeof(rgba));
3802
for (int j = 0; j < TGSI_QUAD_SIZE; j++) {
3803
if (size >= load_size && offset.u[j] <= (size - load_size)) {
3804
for (int chan = 0; chan < load_size / 4; chan++)
3805
rgba[chan].u[j] = *(uint32_t *)(ptr + offset.u[j] + chan * 4);
3806
}
3807
}
3808
3809
for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3810
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3811
store_dest(mach, &rgba[chan], &inst->Dst[0], inst, chan);
3812
}
3813
}
3814
}
3815
3816
static void
3817
exec_load(struct tgsi_exec_machine *mach,
3818
const struct tgsi_full_instruction *inst)
3819
{
3820
if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
3821
exec_load_img(mach, inst);
3822
else
3823
exec_load_membuf(mach, inst);
3824
}
3825
3826
static uint
3827
fetch_store_img_unit(struct tgsi_exec_machine *mach,
3828
const struct tgsi_full_dst_register *dst)
3829
{
3830
uint unit = 0;
3831
int i;
3832
if (dst->Register.Indirect) {
3833
union tgsi_exec_channel indir_index, index2;
3834
const uint execmask = mach->ExecMask;
3835
index2.i[0] =
3836
index2.i[1] =
3837
index2.i[2] =
3838
index2.i[3] = dst->Indirect.Index;
3839
3840
fetch_src_file_channel(mach,
3841
dst->Indirect.File,
3842
dst->Indirect.Swizzle,
3843
&index2,
3844
&ZeroVec,
3845
&indir_index);
3846
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3847
if (execmask & (1 << i)) {
3848
unit = dst->Register.Index + indir_index.i[i];
3849
break;
3850
}
3851
}
3852
} else {
3853
unit = dst->Register.Index;
3854
}
3855
return unit;
3856
}
3857
3858
static void
3859
exec_store_img(struct tgsi_exec_machine *mach,
3860
const struct tgsi_full_instruction *inst)
3861
{
3862
union tgsi_exec_channel r[3], sample_r;
3863
union tgsi_exec_channel value[4];
3864
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3865
struct tgsi_image_params params;
3866
int dim;
3867
int sample;
3868
int i, j;
3869
uint unit;
3870
unit = fetch_store_img_unit(mach, &inst->Dst[0]);
3871
dim = get_image_coord_dim(inst->Memory.Texture);
3872
sample = get_image_coord_sample(inst->Memory.Texture);
3873
assert(dim <= 3);
3874
3875
params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3876
params.unit = unit;
3877
params.tgsi_tex_instr = inst->Memory.Texture;
3878
params.format = inst->Memory.Format;
3879
3880
for (i = 0; i < dim; i++) {
3881
IFETCH(&r[i], 0, TGSI_CHAN_X + i);
3882
}
3883
3884
for (i = 0; i < 4; i++) {
3885
FETCH(&value[i], 1, TGSI_CHAN_X + i);
3886
}
3887
if (sample)
3888
IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
3889
3890
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3891
rgba[0][j] = value[0].f[j];
3892
rgba[1][j] = value[1].f[j];
3893
rgba[2][j] = value[2].f[j];
3894
rgba[3][j] = value[3].f[j];
3895
}
3896
3897
mach->Image->store(mach->Image, &params,
3898
r[0].i, r[1].i, r[2].i, sample_r.i,
3899
rgba);
3900
}
3901
3902
static void
3903
exec_store_buf(struct tgsi_exec_machine *mach,
3904
const struct tgsi_full_instruction *inst)
3905
{
3906
uint32_t unit = fetch_store_img_unit(mach, &inst->Dst[0]);
3907
uint32_t size;
3908
char *ptr = mach->Buffer->lookup(mach->Buffer, unit, &size);
3909
3910
int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3911
3912
union tgsi_exec_channel offset;
3913
IFETCH(&offset, 0, TGSI_CHAN_X);
3914
3915
union tgsi_exec_channel value[4];
3916
for (int i = 0; i < 4; i++)
3917
FETCH(&value[i], 1, TGSI_CHAN_X + i);
3918
3919
for (int j = 0; j < TGSI_QUAD_SIZE; j++) {
3920
if (!(execmask & (1 << j)))
3921
continue;
3922
if (size < offset.u[j])
3923
continue;
3924
3925
uint32_t *invocation_ptr = (uint32_t *)(ptr + offset.u[j]);
3926
uint32_t size_avail = size - offset.u[j];
3927
3928
for (int chan = 0; chan < MIN2(4, size_avail / 4); chan++) {
3929
if (inst->Dst[0].Register.WriteMask & (1 << chan))
3930
memcpy(&invocation_ptr[chan], &value[chan].u[j], 4);
3931
}
3932
}
3933
}
3934
3935
static void
3936
exec_store_mem(struct tgsi_exec_machine *mach,
3937
const struct tgsi_full_instruction *inst)
3938
{
3939
union tgsi_exec_channel r[3];
3940
union tgsi_exec_channel value[4];
3941
uint i, chan;
3942
char *ptr = mach->LocalMem;
3943
int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3944
3945
IFETCH(&r[0], 0, TGSI_CHAN_X);
3946
3947
for (i = 0; i < 4; i++) {
3948
FETCH(&value[i], 1, TGSI_CHAN_X + i);
3949
}
3950
3951
if (r[0].u[0] >= mach->LocalMemSize)
3952
return;
3953
ptr += r[0].u[0];
3954
3955
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3956
if (execmask & (1 << i)) {
3957
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3958
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3959
memcpy(ptr + (chan * 4), &value[chan].u[0], 4);
3960
}
3961
}
3962
}
3963
}
3964
}
3965
3966
static void
3967
exec_store(struct tgsi_exec_machine *mach,
3968
const struct tgsi_full_instruction *inst)
3969
{
3970
if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
3971
exec_store_img(mach, inst);
3972
else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
3973
exec_store_buf(mach, inst);
3974
else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
3975
exec_store_mem(mach, inst);
3976
}
3977
3978
static void
3979
exec_atomop_img(struct tgsi_exec_machine *mach,
3980
const struct tgsi_full_instruction *inst)
3981
{
3982
union tgsi_exec_channel r[4], sample_r;
3983
union tgsi_exec_channel value[4], value2[4];
3984
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3985
float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3986
struct tgsi_image_params params;
3987
int dim;
3988
int sample;
3989
int i, j;
3990
uint unit, chan;
3991
unit = fetch_sampler_unit(mach, inst, 0);
3992
dim = get_image_coord_dim(inst->Memory.Texture);
3993
sample = get_image_coord_sample(inst->Memory.Texture);
3994
assert(dim <= 3);
3995
3996
params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3997
params.unit = unit;
3998
params.tgsi_tex_instr = inst->Memory.Texture;
3999
params.format = inst->Memory.Format;
4000
4001
for (i = 0; i < dim; i++) {
4002
IFETCH(&r[i], 1, TGSI_CHAN_X + i);
4003
}
4004
4005
for (i = 0; i < 4; i++) {
4006
FETCH(&value[i], 2, TGSI_CHAN_X + i);
4007
if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4008
FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4009
}
4010
if (sample)
4011
IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
4012
4013
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4014
rgba[0][j] = value[0].f[j];
4015
rgba[1][j] = value[1].f[j];
4016
rgba[2][j] = value[2].f[j];
4017
rgba[3][j] = value[3].f[j];
4018
}
4019
if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4020
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4021
rgba2[0][j] = value2[0].f[j];
4022
rgba2[1][j] = value2[1].f[j];
4023
rgba2[2][j] = value2[2].f[j];
4024
rgba2[3][j] = value2[3].f[j];
4025
}
4026
}
4027
4028
mach->Image->op(mach->Image, &params, inst->Instruction.Opcode,
4029
r[0].i, r[1].i, r[2].i, sample_r.i,
4030
rgba, rgba2);
4031
4032
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4033
r[0].f[j] = rgba[0][j];
4034
r[1].f[j] = rgba[1][j];
4035
r[2].f[j] = rgba[2][j];
4036
r[3].f[j] = rgba[3][j];
4037
}
4038
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4039
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4040
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
4041
}
4042
}
4043
}
4044
4045
static void
4046
exec_atomop_membuf(struct tgsi_exec_machine *mach,
4047
const struct tgsi_full_instruction *inst)
4048
{
4049
union tgsi_exec_channel offset, r0, r1;
4050
uint chan, i;
4051
int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
4052
IFETCH(&offset, 1, TGSI_CHAN_X);
4053
4054
if (!(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X))
4055
return;
4056
4057
void *ptr[TGSI_QUAD_SIZE];
4058
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
4059
uint32_t unit = fetch_sampler_unit(mach, inst, 0);
4060
uint32_t size;
4061
char *buffer = mach->Buffer->lookup(mach->Buffer, unit, &size);
4062
for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
4063
if (likely(size >= 4 && offset.u[i] <= size - 4))
4064
ptr[i] = buffer + offset.u[i];
4065
else
4066
ptr[i] = NULL;
4067
}
4068
} else {
4069
assert(inst->Src[0].Register.File == TGSI_FILE_MEMORY);
4070
4071
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4072
if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4))
4073
ptr[i] = (char *)mach->LocalMem + offset.u[i];
4074
else
4075
ptr[i] = NULL;
4076
}
4077
}
4078
4079
FETCH(&r0, 2, TGSI_CHAN_X);
4080
if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4081
FETCH(&r1, 3, TGSI_CHAN_X);
4082
4083
/* The load/op/store sequence has to happen inside the loop since ptr
4084
* may have the same ptr in some of the invocations.
4085
*/
4086
for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
4087
if (!(execmask & (1 << i)))
4088
continue;
4089
4090
uint32_t val = 0;
4091
if (ptr[i]) {
4092
memcpy(&val, ptr[i], sizeof(val));
4093
4094
uint32_t result;
4095
switch (inst->Instruction.Opcode) {
4096
case TGSI_OPCODE_ATOMUADD:
4097
result = val + r0.u[i];
4098
break;
4099
case TGSI_OPCODE_ATOMXOR:
4100
result = val ^ r0.u[i];
4101
break;
4102
case TGSI_OPCODE_ATOMOR:
4103
result = val | r0.u[i];
4104
break;
4105
case TGSI_OPCODE_ATOMAND:
4106
result = val & r0.u[i];
4107
break;
4108
case TGSI_OPCODE_ATOMUMIN:
4109
result = MIN2(val, r0.u[i]);
4110
break;
4111
case TGSI_OPCODE_ATOMUMAX:
4112
result = MAX2(val, r0.u[i]);
4113
break;
4114
case TGSI_OPCODE_ATOMIMIN:
4115
result = MIN2((int32_t)val, r0.i[i]);
4116
break;
4117
case TGSI_OPCODE_ATOMIMAX:
4118
result = MAX2((int32_t)val, r0.i[i]);
4119
break;
4120
case TGSI_OPCODE_ATOMXCHG:
4121
result = r0.u[i];
4122
break;
4123
case TGSI_OPCODE_ATOMCAS:
4124
if (val == r0.u[i])
4125
result = r1.u[i];
4126
else
4127
result = val;
4128
break;
4129
case TGSI_OPCODE_ATOMFADD:
4130
result = fui(uif(val) + r0.f[i]);
4131
break;
4132
default:
4133
unreachable("bad atomic op");
4134
}
4135
memcpy(ptr[i], &result, sizeof(result));
4136
}
4137
4138
r0.u[i] = val;
4139
}
4140
4141
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
4142
store_dest(mach, &r0, &inst->Dst[0], inst, chan);
4143
}
4144
4145
static void
4146
exec_atomop(struct tgsi_exec_machine *mach,
4147
const struct tgsi_full_instruction *inst)
4148
{
4149
if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4150
exec_atomop_img(mach, inst);
4151
else
4152
exec_atomop_membuf(mach, inst);
4153
}
4154
4155
static void
4156
exec_resq_img(struct tgsi_exec_machine *mach,
4157
const struct tgsi_full_instruction *inst)
4158
{
4159
int result[4];
4160
union tgsi_exec_channel r[4];
4161
uint unit;
4162
int i, chan, j;
4163
struct tgsi_image_params params;
4164
4165
unit = fetch_sampler_unit(mach, inst, 0);
4166
4167
params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
4168
params.unit = unit;
4169
params.tgsi_tex_instr = inst->Memory.Texture;
4170
params.format = inst->Memory.Format;
4171
4172
mach->Image->get_dims(mach->Image, &params, result);
4173
4174
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4175
for (j = 0; j < 4; j++) {
4176
r[j].i[i] = result[j];
4177
}
4178
}
4179
4180
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4181
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4182
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
4183
}
4184
}
4185
}
4186
4187
static void
4188
exec_resq_buf(struct tgsi_exec_machine *mach,
4189
const struct tgsi_full_instruction *inst)
4190
{
4191
uint32_t unit = fetch_sampler_unit(mach, inst, 0);
4192
uint32_t size;
4193
(void)mach->Buffer->lookup(mach->Buffer, unit, &size);
4194
4195
union tgsi_exec_channel r;
4196
for (int i = 0; i < TGSI_QUAD_SIZE; i++)
4197
r.i[i] = size;
4198
4199
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
4200
for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4201
store_dest(mach, &r, &inst->Dst[0], inst, TGSI_CHAN_X);
4202
}
4203
}
4204
}
4205
4206
static void
4207
exec_resq(struct tgsi_exec_machine *mach,
4208
const struct tgsi_full_instruction *inst)
4209
{
4210
if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4211
exec_resq_img(mach, inst);
4212
else
4213
exec_resq_buf(mach, inst);
4214
}
4215
4216
static void
4217
micro_f2u64(union tgsi_double_channel *dst,
4218
const union tgsi_exec_channel *src)
4219
{
4220
dst->u64[0] = (uint64_t)src->f[0];
4221
dst->u64[1] = (uint64_t)src->f[1];
4222
dst->u64[2] = (uint64_t)src->f[2];
4223
dst->u64[3] = (uint64_t)src->f[3];
4224
}
4225
4226
static void
4227
micro_f2i64(union tgsi_double_channel *dst,
4228
const union tgsi_exec_channel *src)
4229
{
4230
dst->i64[0] = (int64_t)src->f[0];
4231
dst->i64[1] = (int64_t)src->f[1];
4232
dst->i64[2] = (int64_t)src->f[2];
4233
dst->i64[3] = (int64_t)src->f[3];
4234
}
4235
4236
static void
4237
micro_u2i64(union tgsi_double_channel *dst,
4238
const union tgsi_exec_channel *src)
4239
{
4240
dst->u64[0] = (uint64_t)src->u[0];
4241
dst->u64[1] = (uint64_t)src->u[1];
4242
dst->u64[2] = (uint64_t)src->u[2];
4243
dst->u64[3] = (uint64_t)src->u[3];
4244
}
4245
4246
static void
4247
micro_i2i64(union tgsi_double_channel *dst,
4248
const union tgsi_exec_channel *src)
4249
{
4250
dst->i64[0] = (int64_t)src->i[0];
4251
dst->i64[1] = (int64_t)src->i[1];
4252
dst->i64[2] = (int64_t)src->i[2];
4253
dst->i64[3] = (int64_t)src->i[3];
4254
}
4255
4256
static void
4257
micro_d2u64(union tgsi_double_channel *dst,
4258
const union tgsi_double_channel *src)
4259
{
4260
dst->u64[0] = (uint64_t)src->d[0];
4261
dst->u64[1] = (uint64_t)src->d[1];
4262
dst->u64[2] = (uint64_t)src->d[2];
4263
dst->u64[3] = (uint64_t)src->d[3];
4264
}
4265
4266
static void
4267
micro_d2i64(union tgsi_double_channel *dst,
4268
const union tgsi_double_channel *src)
4269
{
4270
dst->i64[0] = (int64_t)src->d[0];
4271
dst->i64[1] = (int64_t)src->d[1];
4272
dst->i64[2] = (int64_t)src->d[2];
4273
dst->i64[3] = (int64_t)src->d[3];
4274
}
4275
4276
static void
4277
micro_u642d(union tgsi_double_channel *dst,
4278
const union tgsi_double_channel *src)
4279
{
4280
dst->d[0] = (double)src->u64[0];
4281
dst->d[1] = (double)src->u64[1];
4282
dst->d[2] = (double)src->u64[2];
4283
dst->d[3] = (double)src->u64[3];
4284
}
4285
4286
static void
4287
micro_i642d(union tgsi_double_channel *dst,
4288
const union tgsi_double_channel *src)
4289
{
4290
dst->d[0] = (double)src->i64[0];
4291
dst->d[1] = (double)src->i64[1];
4292
dst->d[2] = (double)src->i64[2];
4293
dst->d[3] = (double)src->i64[3];
4294
}
4295
4296
static void
4297
micro_u642f(union tgsi_exec_channel *dst,
4298
const union tgsi_double_channel *src)
4299
{
4300
dst->f[0] = (float)src->u64[0];
4301
dst->f[1] = (float)src->u64[1];
4302
dst->f[2] = (float)src->u64[2];
4303
dst->f[3] = (float)src->u64[3];
4304
}
4305
4306
static void
4307
micro_i642f(union tgsi_exec_channel *dst,
4308
const union tgsi_double_channel *src)
4309
{
4310
dst->f[0] = (float)src->i64[0];
4311
dst->f[1] = (float)src->i64[1];
4312
dst->f[2] = (float)src->i64[2];
4313
dst->f[3] = (float)src->i64[3];
4314
}
4315
4316
static void
4317
exec_t_2_64(struct tgsi_exec_machine *mach,
4318
const struct tgsi_full_instruction *inst,
4319
micro_dop_s op,
4320
enum tgsi_exec_datatype src_datatype)
4321
{
4322
union tgsi_exec_channel src;
4323
union tgsi_double_channel dst;
4324
4325
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
4326
fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
4327
op(&dst, &src);
4328
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
4329
}
4330
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
4331
fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
4332
op(&dst, &src);
4333
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
4334
}
4335
}
4336
4337
static void
4338
exec_64_2_t(struct tgsi_exec_machine *mach,
4339
const struct tgsi_full_instruction *inst,
4340
micro_sop_d op)
4341
{
4342
union tgsi_double_channel src;
4343
union tgsi_exec_channel dst;
4344
int wm = inst->Dst[0].Register.WriteMask;
4345
int i;
4346
int bit;
4347
for (i = 0; i < 2; i++) {
4348
bit = ffs(wm);
4349
if (bit) {
4350
wm &= ~(1 << (bit - 1));
4351
if (i == 0)
4352
fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
4353
else
4354
fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
4355
op(&dst, &src);
4356
store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1);
4357
}
4358
}
4359
}
4360
4361
static void
4362
micro_i2f(union tgsi_exec_channel *dst,
4363
const union tgsi_exec_channel *src)
4364
{
4365
dst->f[0] = (float)src->i[0];
4366
dst->f[1] = (float)src->i[1];
4367
dst->f[2] = (float)src->i[2];
4368
dst->f[3] = (float)src->i[3];
4369
}
4370
4371
static void
4372
micro_not(union tgsi_exec_channel *dst,
4373
const union tgsi_exec_channel *src)
4374
{
4375
dst->u[0] = ~src->u[0];
4376
dst->u[1] = ~src->u[1];
4377
dst->u[2] = ~src->u[2];
4378
dst->u[3] = ~src->u[3];
4379
}
4380
4381
static void
4382
micro_shl(union tgsi_exec_channel *dst,
4383
const union tgsi_exec_channel *src0,
4384
const union tgsi_exec_channel *src1)
4385
{
4386
unsigned masked_count;
4387
masked_count = src1->u[0] & 0x1f;
4388
dst->u[0] = src0->u[0] << masked_count;
4389
masked_count = src1->u[1] & 0x1f;
4390
dst->u[1] = src0->u[1] << masked_count;
4391
masked_count = src1->u[2] & 0x1f;
4392
dst->u[2] = src0->u[2] << masked_count;
4393
masked_count = src1->u[3] & 0x1f;
4394
dst->u[3] = src0->u[3] << masked_count;
4395
}
4396
4397
static void
4398
micro_and(union tgsi_exec_channel *dst,
4399
const union tgsi_exec_channel *src0,
4400
const union tgsi_exec_channel *src1)
4401
{
4402
dst->u[0] = src0->u[0] & src1->u[0];
4403
dst->u[1] = src0->u[1] & src1->u[1];
4404
dst->u[2] = src0->u[2] & src1->u[2];
4405
dst->u[3] = src0->u[3] & src1->u[3];
4406
}
4407
4408
static void
4409
micro_or(union tgsi_exec_channel *dst,
4410
const union tgsi_exec_channel *src0,
4411
const union tgsi_exec_channel *src1)
4412
{
4413
dst->u[0] = src0->u[0] | src1->u[0];
4414
dst->u[1] = src0->u[1] | src1->u[1];
4415
dst->u[2] = src0->u[2] | src1->u[2];
4416
dst->u[3] = src0->u[3] | src1->u[3];
4417
}
4418
4419
static void
4420
micro_xor(union tgsi_exec_channel *dst,
4421
const union tgsi_exec_channel *src0,
4422
const union tgsi_exec_channel *src1)
4423
{
4424
dst->u[0] = src0->u[0] ^ src1->u[0];
4425
dst->u[1] = src0->u[1] ^ src1->u[1];
4426
dst->u[2] = src0->u[2] ^ src1->u[2];
4427
dst->u[3] = src0->u[3] ^ src1->u[3];
4428
}
4429
4430
static void
4431
micro_mod(union tgsi_exec_channel *dst,
4432
const union tgsi_exec_channel *src0,
4433
const union tgsi_exec_channel *src1)
4434
{
4435
dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0;
4436
dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0;
4437
dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0;
4438
dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0;
4439
}
4440
4441
static void
4442
micro_f2i(union tgsi_exec_channel *dst,
4443
const union tgsi_exec_channel *src)
4444
{
4445
dst->i[0] = (int)src->f[0];
4446
dst->i[1] = (int)src->f[1];
4447
dst->i[2] = (int)src->f[2];
4448
dst->i[3] = (int)src->f[3];
4449
}
4450
4451
static void
4452
micro_fseq(union tgsi_exec_channel *dst,
4453
const union tgsi_exec_channel *src0,
4454
const union tgsi_exec_channel *src1)
4455
{
4456
dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;
4457
dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;
4458
dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;
4459
dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;
4460
}
4461
4462
static void
4463
micro_fsge(union tgsi_exec_channel *dst,
4464
const union tgsi_exec_channel *src0,
4465
const union tgsi_exec_channel *src1)
4466
{
4467
dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;
4468
dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;
4469
dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;
4470
dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;
4471
}
4472
4473
static void
4474
micro_fslt(union tgsi_exec_channel *dst,
4475
const union tgsi_exec_channel *src0,
4476
const union tgsi_exec_channel *src1)
4477
{
4478
dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;
4479
dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;
4480
dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;
4481
dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;
4482
}
4483
4484
static void
4485
micro_fsne(union tgsi_exec_channel *dst,
4486
const union tgsi_exec_channel *src0,
4487
const union tgsi_exec_channel *src1)
4488
{
4489
dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;
4490
dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;
4491
dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;
4492
dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;
4493
}
4494
4495
static void
4496
micro_idiv(union tgsi_exec_channel *dst,
4497
const union tgsi_exec_channel *src0,
4498
const union tgsi_exec_channel *src1)
4499
{
4500
dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;
4501
dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;
4502
dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;
4503
dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;
4504
}
4505
4506
static void
4507
micro_imax(union tgsi_exec_channel *dst,
4508
const union tgsi_exec_channel *src0,
4509
const union tgsi_exec_channel *src1)
4510
{
4511
dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
4512
dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
4513
dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
4514
dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
4515
}
4516
4517
static void
4518
micro_imin(union tgsi_exec_channel *dst,
4519
const union tgsi_exec_channel *src0,
4520
const union tgsi_exec_channel *src1)
4521
{
4522
dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
4523
dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
4524
dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
4525
dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
4526
}
4527
4528
static void
4529
micro_isge(union tgsi_exec_channel *dst,
4530
const union tgsi_exec_channel *src0,
4531
const union tgsi_exec_channel *src1)
4532
{
4533
dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
4534
dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
4535
dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
4536
dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
4537
}
4538
4539
static void
4540
micro_ishr(union tgsi_exec_channel *dst,
4541
const union tgsi_exec_channel *src0,
4542
const union tgsi_exec_channel *src1)
4543
{
4544
unsigned masked_count;
4545
masked_count = src1->i[0] & 0x1f;
4546
dst->i[0] = src0->i[0] >> masked_count;
4547
masked_count = src1->i[1] & 0x1f;
4548
dst->i[1] = src0->i[1] >> masked_count;
4549
masked_count = src1->i[2] & 0x1f;
4550
dst->i[2] = src0->i[2] >> masked_count;
4551
masked_count = src1->i[3] & 0x1f;
4552
dst->i[3] = src0->i[3] >> masked_count;
4553
}
4554
4555
static void
4556
micro_islt(union tgsi_exec_channel *dst,
4557
const union tgsi_exec_channel *src0,
4558
const union tgsi_exec_channel *src1)
4559
{
4560
dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
4561
dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
4562
dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
4563
dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
4564
}
4565
4566
static void
4567
micro_f2u(union tgsi_exec_channel *dst,
4568
const union tgsi_exec_channel *src)
4569
{
4570
dst->u[0] = (uint)src->f[0];
4571
dst->u[1] = (uint)src->f[1];
4572
dst->u[2] = (uint)src->f[2];
4573
dst->u[3] = (uint)src->f[3];
4574
}
4575
4576
static void
4577
micro_u2f(union tgsi_exec_channel *dst,
4578
const union tgsi_exec_channel *src)
4579
{
4580
dst->f[0] = (float)src->u[0];
4581
dst->f[1] = (float)src->u[1];
4582
dst->f[2] = (float)src->u[2];
4583
dst->f[3] = (float)src->u[3];
4584
}
4585
4586
static void
4587
micro_uadd(union tgsi_exec_channel *dst,
4588
const union tgsi_exec_channel *src0,
4589
const union tgsi_exec_channel *src1)
4590
{
4591
dst->u[0] = src0->u[0] + src1->u[0];
4592
dst->u[1] = src0->u[1] + src1->u[1];
4593
dst->u[2] = src0->u[2] + src1->u[2];
4594
dst->u[3] = src0->u[3] + src1->u[3];
4595
}
4596
4597
static void
4598
micro_udiv(union tgsi_exec_channel *dst,
4599
const union tgsi_exec_channel *src0,
4600
const union tgsi_exec_channel *src1)
4601
{
4602
dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
4603
dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
4604
dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
4605
dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
4606
}
4607
4608
static void
4609
micro_umad(union tgsi_exec_channel *dst,
4610
const union tgsi_exec_channel *src0,
4611
const union tgsi_exec_channel *src1,
4612
const union tgsi_exec_channel *src2)
4613
{
4614
dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
4615
dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
4616
dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
4617
dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
4618
}
4619
4620
static void
4621
micro_umax(union tgsi_exec_channel *dst,
4622
const union tgsi_exec_channel *src0,
4623
const union tgsi_exec_channel *src1)
4624
{
4625
dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
4626
dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
4627
dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
4628
dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
4629
}
4630
4631
static void
4632
micro_umin(union tgsi_exec_channel *dst,
4633
const union tgsi_exec_channel *src0,
4634
const union tgsi_exec_channel *src1)
4635
{
4636
dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
4637
dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
4638
dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
4639
dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
4640
}
4641
4642
static void
4643
micro_umod(union tgsi_exec_channel *dst,
4644
const union tgsi_exec_channel *src0,
4645
const union tgsi_exec_channel *src1)
4646
{
4647
dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
4648
dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
4649
dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
4650
dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
4651
}
4652
4653
static void
4654
micro_umul(union tgsi_exec_channel *dst,
4655
const union tgsi_exec_channel *src0,
4656
const union tgsi_exec_channel *src1)
4657
{
4658
dst->u[0] = src0->u[0] * src1->u[0];
4659
dst->u[1] = src0->u[1] * src1->u[1];
4660
dst->u[2] = src0->u[2] * src1->u[2];
4661
dst->u[3] = src0->u[3] * src1->u[3];
4662
}
4663
4664
static void
4665
micro_imul_hi(union tgsi_exec_channel *dst,
4666
const union tgsi_exec_channel *src0,
4667
const union tgsi_exec_channel *src1)
4668
{
4669
#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4670
dst->i[0] = I64M(src0->i[0], src1->i[0]);
4671
dst->i[1] = I64M(src0->i[1], src1->i[1]);
4672
dst->i[2] = I64M(src0->i[2], src1->i[2]);
4673
dst->i[3] = I64M(src0->i[3], src1->i[3]);
4674
#undef I64M
4675
}
4676
4677
static void
4678
micro_umul_hi(union tgsi_exec_channel *dst,
4679
const union tgsi_exec_channel *src0,
4680
const union tgsi_exec_channel *src1)
4681
{
4682
#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4683
dst->u[0] = U64M(src0->u[0], src1->u[0]);
4684
dst->u[1] = U64M(src0->u[1], src1->u[1]);
4685
dst->u[2] = U64M(src0->u[2], src1->u[2]);
4686
dst->u[3] = U64M(src0->u[3], src1->u[3]);
4687
#undef U64M
4688
}
4689
4690
static void
4691
micro_useq(union tgsi_exec_channel *dst,
4692
const union tgsi_exec_channel *src0,
4693
const union tgsi_exec_channel *src1)
4694
{
4695
dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
4696
dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
4697
dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
4698
dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
4699
}
4700
4701
static void
4702
micro_usge(union tgsi_exec_channel *dst,
4703
const union tgsi_exec_channel *src0,
4704
const union tgsi_exec_channel *src1)
4705
{
4706
dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
4707
dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
4708
dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
4709
dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
4710
}
4711
4712
static void
4713
micro_ushr(union tgsi_exec_channel *dst,
4714
const union tgsi_exec_channel *src0,
4715
const union tgsi_exec_channel *src1)
4716
{
4717
unsigned masked_count;
4718
masked_count = src1->u[0] & 0x1f;
4719
dst->u[0] = src0->u[0] >> masked_count;
4720
masked_count = src1->u[1] & 0x1f;
4721
dst->u[1] = src0->u[1] >> masked_count;
4722
masked_count = src1->u[2] & 0x1f;
4723
dst->u[2] = src0->u[2] >> masked_count;
4724
masked_count = src1->u[3] & 0x1f;
4725
dst->u[3] = src0->u[3] >> masked_count;
4726
}
4727
4728
static void
4729
micro_uslt(union tgsi_exec_channel *dst,
4730
const union tgsi_exec_channel *src0,
4731
const union tgsi_exec_channel *src1)
4732
{
4733
dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
4734
dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
4735
dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
4736
dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
4737
}
4738
4739
static void
4740
micro_usne(union tgsi_exec_channel *dst,
4741
const union tgsi_exec_channel *src0,
4742
const union tgsi_exec_channel *src1)
4743
{
4744
dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
4745
dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
4746
dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
4747
dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
4748
}
4749
4750
static void
4751
micro_uarl(union tgsi_exec_channel *dst,
4752
const union tgsi_exec_channel *src)
4753
{
4754
dst->i[0] = src->u[0];
4755
dst->i[1] = src->u[1];
4756
dst->i[2] = src->u[2];
4757
dst->i[3] = src->u[3];
4758
}
4759
4760
/**
4761
* Signed bitfield extract (i.e. sign-extend the extracted bits)
4762
*/
4763
static void
4764
micro_ibfe(union tgsi_exec_channel *dst,
4765
const union tgsi_exec_channel *src0,
4766
const union tgsi_exec_channel *src1,
4767
const union tgsi_exec_channel *src2)
4768
{
4769
int i;
4770
for (i = 0; i < 4; i++) {
4771
int width = src2->i[i];
4772
int offset = src1->i[i] & 0x1f;
4773
if (width == 32 && offset == 0) {
4774
dst->i[i] = src0->i[i];
4775
continue;
4776
}
4777
width &= 0x1f;
4778
if (width == 0)
4779
dst->i[i] = 0;
4780
else if (width + offset < 32)
4781
dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
4782
else
4783
dst->i[i] = src0->i[i] >> offset;
4784
}
4785
}
4786
4787
/**
4788
* Unsigned bitfield extract
4789
*/
4790
static void
4791
micro_ubfe(union tgsi_exec_channel *dst,
4792
const union tgsi_exec_channel *src0,
4793
const union tgsi_exec_channel *src1,
4794
const union tgsi_exec_channel *src2)
4795
{
4796
int i;
4797
for (i = 0; i < 4; i++) {
4798
int width = src2->u[i];
4799
int offset = src1->u[i] & 0x1f;
4800
if (width == 32 && offset == 0) {
4801
dst->u[i] = src0->u[i];
4802
continue;
4803
}
4804
width &= 0x1f;
4805
if (width == 0)
4806
dst->u[i] = 0;
4807
else if (width + offset < 32)
4808
dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
4809
else
4810
dst->u[i] = src0->u[i] >> offset;
4811
}
4812
}
4813
4814
/**
4815
* Bitfield insert: copy low bits from src1 into a region of src0.
4816
*/
4817
static void
4818
micro_bfi(union tgsi_exec_channel *dst,
4819
const union tgsi_exec_channel *src0,
4820
const union tgsi_exec_channel *src1,
4821
const union tgsi_exec_channel *src2,
4822
const union tgsi_exec_channel *src3)
4823
{
4824
int i;
4825
for (i = 0; i < 4; i++) {
4826
int width = src3->u[i];
4827
int offset = src2->u[i] & 0x1f;
4828
if (width == 32) {
4829
dst->u[i] = src1->u[i];
4830
} else {
4831
int bitmask = ((1 << width) - 1) << offset;
4832
dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
4833
}
4834
}
4835
}
4836
4837
static void
4838
micro_brev(union tgsi_exec_channel *dst,
4839
const union tgsi_exec_channel *src)
4840
{
4841
dst->u[0] = util_bitreverse(src->u[0]);
4842
dst->u[1] = util_bitreverse(src->u[1]);
4843
dst->u[2] = util_bitreverse(src->u[2]);
4844
dst->u[3] = util_bitreverse(src->u[3]);
4845
}
4846
4847
static void
4848
micro_popc(union tgsi_exec_channel *dst,
4849
const union tgsi_exec_channel *src)
4850
{
4851
dst->u[0] = util_bitcount(src->u[0]);
4852
dst->u[1] = util_bitcount(src->u[1]);
4853
dst->u[2] = util_bitcount(src->u[2]);
4854
dst->u[3] = util_bitcount(src->u[3]);
4855
}
4856
4857
static void
4858
micro_lsb(union tgsi_exec_channel *dst,
4859
const union tgsi_exec_channel *src)
4860
{
4861
dst->i[0] = ffs(src->u[0]) - 1;
4862
dst->i[1] = ffs(src->u[1]) - 1;
4863
dst->i[2] = ffs(src->u[2]) - 1;
4864
dst->i[3] = ffs(src->u[3]) - 1;
4865
}
4866
4867
static void
4868
micro_imsb(union tgsi_exec_channel *dst,
4869
const union tgsi_exec_channel *src)
4870
{
4871
dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
4872
dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
4873
dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
4874
dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
4875
}
4876
4877
static void
4878
micro_umsb(union tgsi_exec_channel *dst,
4879
const union tgsi_exec_channel *src)
4880
{
4881
dst->i[0] = util_last_bit(src->u[0]) - 1;
4882
dst->i[1] = util_last_bit(src->u[1]) - 1;
4883
dst->i[2] = util_last_bit(src->u[2]) - 1;
4884
dst->i[3] = util_last_bit(src->u[3]) - 1;
4885
}
4886
4887
4888
static void
4889
exec_interp_at_sample(struct tgsi_exec_machine *mach,
4890
const struct tgsi_full_instruction *inst)
4891
{
4892
union tgsi_exec_channel index;
4893
union tgsi_exec_channel index2D;
4894
union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
4895
const struct tgsi_full_src_register *reg = &inst->Src[0];
4896
4897
assert(reg->Register.File == TGSI_FILE_INPUT);
4898
assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE);
4899
4900
get_index_registers(mach, reg, &index, &index2D);
4901
float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX];
4902
4903
/* Short cut: sample 0 is like a normal fetch */
4904
for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4905
if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4906
continue;
4907
4908
fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
4909
&result[chan]);
4910
if (sample != 0.0f) {
4911
4912
/* TODO: define the samples > 0, but so far we only do fake MSAA */
4913
float x = 0;
4914
float y = 0;
4915
4916
unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan];
4917
assert(pos >= 0);
4918
assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
4919
mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]);
4920
}
4921
store_dest(mach, &result[chan], &inst->Dst[0], inst, chan);
4922
}
4923
}
4924
4925
4926
static void
4927
exec_interp_at_offset(struct tgsi_exec_machine *mach,
4928
const struct tgsi_full_instruction *inst)
4929
{
4930
union tgsi_exec_channel index;
4931
union tgsi_exec_channel index2D;
4932
union tgsi_exec_channel ofsx;
4933
union tgsi_exec_channel ofsy;
4934
const struct tgsi_full_src_register *reg = &inst->Src[0];
4935
4936
assert(reg->Register.File == TGSI_FILE_INPUT);
4937
4938
get_index_registers(mach, reg, &index, &index2D);
4939
unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0];
4940
4941
fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
4942
fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
4943
4944
for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4945
if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4946
continue;
4947
union tgsi_exec_channel result;
4948
fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result);
4949
mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result);
4950
store_dest(mach, &result, &inst->Dst[0], inst, chan);
4951
}
4952
}
4953
4954
4955
static void
4956
exec_interp_at_centroid(struct tgsi_exec_machine *mach,
4957
const struct tgsi_full_instruction *inst)
4958
{
4959
union tgsi_exec_channel index;
4960
union tgsi_exec_channel index2D;
4961
union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
4962
const struct tgsi_full_src_register *reg = &inst->Src[0];
4963
4964
assert(reg->Register.File == TGSI_FILE_INPUT);
4965
get_index_registers(mach, reg, &index, &index2D);
4966
4967
for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4968
if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4969
continue;
4970
4971
/* Here we should add the change to use a sample that lies within the
4972
* primitive (Section 15.2):
4973
*
4974
* "When interpolating variables declared using centroid in ,
4975
* the variable is sampled at a location within the pixel covered
4976
* by the primitive generating the fragment.
4977
* ...
4978
* The built-in functions interpolateAtCentroid ... will sample
4979
* variables as though they were declared with the centroid ...
4980
* qualifier[s]."
4981
*
4982
* Since we only support 1 sample currently, this is just a pass-through.
4983
*/
4984
fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
4985
&result[chan]);
4986
store_dest(mach, &result[chan], &inst->Dst[0], inst, chan);
4987
}
4988
4989
}
4990
4991
4992
/**
4993
* Execute a TGSI instruction.
4994
* Returns TRUE if a barrier instruction is hit,
4995
* otherwise FALSE.
4996
*/
4997
static boolean
4998
exec_instruction(
4999
struct tgsi_exec_machine *mach,
5000
const struct tgsi_full_instruction *inst,
5001
int *pc )
5002
{
5003
union tgsi_exec_channel r[10];
5004
5005
(*pc)++;
5006
5007
switch (inst->Instruction.Opcode) {
5008
case TGSI_OPCODE_ARL:
5009
exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_FLOAT);
5010
break;
5011
5012
case TGSI_OPCODE_MOV:
5013
exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_FLOAT);
5014
break;
5015
5016
case TGSI_OPCODE_LIT:
5017
exec_lit(mach, inst);
5018
break;
5019
5020
case TGSI_OPCODE_RCP:
5021
exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT);
5022
break;
5023
5024
case TGSI_OPCODE_RSQ:
5025
exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT);
5026
break;
5027
5028
case TGSI_OPCODE_EXP:
5029
exec_exp(mach, inst);
5030
break;
5031
5032
case TGSI_OPCODE_LOG:
5033
exec_log(mach, inst);
5034
break;
5035
5036
case TGSI_OPCODE_MUL:
5037
exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT);
5038
break;
5039
5040
case TGSI_OPCODE_ADD:
5041
exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT);
5042
break;
5043
5044
case TGSI_OPCODE_DP3:
5045
exec_dp3(mach, inst);
5046
break;
5047
5048
case TGSI_OPCODE_DP4:
5049
exec_dp4(mach, inst);
5050
break;
5051
5052
case TGSI_OPCODE_DST:
5053
exec_dst(mach, inst);
5054
break;
5055
5056
case TGSI_OPCODE_MIN:
5057
exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT);
5058
break;
5059
5060
case TGSI_OPCODE_MAX:
5061
exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT);
5062
break;
5063
5064
case TGSI_OPCODE_SLT:
5065
exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT);
5066
break;
5067
5068
case TGSI_OPCODE_SGE:
5069
exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT);
5070
break;
5071
5072
case TGSI_OPCODE_MAD:
5073
exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT);
5074
break;
5075
5076
case TGSI_OPCODE_LRP:
5077
exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT);
5078
break;
5079
5080
case TGSI_OPCODE_SQRT:
5081
exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT);
5082
break;
5083
5084
case TGSI_OPCODE_FRC:
5085
exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT);
5086
break;
5087
5088
case TGSI_OPCODE_FLR:
5089
exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT);
5090
break;
5091
5092
case TGSI_OPCODE_ROUND:
5093
exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT);
5094
break;
5095
5096
case TGSI_OPCODE_EX2:
5097
exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT);
5098
break;
5099
5100
case TGSI_OPCODE_LG2:
5101
exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT);
5102
break;
5103
5104
case TGSI_OPCODE_POW:
5105
exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT);
5106
break;
5107
5108
case TGSI_OPCODE_LDEXP:
5109
exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT);
5110
break;
5111
5112
case TGSI_OPCODE_COS:
5113
exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT);
5114
break;
5115
5116
case TGSI_OPCODE_DDX_FINE:
5117
exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT);
5118
break;
5119
5120
case TGSI_OPCODE_DDX:
5121
exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT);
5122
break;
5123
5124
case TGSI_OPCODE_DDY_FINE:
5125
exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT);
5126
break;
5127
5128
case TGSI_OPCODE_DDY:
5129
exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT);
5130
break;
5131
5132
case TGSI_OPCODE_KILL:
5133
exec_kill (mach);
5134
break;
5135
5136
case TGSI_OPCODE_KILL_IF:
5137
exec_kill_if (mach, inst);
5138
break;
5139
5140
case TGSI_OPCODE_PK2H:
5141
exec_pk2h(mach, inst);
5142
break;
5143
5144
case TGSI_OPCODE_PK2US:
5145
assert (0);
5146
break;
5147
5148
case TGSI_OPCODE_PK4B:
5149
assert (0);
5150
break;
5151
5152
case TGSI_OPCODE_PK4UB:
5153
assert (0);
5154
break;
5155
5156
case TGSI_OPCODE_SEQ:
5157
exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT);
5158
break;
5159
5160
case TGSI_OPCODE_SGT:
5161
exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT);
5162
break;
5163
5164
case TGSI_OPCODE_SIN:
5165
exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT);
5166
break;
5167
5168
case TGSI_OPCODE_SLE:
5169
exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT);
5170
break;
5171
5172
case TGSI_OPCODE_SNE:
5173
exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT);
5174
break;
5175
5176
case TGSI_OPCODE_TEX:
5177
/* simple texture lookup */
5178
/* src[0] = texcoord */
5179
/* src[1] = sampler unit */
5180
exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
5181
break;
5182
5183
case TGSI_OPCODE_TXB:
5184
/* Texture lookup with lod bias */
5185
/* src[0] = texcoord (src[0].w = LOD bias) */
5186
/* src[1] = sampler unit */
5187
exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
5188
break;
5189
5190
case TGSI_OPCODE_TXD:
5191
/* Texture lookup with explict partial derivatives */
5192
/* src[0] = texcoord */
5193
/* src[1] = d[strq]/dx */
5194
/* src[2] = d[strq]/dy */
5195
/* src[3] = sampler unit */
5196
exec_txd(mach, inst);
5197
break;
5198
5199
case TGSI_OPCODE_TXL:
5200
/* Texture lookup with explit LOD */
5201
/* src[0] = texcoord (src[0].w = LOD) */
5202
/* src[1] = sampler unit */
5203
exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
5204
break;
5205
5206
case TGSI_OPCODE_TXP:
5207
/* Texture lookup with projection */
5208
/* src[0] = texcoord (src[0].w = projection) */
5209
/* src[1] = sampler unit */
5210
exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
5211
break;
5212
5213
case TGSI_OPCODE_TG4:
5214
/* src[0] = texcoord */
5215
/* src[1] = component */
5216
/* src[2] = sampler unit */
5217
exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
5218
break;
5219
5220
case TGSI_OPCODE_LODQ:
5221
/* src[0] = texcoord */
5222
/* src[1] = sampler unit */
5223
exec_lodq(mach, inst);
5224
break;
5225
5226
case TGSI_OPCODE_UP2H:
5227
exec_up2h(mach, inst);
5228
break;
5229
5230
case TGSI_OPCODE_UP2US:
5231
assert (0);
5232
break;
5233
5234
case TGSI_OPCODE_UP4B:
5235
assert (0);
5236
break;
5237
5238
case TGSI_OPCODE_UP4UB:
5239
assert (0);
5240
break;
5241
5242
case TGSI_OPCODE_ARR:
5243
exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_FLOAT);
5244
break;
5245
5246
case TGSI_OPCODE_CAL:
5247
/* skip the call if no execution channels are enabled */
5248
if (mach->ExecMask) {
5249
/* do the call */
5250
5251
/* First, record the depths of the execution stacks.
5252
* This is important for deeply nested/looped return statements.
5253
* We have to unwind the stacks by the correct amount. For a
5254
* real code generator, we could determine the number of entries
5255
* to pop off each stack with simple static analysis and avoid
5256
* implementing this data structure at run time.
5257
*/
5258
mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
5259
mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
5260
mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
5261
mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
5262
mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
5263
/* note that PC was already incremented above */
5264
mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
5265
5266
mach->CallStackTop++;
5267
5268
/* Second, push the Cond, Loop, Cont, Func stacks */
5269
assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5270
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5271
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5272
assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
5273
assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5274
assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
5275
5276
mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5277
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5278
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5279
mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
5280
mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5281
mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
5282
5283
/* Finally, jump to the subroutine. The label is a pointer
5284
* (an instruction number) to the BGNSUB instruction.
5285
*/
5286
*pc = inst->Label.Label;
5287
assert(mach->Instructions[*pc].Instruction.Opcode
5288
== TGSI_OPCODE_BGNSUB);
5289
}
5290
break;
5291
5292
case TGSI_OPCODE_RET:
5293
mach->FuncMask &= ~mach->ExecMask;
5294
UPDATE_EXEC_MASK(mach);
5295
5296
if (mach->FuncMask == 0x0) {
5297
/* really return now (otherwise, keep executing */
5298
5299
if (mach->CallStackTop == 0) {
5300
/* returning from main() */
5301
mach->CondStackTop = 0;
5302
mach->LoopStackTop = 0;
5303
mach->ContStackTop = 0;
5304
mach->LoopLabelStackTop = 0;
5305
mach->SwitchStackTop = 0;
5306
mach->BreakStackTop = 0;
5307
*pc = -1;
5308
return FALSE;
5309
}
5310
5311
assert(mach->CallStackTop > 0);
5312
mach->CallStackTop--;
5313
5314
mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5315
mach->CondMask = mach->CondStack[mach->CondStackTop];
5316
5317
mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5318
mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5319
5320
mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5321
mach->ContMask = mach->ContStack[mach->ContStackTop];
5322
5323
mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5324
mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5325
5326
mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5327
mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5328
5329
assert(mach->FuncStackTop > 0);
5330
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5331
5332
*pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5333
5334
UPDATE_EXEC_MASK(mach);
5335
}
5336
break;
5337
5338
case TGSI_OPCODE_SSG:
5339
exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT);
5340
break;
5341
5342
case TGSI_OPCODE_CMP:
5343
exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT);
5344
break;
5345
5346
case TGSI_OPCODE_DIV:
5347
exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT);
5348
break;
5349
5350
case TGSI_OPCODE_DP2:
5351
exec_dp2(mach, inst);
5352
break;
5353
5354
case TGSI_OPCODE_IF:
5355
/* push CondMask */
5356
assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5357
mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5358
FETCH( &r[0], 0, TGSI_CHAN_X );
5359
for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
5360
if (!r[0].f[i])
5361
mach->CondMask &= ~(1 << i);
5362
}
5363
UPDATE_EXEC_MASK(mach);
5364
/* If no channels are taking the then branch, jump to ELSE. */
5365
if (!mach->CondMask)
5366
*pc = inst->Label.Label;
5367
break;
5368
5369
case TGSI_OPCODE_UIF:
5370
/* push CondMask */
5371
assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5372
mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5373
IFETCH( &r[0], 0, TGSI_CHAN_X );
5374
for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
5375
if (!r[0].u[i])
5376
mach->CondMask &= ~(1 << i);
5377
}
5378
UPDATE_EXEC_MASK(mach);
5379
/* If no channels are taking the then branch, jump to ELSE. */
5380
if (!mach->CondMask)
5381
*pc = inst->Label.Label;
5382
break;
5383
5384
case TGSI_OPCODE_ELSE:
5385
/* invert CondMask wrt previous mask */
5386
{
5387
uint prevMask;
5388
assert(mach->CondStackTop > 0);
5389
prevMask = mach->CondStack[mach->CondStackTop - 1];
5390
mach->CondMask = ~mach->CondMask & prevMask;
5391
UPDATE_EXEC_MASK(mach);
5392
5393
/* If no channels are taking ELSE, jump to ENDIF */
5394
if (!mach->CondMask)
5395
*pc = inst->Label.Label;
5396
}
5397
break;
5398
5399
case TGSI_OPCODE_ENDIF:
5400
/* pop CondMask */
5401
assert(mach->CondStackTop > 0);
5402
mach->CondMask = mach->CondStack[--mach->CondStackTop];
5403
UPDATE_EXEC_MASK(mach);
5404
break;
5405
5406
case TGSI_OPCODE_END:
5407
/* make sure we end primitives which haven't
5408
* been explicitly emitted */
5409
conditional_emit_primitive(mach);
5410
/* halt execution */
5411
*pc = -1;
5412
break;
5413
5414
case TGSI_OPCODE_CEIL:
5415
exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT);
5416
break;
5417
5418
case TGSI_OPCODE_I2F:
5419
exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_INT);
5420
break;
5421
5422
case TGSI_OPCODE_NOT:
5423
exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT);
5424
break;
5425
5426
case TGSI_OPCODE_TRUNC:
5427
exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT);
5428
break;
5429
5430
case TGSI_OPCODE_SHL:
5431
exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT);
5432
break;
5433
5434
case TGSI_OPCODE_AND:
5435
exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT);
5436
break;
5437
5438
case TGSI_OPCODE_OR:
5439
exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT);
5440
break;
5441
5442
case TGSI_OPCODE_MOD:
5443
exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT);
5444
break;
5445
5446
case TGSI_OPCODE_XOR:
5447
exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT);
5448
break;
5449
5450
case TGSI_OPCODE_TXF:
5451
exec_txf(mach, inst);
5452
break;
5453
5454
case TGSI_OPCODE_TXQ:
5455
exec_txq(mach, inst);
5456
break;
5457
5458
case TGSI_OPCODE_EMIT:
5459
emit_vertex(mach, inst);
5460
break;
5461
5462
case TGSI_OPCODE_ENDPRIM:
5463
emit_primitive(mach, inst);
5464
break;
5465
5466
case TGSI_OPCODE_BGNLOOP:
5467
/* push LoopMask and ContMasks */
5468
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5469
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5470
assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5471
assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5472
5473
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5474
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5475
mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
5476
mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5477
mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
5478
break;
5479
5480
case TGSI_OPCODE_ENDLOOP:
5481
/* Restore ContMask, but don't pop */
5482
assert(mach->ContStackTop > 0);
5483
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
5484
UPDATE_EXEC_MASK(mach);
5485
if (mach->ExecMask) {
5486
/* repeat loop: jump to instruction just past BGNLOOP */
5487
assert(mach->LoopLabelStackTop > 0);
5488
*pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
5489
}
5490
else {
5491
/* exit loop: pop LoopMask */
5492
assert(mach->LoopStackTop > 0);
5493
mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
5494
/* pop ContMask */
5495
assert(mach->ContStackTop > 0);
5496
mach->ContMask = mach->ContStack[--mach->ContStackTop];
5497
assert(mach->LoopLabelStackTop > 0);
5498
--mach->LoopLabelStackTop;
5499
5500
mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
5501
}
5502
UPDATE_EXEC_MASK(mach);
5503
break;
5504
5505
case TGSI_OPCODE_BRK:
5506
exec_break(mach);
5507
break;
5508
5509
case TGSI_OPCODE_CONT:
5510
/* turn off cont channels for each enabled exec channel */
5511
mach->ContMask &= ~mach->ExecMask;
5512
/* Todo: if mach->LoopMask == 0, jump to end of loop */
5513
UPDATE_EXEC_MASK(mach);
5514
break;
5515
5516
case TGSI_OPCODE_BGNSUB:
5517
/* no-op */
5518
break;
5519
5520
case TGSI_OPCODE_ENDSUB:
5521
/*
5522
* XXX: This really should be a no-op. We should never reach this opcode.
5523
*/
5524
5525
assert(mach->CallStackTop > 0);
5526
mach->CallStackTop--;
5527
5528
mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5529
mach->CondMask = mach->CondStack[mach->CondStackTop];
5530
5531
mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5532
mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5533
5534
mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5535
mach->ContMask = mach->ContStack[mach->ContStackTop];
5536
5537
mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5538
mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5539
5540
mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5541
mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5542
5543
assert(mach->FuncStackTop > 0);
5544
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5545
5546
*pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5547
5548
UPDATE_EXEC_MASK(mach);
5549
break;
5550
5551
case TGSI_OPCODE_NOP:
5552
break;
5553
5554
case TGSI_OPCODE_F2I:
5555
exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_FLOAT);
5556
break;
5557
5558
case TGSI_OPCODE_FSEQ:
5559
exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_FLOAT);
5560
break;
5561
5562
case TGSI_OPCODE_FSGE:
5563
exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_FLOAT);
5564
break;
5565
5566
case TGSI_OPCODE_FSLT:
5567
exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_FLOAT);
5568
break;
5569
5570
case TGSI_OPCODE_FSNE:
5571
exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_FLOAT);
5572
break;
5573
5574
case TGSI_OPCODE_IDIV:
5575
exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT);
5576
break;
5577
5578
case TGSI_OPCODE_IMAX:
5579
exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT);
5580
break;
5581
5582
case TGSI_OPCODE_IMIN:
5583
exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT);
5584
break;
5585
5586
case TGSI_OPCODE_INEG:
5587
exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT);
5588
break;
5589
5590
case TGSI_OPCODE_ISGE:
5591
exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT);
5592
break;
5593
5594
case TGSI_OPCODE_ISHR:
5595
exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT);
5596
break;
5597
5598
case TGSI_OPCODE_ISLT:
5599
exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT);
5600
break;
5601
5602
case TGSI_OPCODE_F2U:
5603
exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_FLOAT);
5604
break;
5605
5606
case TGSI_OPCODE_U2F:
5607
exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_UINT);
5608
break;
5609
5610
case TGSI_OPCODE_UADD:
5611
exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT);
5612
break;
5613
5614
case TGSI_OPCODE_UDIV:
5615
exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT);
5616
break;
5617
5618
case TGSI_OPCODE_UMAD:
5619
exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT);
5620
break;
5621
5622
case TGSI_OPCODE_UMAX:
5623
exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT);
5624
break;
5625
5626
case TGSI_OPCODE_UMIN:
5627
exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT);
5628
break;
5629
5630
case TGSI_OPCODE_UMOD:
5631
exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT);
5632
break;
5633
5634
case TGSI_OPCODE_UMUL:
5635
exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT);
5636
break;
5637
5638
case TGSI_OPCODE_IMUL_HI:
5639
exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT);
5640
break;
5641
5642
case TGSI_OPCODE_UMUL_HI:
5643
exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT);
5644
break;
5645
5646
case TGSI_OPCODE_USEQ:
5647
exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT);
5648
break;
5649
5650
case TGSI_OPCODE_USGE:
5651
exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT);
5652
break;
5653
5654
case TGSI_OPCODE_USHR:
5655
exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT);
5656
break;
5657
5658
case TGSI_OPCODE_USLT:
5659
exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT);
5660
break;
5661
5662
case TGSI_OPCODE_USNE:
5663
exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT);
5664
break;
5665
5666
case TGSI_OPCODE_SWITCH:
5667
exec_switch(mach, inst);
5668
break;
5669
5670
case TGSI_OPCODE_CASE:
5671
exec_case(mach, inst);
5672
break;
5673
5674
case TGSI_OPCODE_DEFAULT:
5675
exec_default(mach);
5676
break;
5677
5678
case TGSI_OPCODE_ENDSWITCH:
5679
exec_endswitch(mach);
5680
break;
5681
5682
case TGSI_OPCODE_SAMPLE_I:
5683
exec_txf(mach, inst);
5684
break;
5685
5686
case TGSI_OPCODE_SAMPLE_I_MS:
5687
exec_txf(mach, inst);
5688
break;
5689
5690
case TGSI_OPCODE_SAMPLE:
5691
exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
5692
break;
5693
5694
case TGSI_OPCODE_SAMPLE_B:
5695
exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
5696
break;
5697
5698
case TGSI_OPCODE_SAMPLE_C:
5699
exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
5700
break;
5701
5702
case TGSI_OPCODE_SAMPLE_C_LZ:
5703
exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
5704
break;
5705
5706
case TGSI_OPCODE_SAMPLE_D:
5707
exec_sample_d(mach, inst);
5708
break;
5709
5710
case TGSI_OPCODE_SAMPLE_L:
5711
exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
5712
break;
5713
5714
case TGSI_OPCODE_GATHER4:
5715
exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE);
5716
break;
5717
5718
case TGSI_OPCODE_SVIEWINFO:
5719
exec_txq(mach, inst);
5720
break;
5721
5722
case TGSI_OPCODE_SAMPLE_POS:
5723
assert(0);
5724
break;
5725
5726
case TGSI_OPCODE_SAMPLE_INFO:
5727
assert(0);
5728
break;
5729
5730
case TGSI_OPCODE_LOD:
5731
exec_lodq(mach, inst);
5732
break;
5733
5734
case TGSI_OPCODE_UARL:
5735
exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_UINT);
5736
break;
5737
5738
case TGSI_OPCODE_UCMP:
5739
exec_ucmp(mach, inst);
5740
break;
5741
5742
case TGSI_OPCODE_IABS:
5743
exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT);
5744
break;
5745
5746
case TGSI_OPCODE_ISSG:
5747
exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT);
5748
break;
5749
5750
case TGSI_OPCODE_TEX2:
5751
/* simple texture lookup */
5752
/* src[0] = texcoord */
5753
/* src[1] = compare */
5754
/* src[2] = sampler unit */
5755
exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
5756
break;
5757
case TGSI_OPCODE_TXB2:
5758
/* simple texture lookup */
5759
/* src[0] = texcoord */
5760
/* src[1] = bias */
5761
/* src[2] = sampler unit */
5762
exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
5763
break;
5764
case TGSI_OPCODE_TXL2:
5765
/* simple texture lookup */
5766
/* src[0] = texcoord */
5767
/* src[1] = lod */
5768
/* src[2] = sampler unit */
5769
exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
5770
break;
5771
5772
case TGSI_OPCODE_IBFE:
5773
exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT);
5774
break;
5775
case TGSI_OPCODE_UBFE:
5776
exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT);
5777
break;
5778
case TGSI_OPCODE_BFI:
5779
exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT);
5780
break;
5781
case TGSI_OPCODE_BREV:
5782
exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT);
5783
break;
5784
case TGSI_OPCODE_POPC:
5785
exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT);
5786
break;
5787
case TGSI_OPCODE_LSB:
5788
exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_UINT);
5789
break;
5790
case TGSI_OPCODE_IMSB:
5791
exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT);
5792
break;
5793
case TGSI_OPCODE_UMSB:
5794
exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_UINT);
5795
break;
5796
5797
case TGSI_OPCODE_F2D:
5798
exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
5799
break;
5800
5801
case TGSI_OPCODE_D2F:
5802
exec_64_2_t(mach, inst, micro_d2f);
5803
break;
5804
5805
case TGSI_OPCODE_DABS:
5806
exec_double_unary(mach, inst, micro_dabs);
5807
break;
5808
5809
case TGSI_OPCODE_DNEG:
5810
exec_double_unary(mach, inst, micro_dneg);
5811
break;
5812
5813
case TGSI_OPCODE_DADD:
5814
exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
5815
break;
5816
5817
case TGSI_OPCODE_DDIV:
5818
exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
5819
break;
5820
5821
case TGSI_OPCODE_DMUL:
5822
exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
5823
break;
5824
5825
case TGSI_OPCODE_DMAX:
5826
exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
5827
break;
5828
5829
case TGSI_OPCODE_DMIN:
5830
exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
5831
break;
5832
5833
case TGSI_OPCODE_DSLT:
5834
exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
5835
break;
5836
5837
case TGSI_OPCODE_DSGE:
5838
exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
5839
break;
5840
5841
case TGSI_OPCODE_DSEQ:
5842
exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
5843
break;
5844
5845
case TGSI_OPCODE_DSNE:
5846
exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
5847
break;
5848
5849
case TGSI_OPCODE_DRCP:
5850
exec_double_unary(mach, inst, micro_drcp);
5851
break;
5852
5853
case TGSI_OPCODE_DSQRT:
5854
exec_double_unary(mach, inst, micro_dsqrt);
5855
break;
5856
5857
case TGSI_OPCODE_DRSQ:
5858
exec_double_unary(mach, inst, micro_drsq);
5859
break;
5860
5861
case TGSI_OPCODE_DMAD:
5862
exec_double_trinary(mach, inst, micro_dmad);
5863
break;
5864
5865
case TGSI_OPCODE_DFRAC:
5866
exec_double_unary(mach, inst, micro_dfrac);
5867
break;
5868
5869
case TGSI_OPCODE_DFLR:
5870
exec_double_unary(mach, inst, micro_dflr);
5871
break;
5872
5873
case TGSI_OPCODE_DLDEXP:
5874
exec_dldexp(mach, inst);
5875
break;
5876
5877
case TGSI_OPCODE_DFRACEXP:
5878
exec_dfracexp(mach, inst);
5879
break;
5880
5881
case TGSI_OPCODE_I2D:
5882
exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_FLOAT);
5883
break;
5884
5885
case TGSI_OPCODE_D2I:
5886
exec_64_2_t(mach, inst, micro_d2i);
5887
break;
5888
5889
case TGSI_OPCODE_U2D:
5890
exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_FLOAT);
5891
break;
5892
5893
case TGSI_OPCODE_D2U:
5894
exec_64_2_t(mach, inst, micro_d2u);
5895
break;
5896
5897
case TGSI_OPCODE_LOAD:
5898
exec_load(mach, inst);
5899
break;
5900
5901
case TGSI_OPCODE_STORE:
5902
exec_store(mach, inst);
5903
break;
5904
5905
case TGSI_OPCODE_ATOMUADD:
5906
case TGSI_OPCODE_ATOMXCHG:
5907
case TGSI_OPCODE_ATOMCAS:
5908
case TGSI_OPCODE_ATOMAND:
5909
case TGSI_OPCODE_ATOMOR:
5910
case TGSI_OPCODE_ATOMXOR:
5911
case TGSI_OPCODE_ATOMUMIN:
5912
case TGSI_OPCODE_ATOMUMAX:
5913
case TGSI_OPCODE_ATOMIMIN:
5914
case TGSI_OPCODE_ATOMIMAX:
5915
case TGSI_OPCODE_ATOMFADD:
5916
exec_atomop(mach, inst);
5917
break;
5918
5919
case TGSI_OPCODE_RESQ:
5920
exec_resq(mach, inst);
5921
break;
5922
case TGSI_OPCODE_BARRIER:
5923
case TGSI_OPCODE_MEMBAR:
5924
return TRUE;
5925
break;
5926
5927
case TGSI_OPCODE_I64ABS:
5928
exec_double_unary(mach, inst, micro_i64abs);
5929
break;
5930
5931
case TGSI_OPCODE_I64SSG:
5932
exec_double_unary(mach, inst, micro_i64sgn);
5933
break;
5934
5935
case TGSI_OPCODE_I64NEG:
5936
exec_double_unary(mach, inst, micro_i64neg);
5937
break;
5938
5939
case TGSI_OPCODE_U64SEQ:
5940
exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
5941
break;
5942
5943
case TGSI_OPCODE_U64SNE:
5944
exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
5945
break;
5946
5947
case TGSI_OPCODE_I64SLT:
5948
exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
5949
break;
5950
case TGSI_OPCODE_U64SLT:
5951
exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
5952
break;
5953
5954
case TGSI_OPCODE_I64SGE:
5955
exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
5956
break;
5957
case TGSI_OPCODE_U64SGE:
5958
exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
5959
break;
5960
5961
case TGSI_OPCODE_I64MIN:
5962
exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
5963
break;
5964
case TGSI_OPCODE_U64MIN:
5965
exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
5966
break;
5967
case TGSI_OPCODE_I64MAX:
5968
exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
5969
break;
5970
case TGSI_OPCODE_U64MAX:
5971
exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
5972
break;
5973
case TGSI_OPCODE_U64ADD:
5974
exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
5975
break;
5976
case TGSI_OPCODE_U64MUL:
5977
exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
5978
break;
5979
case TGSI_OPCODE_U64SHL:
5980
exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
5981
break;
5982
case TGSI_OPCODE_I64SHR:
5983
exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
5984
break;
5985
case TGSI_OPCODE_U64SHR:
5986
exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
5987
break;
5988
case TGSI_OPCODE_U64DIV:
5989
exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
5990
break;
5991
case TGSI_OPCODE_I64DIV:
5992
exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
5993
break;
5994
case TGSI_OPCODE_U64MOD:
5995
exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
5996
break;
5997
case TGSI_OPCODE_I64MOD:
5998
exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
5999
break;
6000
6001
case TGSI_OPCODE_F2U64:
6002
exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
6003
break;
6004
6005
case TGSI_OPCODE_F2I64:
6006
exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
6007
break;
6008
6009
case TGSI_OPCODE_U2I64:
6010
exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
6011
break;
6012
case TGSI_OPCODE_I2I64:
6013
exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
6014
break;
6015
6016
case TGSI_OPCODE_D2U64:
6017
exec_double_unary(mach, inst, micro_d2u64);
6018
break;
6019
6020
case TGSI_OPCODE_D2I64:
6021
exec_double_unary(mach, inst, micro_d2i64);
6022
break;
6023
6024
case TGSI_OPCODE_U642F:
6025
exec_64_2_t(mach, inst, micro_u642f);
6026
break;
6027
case TGSI_OPCODE_I642F:
6028
exec_64_2_t(mach, inst, micro_i642f);
6029
break;
6030
6031
case TGSI_OPCODE_U642D:
6032
exec_double_unary(mach, inst, micro_u642d);
6033
break;
6034
case TGSI_OPCODE_I642D:
6035
exec_double_unary(mach, inst, micro_i642d);
6036
break;
6037
case TGSI_OPCODE_INTERP_SAMPLE:
6038
exec_interp_at_sample(mach, inst);
6039
break;
6040
case TGSI_OPCODE_INTERP_OFFSET:
6041
exec_interp_at_offset(mach, inst);
6042
break;
6043
case TGSI_OPCODE_INTERP_CENTROID:
6044
exec_interp_at_centroid(mach, inst);
6045
break;
6046
default:
6047
assert( 0 );
6048
}
6049
return FALSE;
6050
}
6051
6052
static void
6053
tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
6054
{
6055
uint default_mask = 0xf;
6056
6057
mach->KillMask = 0;
6058
mach->OutputVertexOffset = 0;
6059
6060
if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
6061
for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
6062
mach->OutputPrimCount[i] = 0;
6063
mach->Primitives[i][0] = 0;
6064
}
6065
/* GS runs on a single primitive for now */
6066
default_mask = 0x1;
6067
}
6068
6069
if (mach->NonHelperMask == 0)
6070
mach->NonHelperMask = default_mask;
6071
mach->CondMask = default_mask;
6072
mach->LoopMask = default_mask;
6073
mach->ContMask = default_mask;
6074
mach->FuncMask = default_mask;
6075
mach->ExecMask = default_mask;
6076
6077
mach->Switch.mask = default_mask;
6078
6079
assert(mach->CondStackTop == 0);
6080
assert(mach->LoopStackTop == 0);
6081
assert(mach->ContStackTop == 0);
6082
assert(mach->SwitchStackTop == 0);
6083
assert(mach->BreakStackTop == 0);
6084
assert(mach->CallStackTop == 0);
6085
}
6086
6087
/**
6088
* Run TGSI interpreter.
6089
* \return bitmask of "alive" quad components
6090
*/
6091
uint
6092
tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
6093
{
6094
uint i;
6095
6096
mach->pc = start_pc;
6097
6098
if (!start_pc) {
6099
tgsi_exec_machine_setup_masks(mach);
6100
6101
/* execute declarations (interpolants) */
6102
for (i = 0; i < mach->NumDeclarations; i++) {
6103
exec_declaration( mach, mach->Declarations+i );
6104
}
6105
}
6106
6107
{
6108
#if DEBUG_EXECUTION
6109
struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS];
6110
struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
6111
uint inst = 1;
6112
6113
if (!start_pc) {
6114
memset(mach->Temps, 0, sizeof(temps));
6115
if (mach->Outputs)
6116
memset(mach->Outputs, 0, sizeof(outputs));
6117
memset(temps, 0, sizeof(temps));
6118
memset(outputs, 0, sizeof(outputs));
6119
}
6120
#endif
6121
6122
/* execute instructions, until pc is set to -1 */
6123
while (mach->pc != -1) {
6124
boolean barrier_hit;
6125
#if DEBUG_EXECUTION
6126
uint i;
6127
6128
tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
6129
#endif
6130
6131
assert(mach->pc < (int) mach->NumInstructions);
6132
barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
6133
6134
/* for compute shaders if we hit a barrier return now for later rescheduling */
6135
if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
6136
return 0;
6137
6138
#if DEBUG_EXECUTION
6139
for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {
6140
if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
6141
uint j;
6142
6143
memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
6144
debug_printf("TEMP[%2u] = ", i);
6145
for (j = 0; j < 4; j++) {
6146
if (j > 0) {
6147
debug_printf(" ");
6148
}
6149
debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6150
temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
6151
temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
6152
temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
6153
temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
6154
}
6155
}
6156
}
6157
if (mach->Outputs) {
6158
for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
6159
if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
6160
uint j;
6161
6162
memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
6163
debug_printf("OUT[%2u] = ", i);
6164
for (j = 0; j < 4; j++) {
6165
if (j > 0) {
6166
debug_printf(" ");
6167
}
6168
debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6169
outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
6170
outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
6171
outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
6172
outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
6173
}
6174
}
6175
}
6176
}
6177
#endif
6178
}
6179
}
6180
6181
#if 0
6182
/* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
6183
if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
6184
/*
6185
* Scale back depth component.
6186
*/
6187
for (i = 0; i < 4; i++)
6188
mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
6189
}
6190
#endif
6191
6192
/* Strictly speaking, these assertions aren't really needed but they
6193
* can potentially catch some bugs in the control flow code.
6194
*/
6195
assert(mach->CondStackTop == 0);
6196
assert(mach->LoopStackTop == 0);
6197
assert(mach->ContStackTop == 0);
6198
assert(mach->SwitchStackTop == 0);
6199
assert(mach->BreakStackTop == 0);
6200
assert(mach->CallStackTop == 0);
6201
6202
return ~mach->KillMask;
6203
}
6204
6205