CoCalc -- codegen.c

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/pp/codegen.c
4574 views
1
/*
2
 * Copyright (c) 2017 Lima Project
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the
12
 * next paragraph) shall be included in all copies or substantial portions
13
 * of the Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
 * DEALINGS IN THE SOFTWARE.
22
 *
23
 */
24

25
#include "util/ralloc.h"
26
#include "util/half_float.h"
27
#include "util/bitscan.h"
28

29
#include "ppir.h"
30
#include "codegen.h"
31
#include "lima_context.h"
32

33
/*
 * Pack a four-entry swizzle into consecutive 2-bit selectors.
 *
 * Every swizzle[i] has `shift` added and is wrapped to 0..3; the result is
 * placed in the bit pair belonging to component (i + dest_shift).
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int bit_pos = dest_shift * 2;

   for (int comp = 0; comp < 4; comp++, bit_pos += 2) {
      int selector = (swizzle[comp] + shift) & 0x3;
      packed |= selector << bit_pos;
   }

   return packed;
}
40

41
/*
 * Scalar register index of one source component: the source's register
 * index plus the swizzle entry selected by `component`.
 */
static int get_scl_reg_index(ppir_src *src, int component)
{
   const int base = ppir_target_get_src_reg_index(src);

   return base + src->swizzle[component];
}
47

48
/*
 * Fill the varying field for a load node.
 *
 * ppir_op_load_coords_reg uses the `reg` layout of the field; every other
 * accepted load op uses the `imm` layout.
 */
static void ppir_codegen_encode_varying(ppir_node *node, void *code)
{
   ppir_codegen_field_varying *f = code;
   ppir_load_node *load = ppir_node_to_load(node);
   ppir_dest *dest = &load->dest;
   const int dest_index = ppir_target_get_dest_reg_index(dest);
   const int num_components = load->num_components;

   if (node->op == ppir_op_load_coords_reg) {
      f->reg.dest = dest_index >> 2;
      f->reg.mask = dest->write_mask << (dest_index & 0x3);

      /* Without a source only the destination is encoded. */
      if (!load->num_src)
         return;

      /* num_components == 3 implies cubemap as we don't support 3D textures */
      if (num_components == 3) {
         f->reg.source_type = 2;
         f->reg.perspective = 1;
      } else {
         f->reg.source_type = 1;
      }

      ppir_src *src = &load->src;
      const int src_index = ppir_target_get_src_reg_index(src);
      f->reg.source = src_index >> 2;
      f->reg.negate = src->negate;
      f->reg.absolute = src->absolute;
      f->reg.swizzle = encode_swizzle(src->swizzle, src_index & 0x3, 0);
      return;
   }

   assert(node->op == ppir_op_load_varying ||
          node->op == ppir_op_load_coords ||
          node->op == ppir_op_load_fragcoord ||
          node->op == ppir_op_load_pointcoord ||
          node->op == ppir_op_load_frontface);

   f->imm.dest = dest_index >> 2;
   f->imm.mask = dest->write_mask << (dest_index & 0x3);

   /* Three components are encoded with alignment 3, otherwise count - 1. */
   const int alignment = num_components == 3 ? 3 : num_components - 1;
   f->imm.alignment = alignment;

   if (load->num_src) {
      const int offset_index = ppir_target_get_src_reg_index(&load->src);
      f->imm.offset_vector = offset_index >> 2;
      f->imm.offset_scalar = offset_index & 0x3;
   } else {
      f->imm.offset_vector = 0xf;
   }

   /* Alignment 3 indexes in units of four components, like alignment 2. */
   const int index_shift = alignment == 3 ? 2 : alignment;
   f->imm.index = load->index >> index_shift;

   if (node->op == ppir_op_load_fragcoord) {
      f->imm.source_type = 2;
      f->imm.perspective = 3;
   } else if (node->op == ppir_op_load_pointcoord) {
      f->imm.source_type = 3;
   } else if (node->op == ppir_op_load_frontface) {
      f->imm.source_type = 3;
      f->imm.perspective = 1;
   } else if (node->op == ppir_op_load_coords) {
      /* num_components == 3 implies cubemap as we don't support 3D textures */
      f->imm.source_type = num_components == 3 ? 2 : 0;
   }
}
122

123
/*
 * Fill the sampler field for a texture load node: sampler index, LOD
 * controls and sampler type.
 */
static void ppir_codegen_encode_texld(ppir_node *node, void *code)
{
   ppir_codegen_field_sampler *f = code;
   ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
   const int dim = ldtex->sampler_dim;

   f->index = ldtex->sampler;
   f->lod_bias_en = ldtex->lod_bias_en;
   f->explicit_lod = ldtex->explicit_lod;

   /* The LOD bias register comes from the second source when enabled. */
   if (ldtex->lod_bias_en)
      f->lod_bias = ppir_target_get_src_reg_index(&ldtex->src[1]);

   /* Any other sampler dimension leaves the type field untouched. */
   if (dim == GLSL_SAMPLER_DIM_2D ||
       dim == GLSL_SAMPLER_DIM_RECT ||
       dim == GLSL_SAMPLER_DIM_EXTERNAL)
      f->type = ppir_codegen_sampler_type_2d;
   else if (dim == GLSL_SAMPLER_DIM_CUBE)
      f->type = ppir_codegen_sampler_type_cube;

   f->offset_en = 0;
   f->unknown_2 = 0x39001;
}
151

152
/*
 * Fill the uniform field for a uniform or temporary load node.
 * Any other op trips the assert.
 */
static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
{
   ppir_codegen_field_uniform *f = code;
   ppir_load_node *load = ppir_node_to_load(node);

   if (node->op == ppir_op_load_uniform)
      f->source = ppir_codegen_uniform_src_uniform;
   else if (node->op == ppir_op_load_temp)
      f->source = ppir_codegen_uniform_src_temporary;
   else
      assert(0);

   /* Uniforms are always aligned to vec4 boundary */
   f->alignment = 2;
   f->index = load->index;

   /* An optional source register supplies an offset. */
   if (load->num_src) {
      f->offset_en = 1;
      f->offset_reg = ppir_target_get_src_reg_index(&load->src);
   }
}
177

178
/*
 * Convert a shift in the range -3..3 to its opcode value: non-negative
 * shifts map to themselves, negative shifts map to shift + 8.
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift >= 0)
      return shift;

   return shift + 8;
}
183

184
/*
 * Fill the vec4 multiply field for an ALU node: destination, opcode and
 * one or two sources.
 */
static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   /* A pipeline destination has no register, so dest/mask stay untouched. */
   int dest_shift = 0;
   if (dest->type != ppir_target_pipeline) {
      const int dest_index = ppir_target_get_dest_reg_index(dest);
      dest_shift = dest_index & 0x3;
      f->dest = dest_index >> 2;
      f->mask = dest->write_mask << dest_shift;
   }
   f->dest_modifier = dest->modifier;

   /* Ops not listed here leave the opcode field as it was. */
   switch (node->op) {
   case ppir_op_mul: f->op = shift_to_op(alu->shift);       break;
   case ppir_op_mov: f->op = ppir_codegen_vec4_mul_op_mov;  break;
   case ppir_op_max: f->op = ppir_codegen_vec4_mul_op_max;  break;
   case ppir_op_min: f->op = ppir_codegen_vec4_mul_op_min;  break;
   case ppir_op_and: f->op = ppir_codegen_vec4_mul_op_and;  break;
   case ppir_op_or:  f->op = ppir_codegen_vec4_mul_op_or;   break;
   case ppir_op_xor: f->op = ppir_codegen_vec4_mul_op_xor;  break;
   case ppir_op_gt:  f->op = ppir_codegen_vec4_mul_op_gt;   break;
   case ppir_op_ge:  f->op = ppir_codegen_vec4_mul_op_ge;   break;
   case ppir_op_eq:  f->op = ppir_codegen_vec4_mul_op_eq;   break;
   case ppir_op_ne:  f->op = ppir_codegen_vec4_mul_op_ne;   break;
   case ppir_op_not: f->op = ppir_codegen_vec4_mul_op_not;  break;
   default:
      break;
   }

   ppir_src *arg0 = &alu->src[0];
   const int arg0_index = ppir_target_get_src_reg_index(arg0);
   f->arg0_source = arg0_index >> 2;
   f->arg0_swizzle = encode_swizzle(arg0->swizzle, arg0_index & 0x3, dest_shift);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   if (alu->num_src != 2)
      return;

   ppir_src *arg1 = &alu->src[1];
   const int arg1_index = ppir_target_get_src_reg_index(arg1);
   f->arg1_source = arg1_index >> 2;
   f->arg1_swizzle = encode_swizzle(arg1->swizzle, arg1_index & 0x3, dest_shift);
   f->arg1_absolute = arg1->absolute;
   f->arg1_negate = arg1->negate;
}
256

257
/*
 * Fill the scalar multiply field for an ALU node. The component handled is
 * the lowest one set in the destination write mask.
 */
static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_float_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   const int component = ffs(dest->write_mask) - 1;
   assert(component >= 0);

   /* Only a non-pipeline destination gets a register and output enable. */
   if (dest->type != ppir_target_pipeline) {
      f->dest = ppir_target_get_dest_reg_index(dest) + component;
      f->output_en = true;
   }
   f->dest_modifier = dest->modifier;

   /* Ops not listed here leave the opcode field as it was. */
   switch (node->op) {
   case ppir_op_mul: f->op = shift_to_op(alu->shift);        break;
   case ppir_op_mov: f->op = ppir_codegen_float_mul_op_mov;  break;
   case ppir_op_max: f->op = ppir_codegen_float_mul_op_max;  break;
   case ppir_op_min: f->op = ppir_codegen_float_mul_op_min;  break;
   case ppir_op_and: f->op = ppir_codegen_float_mul_op_and;  break;
   case ppir_op_or:  f->op = ppir_codegen_float_mul_op_or;   break;
   case ppir_op_xor: f->op = ppir_codegen_float_mul_op_xor;  break;
   case ppir_op_gt:  f->op = ppir_codegen_float_mul_op_gt;   break;
   case ppir_op_ge:  f->op = ppir_codegen_float_mul_op_ge;   break;
   case ppir_op_eq:  f->op = ppir_codegen_float_mul_op_eq;   break;
   case ppir_op_ne:  f->op = ppir_codegen_float_mul_op_ne;   break;
   case ppir_op_not: f->op = ppir_codegen_float_mul_op_not;  break;
   default:
      break;
   }

   ppir_src *arg0 = &alu->src[0];
   f->arg0_source = get_scl_reg_index(arg0, component);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   if (alu->num_src != 2)
      return;

   ppir_src *arg1 = &alu->src[1];
   f->arg1_source = get_scl_reg_index(arg1, component);
   f->arg1_absolute = arg1->absolute;
   f->arg1_negate = arg1->negate;
}
325

326
/*
 * Fill the vec4 accumulate field for an ALU node.
 *
 * For select, encoding starts at the second source. For sum3/sum4 the
 * source swizzles are encoded without the destination shift; the write
 * mask is still shifted.
 */
static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   const int dest_index = ppir_target_get_dest_reg_index(dest);
   int swizzle_shift = dest_index & 0x3;

   f->dest = dest_index >> 2;
   f->mask = dest->write_mask << swizzle_shift;
   f->dest_modifier = dest->modifier;

   /* Ops not listed here leave the opcode field as it was. */
   switch (node->op) {
   case ppir_op_add:    f->op = ppir_codegen_vec4_acc_op_add;    break;
   case ppir_op_mov:    f->op = ppir_codegen_vec4_acc_op_mov;    break;
   case ppir_op_sum3:
      f->op = ppir_codegen_vec4_acc_op_sum3;
      swizzle_shift = 0;
      break;
   case ppir_op_sum4:
      f->op = ppir_codegen_vec4_acc_op_sum4;
      swizzle_shift = 0;
      break;
   case ppir_op_floor:  f->op = ppir_codegen_vec4_acc_op_floor;  break;
   case ppir_op_ceil:   f->op = ppir_codegen_vec4_acc_op_ceil;   break;
   case ppir_op_fract:  f->op = ppir_codegen_vec4_acc_op_fract;  break;
   case ppir_op_gt:     f->op = ppir_codegen_vec4_acc_op_gt;     break;
   case ppir_op_ge:     f->op = ppir_codegen_vec4_acc_op_ge;     break;
   case ppir_op_eq:     f->op = ppir_codegen_vec4_acc_op_eq;     break;
   case ppir_op_ne:     f->op = ppir_codegen_vec4_acc_op_ne;     break;
   case ppir_op_select: f->op = ppir_codegen_vec4_acc_op_sel;    break;
   case ppir_op_max:    f->op = ppir_codegen_vec4_acc_op_max;    break;
   case ppir_op_min:    f->op = ppir_codegen_vec4_acc_op_min;    break;
   case ppir_op_ddx:    f->op = ppir_codegen_vec4_acc_op_dFdx;   break;
   case ppir_op_ddy:    f->op = ppir_codegen_vec4_acc_op_dFdy;   break;
   default:
      break;
   }

   ppir_src *src_end = alu->src + alu->num_src;
   ppir_src *arg0 = alu->src;
   if (node->op == ppir_op_select)
      arg0++;

   const int arg0_index = ppir_target_get_src_reg_index(arg0);

   /* A source fed by the vec multiply pipeline register needs no index. */
   if (arg0->type == ppir_target_pipeline &&
       arg0->pipeline == ppir_pipeline_reg_vmul)
      f->mul_in = true;
   else
      f->arg0_source = arg0_index >> 2;

   f->arg0_swizzle = encode_swizzle(arg0->swizzle, arg0_index & 0x3, swizzle_shift);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   ppir_src *arg1 = arg0 + 1;
   if (arg1 < src_end) {
      const int arg1_index = ppir_target_get_src_reg_index(arg1);
      f->arg1_source = arg1_index >> 2;
      f->arg1_swizzle = encode_swizzle(arg1->swizzle, arg1_index & 0x3, swizzle_shift);
      f->arg1_absolute = arg1->absolute;
      f->arg1_negate = arg1->negate;
   }
}
414

415
/*
 * Fill the scalar accumulate field for an ALU node. The component handled
 * is the lowest one set in the destination write mask; for select,
 * encoding starts at the second source.
 */
static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
{
   ppir_codegen_field_float_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   const int component = ffs(dest->write_mask) - 1;
   assert(component >= 0);

   f->dest = ppir_target_get_dest_reg_index(dest) + component;
   f->output_en = true;
   f->dest_modifier = dest->modifier;

   /* Ops not listed here leave the opcode field as it was. */
   switch (node->op) {
   case ppir_op_add:    f->op = shift_to_op(alu->shift);          break;
   case ppir_op_mov:    f->op = ppir_codegen_float_acc_op_mov;    break;
   case ppir_op_max:    f->op = ppir_codegen_float_acc_op_max;    break;
   case ppir_op_min:    f->op = ppir_codegen_float_acc_op_min;    break;
   case ppir_op_floor:  f->op = ppir_codegen_float_acc_op_floor;  break;
   case ppir_op_ceil:   f->op = ppir_codegen_float_acc_op_ceil;   break;
   case ppir_op_fract:  f->op = ppir_codegen_float_acc_op_fract;  break;
   case ppir_op_gt:     f->op = ppir_codegen_float_acc_op_gt;     break;
   case ppir_op_ge:     f->op = ppir_codegen_float_acc_op_ge;     break;
   case ppir_op_eq:     f->op = ppir_codegen_float_acc_op_eq;     break;
   case ppir_op_ne:     f->op = ppir_codegen_float_acc_op_ne;     break;
   case ppir_op_select: f->op = ppir_codegen_float_acc_op_sel;    break;
   case ppir_op_ddx:    f->op = ppir_codegen_float_acc_op_dFdx;   break;
   case ppir_op_ddy:    f->op = ppir_codegen_float_acc_op_dFdy;   break;
   default:
      break;
   }

   ppir_src *src_end = alu->src + alu->num_src;
   ppir_src *arg0 = alu->src;
   if (node->op == ppir_op_select)
      arg0++;

   /* A source fed by the scalar multiply pipeline register needs no index. */
   if (arg0->type == ppir_target_pipeline &&
       arg0->pipeline == ppir_pipeline_reg_fmul)
      f->mul_in = true;
   else
      f->arg0_source = get_scl_reg_index(arg0, component);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   ppir_src *arg1 = arg0 + 1;
   if (arg1 < src_end) {
      f->arg1_source = get_scl_reg_index(arg1, component);
      f->arg1_absolute = arg1->absolute;
      f->arg1_negate = arg1->negate;
   }
}
490

491
/*
 * Fill the combine field for the scalar ops handled here (rsqrt, log2,
 * exp2, rcp, sqrt, sin, cos). Any other op leaves the field untouched.
 */
static void ppir_codegen_encode_combine(ppir_node *node, void *code)
{
   ppir_codegen_field_combine *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   int scalar_op;

   switch (node->op) {
   case ppir_op_rsqrt: scalar_op = ppir_codegen_combine_scalar_op_rsqrt; break;
   case ppir_op_log2:  scalar_op = ppir_codegen_combine_scalar_op_log2;  break;
   case ppir_op_exp2:  scalar_op = ppir_codegen_combine_scalar_op_exp2;  break;
   case ppir_op_rcp:   scalar_op = ppir_codegen_combine_scalar_op_rcp;   break;
   case ppir_op_sqrt:  scalar_op = ppir_codegen_combine_scalar_op_sqrt;  break;
   case ppir_op_sin:   scalar_op = ppir_codegen_combine_scalar_op_sin;   break;
   case ppir_op_cos:   scalar_op = ppir_codegen_combine_scalar_op_cos;   break;
   default:
      return;
   }

   f->scalar.dest_vec = false;
   f->scalar.arg1_en = false;

   /* The component handled is the lowest one set in the write mask. */
   ppir_dest *dest = &alu->dest;
   const int component = ffs(dest->write_mask) - 1;
   assert(component >= 0);
   f->scalar.dest = ppir_target_get_dest_reg_index(dest) + component;
   f->scalar.dest_modifier = dest->modifier;

   ppir_src *arg0 = alu->src;
   f->scalar.arg0_src = get_scl_reg_index(arg0, component);
   f->scalar.arg0_absolute = arg0->absolute;
   f->scalar.arg0_negate = arg0->negate;

   f->scalar.op = scalar_op;
}
550

551
/*
 * Fill the temp-write field for a ppir_op_store_temp node.
 */
static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_store_temp);

   ppir_codegen_field_temp_write *f = code;
   ppir_store_node *store = ppir_node_to_store(node);
   const int num_components = store->num_components;

   /* Four components use alignment 2, otherwise count - 1. */
   const int alignment = num_components == 4 ? 2 : num_components - 1;

   f->temp_write.dest = 0x03; /* binary 11 - temporary */
   f->temp_write.source = store->src.reg->index;

   f->temp_write.alignment = alignment;
   f->temp_write.index = store->index << (2 - alignment);

   f->temp_write.offset_reg = store->index >> 2;
}
568

569
/*
 * Convert each of the constant's `num` float values to half precision and
 * store them in order into `code`.
 */
static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
{
   int idx = 0;

   while (idx < constant->num) {
      code[idx] = _mesa_float_to_half(constant->value[idx].f);
      idx++;
   }
}
574

575
/*
 * Fill the branch field for a discard node with the three fixed
 * PPIR_CODEGEN_DISCARD_WORD* values.
 */
static void ppir_codegen_encode_discard(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_discard);

   ppir_codegen_field_branch *field = code;

   field->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
   field->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
   field->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
}
584

585
/*
 * Fill the branch field for a branch or discard node.
 *
 * Discard nodes are passed on to ppir_codegen_encode_discard(). A branch
 * has either two sources (conditional) or none (unconditional). Its target
 * is the first instruction of the target block, skipping forward over
 * blocks whose instruction list is empty.
 */
static void ppir_codegen_encode_branch(ppir_node *node, void *code)
{
   if (node->op == ppir_op_discard) {
      ppir_codegen_encode_discard(node, code);
      return;
   }

   assert(node->op == ppir_op_branch);

   ppir_codegen_field_branch *b = code;
   ppir_branch_node *branch = ppir_node_to_branch(node);

   b->branch.unknown_0 = 0x0;
   b->branch.unknown_1 = 0x0;

   switch (branch->num_src) {
   case 2:
      b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
      b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
      b->branch.cond_gt = branch->cond_gt;
      b->branch.cond_eq = branch->cond_eq;
      b->branch.cond_lt = branch->cond_lt;
      break;
   case 0:
      /* Unconditional branch */
      b->branch.arg0_source = 0;
      b->branch.arg1_source = 0;
      b->branch.cond_gt = true;
      b->branch.cond_eq = true;
      b->branch.cond_lt = true;
      break;
   default:
      assert(false);
      break;
   }

   /* Skip empty blocks until one with instructions (or no successor). */
   ppir_block *target = branch->target;
   while (list_is_empty(&target->instr_list) && target->list.next)
      target = LIST_ENTRY(ppir_block, target->list.next, list);

   assert(!list_is_empty(&target->instr_list));

   ppir_instr *first = list_first_entry(&target->instr_list, ppir_instr, list);

   /* The target is stored relative to the branching instruction's offset. */
   b->branch.target = first->offset - node->instr->offset;
   b->branch.next_count = first->encode_size;
}
632

633
typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
634

635
static const ppir_codegen_instr_slot_encode_func
636
ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
637
   [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
638
   [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
639
   [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
640
   [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
641
   [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
642
   [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
643
   [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
644
   [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
645
   [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
646
   [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
647
};
648

649
/*
 * Size in bits of each encoded field, indexed by instruction slot number.
 * NOTE(review): presumably listed in PPIR_INSTR_SLOT_* enum order - confirm
 * against the enum definition.
 */
static const int ppir_codegen_field_size[] = {
   34,
   62,
   41,
   43,
   30,
   44,
   31,
   30,
   41,
   73,
};
652

653
/* Number of 32-bit words needed to hold `size` bits (rounded up). */
static inline int align_to_word(int size)
{
   const int word_bits_log2 = 5;
   const int round_up = (1 << word_bits_log2) - 1;

   return (size + round_up) >> word_bits_log2;
}
657

658
/*
 * Encoded size of an instruction in 32-bit words: the bits of every
 * occupied slot plus 64 bits per non-empty constant, rounded up to whole
 * words, plus one extra word.
 */
static int get_instr_encode_size(ppir_instr *instr)
{
   int bits = 0;

   for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++)
      bits += instr->slots[slot] ? ppir_codegen_field_size[slot] : 0;

   for (int c = 0; c < 2; c++)
      bits += instr->constant[c].num ? 64 : 0;

   return align_to_word(bits) + 1;
}
674

675
/*
 * Copy `src_size` bits from `src` into the 32-bit word array `dst`, starting
 * at bit position `dst_offset`.
 *
 * dst:        destination word array; must be 32-bit aligned.
 * dst_offset: bit offset into `dst` where the copy starts.
 * src:        source bits; any alignment. It is read in whole 32-bit words,
 *             so it must be readable for src_size rounded up to a multiple
 *             of 32 bits.
 * src_size:   number of bits to copy; values <= 0 copy nothing.
 *
 * When `dst_offset` is word aligned, whole words are copied over the
 * destination. Otherwise the source words are shifted and OR-ed in, so the
 * affected destination bits are expected to be zero beforehand.
 */
static void bitcopy(void *dst, int dst_offset, const void *src, int src_size)
{
   if (src_size <= 0)
      return;

   const int low_shift = dst_offset & 0x1f;
   const unsigned char *src_bytes = src;
   uint32_t *out = dst;

   out += (dst_offset >> 5);

   if (!low_shift) {
      /* Word-aligned destination: copy src_size rounded up to whole words. */
      const int num_words = (src_size + 0x1f) >> 5;
      memcpy(out, src_bytes, num_words * sizeof(uint32_t));
      return;
   }

   const int high_shift = 32 - low_shift;
   int copied = 0;

   for (;;) {
      /* Load through memcpy: callers pass byte/halfword scratch buffers
       * that must not be dereferenced as uint32_t. */
      uint32_t word;
      memcpy(&word, src_bytes, sizeof(word));

      /* Low part of the source word tops up the current output word. */
      *out |= word << low_shift;
      out++;

      copied += high_shift;
      if (copied >= src_size)
         break;

      /* The remaining high part starts the next output word. */
      *out |= word >> high_shift;
      src_bytes += sizeof(word);

      copied += low_shift;
      if (copied >= src_size)
         break;
   }
}
704

705
/*
 * Encode one instruction at `code` and return its size in 32-bit words.
 *
 * instr:     instruction to encode.
 * code:      output location; the control word comes first, the packed
 *            fields and constants follow it. The per-slot and constant
 *            bits are combined into this buffer with bitcopy().
 * last_code: control word of the previously encoded instruction, or NULL.
 *            When given, its next_count and prefetch are updated to
 *            describe this instruction.
 */
static int encode_instr(ppir_instr *instr, void *code, void *last_code)
{
   int size = 0;
   ppir_codegen_ctrl *ctrl = code;

   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
      if (!instr->slots[i])
         continue;

      /* max field size (73), align to dword. Declared as uint32_t so the
       * scratch buffer is 32-bit aligned: it is written through a field
       * struct pointer and read back in 32-bit units. */
      uint32_t output[3] = {0};

      ppir_codegen_encode_slot[i](instr->slots[i], output);
      bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);

      size += ppir_codegen_field_size[i];
      ctrl->fields |= 1 << i;
   }

   /* Texture loads and derivative ops set the sync flag. */
   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
      ctrl->sync = true;

   if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
         ctrl->sync = true;
   }

   if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
         ctrl->sync = true;
   }

   for (int i = 0; i < 2; i++) {
      if (!instr->constant[i].num)
         continue;

      /* Four half floats, stored so that the 32-bit reads done while
       * packing are properly aligned. */
      union {
         uint32_t words[2];
         uint16_t halves[4];
      } output = { .words = {0} };

      ppir_codegen_encode_const(instr->constant + i, output.halves);
      bitcopy(ctrl + 1, size, output.words, instr->constant[i].num * 16);

      /* A constant always occupies 64 bits, however many values it holds. */
      size += 64;
      ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
   }

   /* Round the fields up to whole words and add one word for the ctrl word. */
   size = align_to_word(size) + 1;

   ctrl->count = size;
   if (instr->is_end)
      ctrl->stop = true;

   if (last_code) {
      ppir_codegen_ctrl *last_ctrl = last_code;
      last_ctrl->next_count = size;
      last_ctrl->prefetch = true;
   }

   return size;
}
764

765
/*
 * Dump the encoded shader to stdout: for every instruction its index,
 * offset and raw words in hex (six per line), followed by its disassembly.
 * The word count of each instruction is read from the low five bits of its
 * first word.
 */
static void ppir_codegen_print_prog(ppir_compiler *comp)
{
   uint32_t *cursor = comp->prog->shader;
   unsigned word_offset = 0;

   printf("========ppir codegen========\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         const int num_words = cursor[0] & 0x1f;

         printf("%03d (@%6d): ", instr->index, instr->offset);
         for (int w = 0; w < num_words; w++) {
            /* Wrap the hex dump after every six words. */
            if (w != 0 && w % 6 == 0)
               printf("\n    ");
            printf("%08x ", cursor[w]);
         }
         printf("\n");
         ppir_disassemble_instr(cursor, word_offset);

         cursor += num_words;
         word_offset += num_words;
      }
   }
   printf("-----------------------\n");
}
788

789
/*
 * Generate the binary for a compiled program.
 *
 * First pass: assign every instruction its word offset and encoded size.
 * Second pass: encode all instructions into a newly allocated zeroed
 * buffer, which then replaces comp->prog->shader.
 *
 * Returns false if the buffer allocation fails, true otherwise.
 */
bool ppir_codegen_prog(ppir_compiler *comp)
{
   int total_words = 0;

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         instr->offset = total_words;
         instr->encode_size = get_instr_encode_size(instr);
         total_words += instr->encode_size;
      }
   }

   uint32_t *prog = rzalloc_size(comp->prog, total_words * sizeof(uint32_t));
   if (prog == NULL)
      return false;

   uint32_t *prev = NULL;
   uint32_t *cur = prog;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         /* encode_instr() also updates the previous instruction's ctrl word. */
         const int words = encode_instr(instr, cur, prev);
         prev = cur;
         cur += words;
      }
   }

   /* Release any previously generated shader before installing the new one. */
   if (comp->prog->shader)
      ralloc_free(comp->prog->shader);

   comp->prog->shader = prog;
   comp->prog->state.shader_size = total_words * sizeof(uint32_t);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_codegen_print_prog(comp);

   return true;
}
824

825
Product

Resources

Company