GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/common/mi_builder.h

/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef MI_BUILDER_H
#define MI_BUILDER_H

#include "dev/intel_device_info.h"
#include "genxml/genX_bits.h"
#include "util/bitscan.h"
#include "util/fast_idiv_by_const.h"
#include "util/u_math.h"

#ifndef MI_BUILDER_NUM_ALLOC_GPRS
/** The number of GPRs the MI builder is allowed to allocate
 *
 * This may be set by a user of this API so that it can reserve some GPRs at
 * the top end for its own use.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 16
#endif
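
/* For example, a user that wants to manage the top GPR itself could reserve
 * it by lowering the allocator's limit before including this header (a
 * sketch; the exact split and include path are up to the driver):
 *
 *    #define MI_BUILDER_NUM_ALLOC_GPRS 15
 *    #include "common/mi_builder.h"
 *
 * mi_new_gpr() will then never hand out GPR15.
 */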

/** These must be defined by the user of the builder
 *
 * void *__gen_get_batch_dwords(__gen_user_data *user_data,
 *                              unsigned num_dwords);
 *
 * __gen_address_type
 * __gen_address_offset(__gen_address_type addr, uint64_t offset);
 *
 *
 * If self-modifying batches are supported, we must be able to pass batch
 * addresses around as void*s, so pinning, batch chaining, or some other
 * mechanism for ensuring batch pointers remain valid during building is
 * required.  The following function must also be defined; it returns an
 * address in canonical form:
 *
 * __gen_address_type
 * __gen_get_batch_address(__gen_user_data *user_data, void *location);
 *
 * Also, __gen_combine_address must accept a location value of NULL and return
 * a fully valid 64-bit address.
 */
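
/* A minimal sketch of what these hooks might look like in a driver with a
 * simple linear batch (hypothetical names and batch layout; real drivers
 * such as anv or iris wire these to their own batch abstractions):
 *
 *    struct example_batch {
 *       uint32_t *map;      // CPU mapping of the batch buffer
 *       uint32_t *next;     // next free dword
 *    };
 *
 *    static void *
 *    __gen_get_batch_dwords(__gen_user_data *user_data, unsigned num_dwords)
 *    {
 *       struct example_batch *batch = (struct example_batch *)user_data;
 *       uint32_t *p = batch->next;
 *       batch->next += num_dwords;
 *       return p;
 *    }
 *
 *    static __gen_address_type
 *    __gen_address_offset(__gen_address_type addr, uint64_t offset)
 *    {
 *       addr.offset += offset;
 *       return addr;
 *    }
 */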

/*
 * Start of the actual MI builder
 */

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#define mi_builder_pack(b, cmd, dst, name)                              \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)

#define mi_builder_emit(b, cmd, name) \
   mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)
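
/* mi_builder_pack() packs a command in place at dst: the block following the
 * macro fills out fields and the for-loop's increment expression performs the
 * actual pack.  mi_builder_emit() is the same but allocates the dwords from
 * the batch.  A usage sketch, assuming genxml's GENX(PIPE_CONTROL):
 *
 *    mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *    }
 */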


enum mi_value_type {
   MI_VALUE_TYPE_IMM,
   MI_VALUE_TYPE_MEM32,
   MI_VALUE_TYPE_MEM64,
   MI_VALUE_TYPE_REG32,
   MI_VALUE_TYPE_REG64,
};

struct mi_value {
   enum mi_value_type type;

   union {
      uint64_t imm;
      __gen_address_type addr;
      uint32_t reg;
   };

#if GFX_VERx10 >= 75
   bool invert;
#endif
};

struct mi_reg_num {
   uint32_t num;
#if GFX_VER >= 11
   bool cs;
#endif
};

static inline struct mi_reg_num
mi_adjust_reg_num(uint32_t reg)
{
#if GFX_VER >= 11
   bool cs = reg >= 0x2000 && reg < 0x4000;
   return (struct mi_reg_num) {
      .num = reg - (cs ? 0x2000 : 0),
      .cs = cs,
   };
#else
   return (struct mi_reg_num) { .num = reg, };
#endif
}

#if GFX_VER >= 9
#define MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define MI_BUILDER_MAX_MATH_DWORDS 64
#endif

struct mi_builder {
   const struct intel_device_info *devinfo;
   __gen_user_data *user_data;

#if GFX_VERx10 >= 75
   uint32_t gprs;
   uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];

   unsigned num_math_dwords;
   uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];
#endif
};

static inline void
mi_builder_init(struct mi_builder *b,
                const struct intel_device_info *devinfo,
                __gen_user_data *user_data)
{
   memset(b, 0, sizeof(*b));
   b->devinfo = devinfo;
   b->user_data = user_data;

#if GFX_VERx10 >= 75
   b->gprs = 0;
   b->num_math_dwords = 0;
#endif
}

static inline void
mi_builder_flush_math(struct mi_builder *b)
{
#if GFX_VERx10 >= 75
   if (b->num_math_dwords == 0)
      return;

   uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + b->num_math_dwords);
   mi_builder_pack(b, GENX(MI_MATH), dw, math) {
      math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}

#define _MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _MI_BUILDER_NUM_HW_GPRS 16

#if GFX_VERx10 >= 75

static inline bool
mi_value_is_reg(struct mi_value val)
{
   return val.type == MI_VALUE_TYPE_REG32 ||
          val.type == MI_VALUE_TYPE_REG64;
}

static inline bool
mi_value_is_gpr(struct mi_value val)
{
   return mi_value_is_reg(val) &&
          val.reg >= _MI_BUILDER_GPR_BASE &&
          val.reg < _MI_BUILDER_GPR_BASE +
                    _MI_BUILDER_NUM_HW_GPRS * 8;
}

static inline bool
_mi_value_is_allocated_gpr(struct mi_value val)
{
   return mi_value_is_reg(val) &&
          val.reg >= _MI_BUILDER_GPR_BASE &&
          val.reg < _MI_BUILDER_GPR_BASE +
                    MI_BUILDER_NUM_ALLOC_GPRS * 8;
}

static inline uint32_t
_mi_value_as_gpr(struct mi_value val)
{
   assert(mi_value_is_gpr(val));
   assert(val.reg % 8 == 0);
   return (val.reg - _MI_BUILDER_GPR_BASE) / 8;
}

static inline struct mi_value
mi_new_gpr(struct mi_builder *b)
{
   unsigned gpr = ffs(~b->gprs) - 1;
   assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
   assert(b->gpr_refs[gpr] == 0);
   b->gprs |= (1u << gpr);
   b->gpr_refs[gpr] = 1;

   return (struct mi_value) {
      .type = MI_VALUE_TYPE_REG64,
      .reg = _MI_BUILDER_GPR_BASE + gpr * 8,
   };
}
#endif /* GFX_VERx10 >= 75 */

/** Take a reference to a mi_value
 *
 * The MI builder uses reference counting to automatically free ALU GPRs for
 * re-use in calculations.  All mi_* math functions consume the reference
 * they are handed for each source and return a reference to a value which the
 * caller must consume.  In particular, if you pass the same value into a
 * single mi_* math function twice (say to add a number to itself), you
 * are responsible for calling mi_value_ref() to get a second reference
 * because the mi_* math function will consume it twice.
 */
static inline struct mi_value
mi_value_ref(struct mi_builder *b, struct mi_value val)
{
#if GFX_VERx10 >= 75
   if (_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _mi_value_as_gpr(val);
      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] < UINT8_MAX);
      b->gpr_refs[gpr]++;
   }
#endif /* GFX_VERx10 >= 75 */

   return val;
}

/** Drop a reference to a mi_value
 *
 * See also mi_value_ref.
 */
static inline void
mi_value_unref(struct mi_builder *b, struct mi_value val)
{
#if GFX_VERx10 >= 75
   if (_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _mi_value_as_gpr(val);
      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] > 0);
      if (--b->gpr_refs[gpr] == 0)
         b->gprs &= ~(1u << gpr);
   }
#endif /* GFX_VERx10 >= 75 */
}
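
/* Because each mi_* math source consumes a reference, adding a value to
 * itself needs an explicit extra reference (a usage sketch):
 *
 *    struct mi_value v = mi_value_to_gpr(b, mi_mem32(addr));
 *    v = mi_iadd(b, mi_value_ref(b, v), v);   // v := v + v
 *
 * Without mi_value_ref(), v's single reference would be dropped twice,
 * tripping the refcount asserts above.
 */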

static inline struct mi_value
mi_imm(uint64_t imm)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_IMM,
      .imm = imm,
   };
}

static inline struct mi_value
mi_reg32(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG32,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct mi_value
mi_reg64(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG64,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct mi_value
mi_mem32(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM32,
      .addr = addr,
   };
}

static inline struct mi_value
mi_mem64(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM64,
      .addr = addr,
   };
}

static inline struct mi_value
mi_value_half(struct mi_value value, bool top_32_bits)
{
   switch (value.type) {
   case MI_VALUE_TYPE_IMM:
      if (top_32_bits)
         value.imm >>= 32;
      else
         value.imm &= 0xffffffffu;
      return value;

   case MI_VALUE_TYPE_MEM32:
      assert(!top_32_bits);
      return value;

   case MI_VALUE_TYPE_MEM64:
      if (top_32_bits)
         value.addr = __gen_address_offset(value.addr, 4);
      value.type = MI_VALUE_TYPE_MEM32;
      return value;

   case MI_VALUE_TYPE_REG32:
      assert(!top_32_bits);
      return value;

   case MI_VALUE_TYPE_REG64:
      if (top_32_bits)
         value.reg += 4;
      value.type = MI_VALUE_TYPE_REG32;
      return value;
   }

   unreachable("Invalid mi_value type");
}
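
/* Example: splitting a 64-bit value into dword halves (a sketch; 0x2348 is
 * given here as an example of a 64-bit counter register):
 *
 *    struct mi_value v  = mi_reg64(0x2348);
 *    struct mi_value lo = mi_value_half(v, false);  // REG32 at 0x2348
 *    struct mi_value hi = mi_value_half(v, true);   // REG32 at 0x234c
 *
 * For MEM64 values, the top half is the same address offset by 4 bytes.
 */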

static inline void
_mi_copy_no_unref(struct mi_builder *b,
                  struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   /* TODO: We could handle src.invert by emitting a bit of math if we really
    * wanted to.
    */
   assert(!dst.invert && !src.invert);
#endif
   mi_builder_flush_math(b);

   switch (dst.type) {
   case MI_VALUE_TYPE_IMM:
      unreachable("Cannot copy to an immediate");

   case MI_VALUE_TYPE_MEM64:
   case MI_VALUE_TYPE_REG64:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         if (dst.type == MI_VALUE_TYPE_REG64) {
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                              GENX(MI_LOAD_REGISTER_IMM_length) + 2);
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {
               lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -
                                 GENX(MI_LOAD_REGISTER_IMM_length_bias);
#if GFX_VER >= 11
               lri.AddCSMMIOStartOffset = reg.cs;
#endif
            }
            dw[1] = reg.num;
            dw[2] = src.imm;
            dw[3] = reg.num + 4;
            dw[4] = src.imm >> 32;
         } else {
#if GFX_VER >= 8
            assert(dst.type == MI_VALUE_TYPE_MEM64);
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                              GENX(MI_STORE_DATA_IMM_length) + 1);
            mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {
               sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -
                                 GENX(MI_STORE_DATA_IMM_length_bias);
               sdm.StoreQword = true;
               sdm.Address = dst.addr;
            }
            dw[3] = src.imm;
            dw[4] = src.imm >> 32;
#else
            _mi_copy_no_unref(b, mi_value_half(dst, false),
                                 mi_value_half(src, false));
            _mi_copy_no_unref(b, mi_value_half(dst, true),
                                 mi_value_half(src, true));
#endif
         }
         break;
      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_MEM32:
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_imm(0));
         break;
      case MI_VALUE_TYPE_REG64:
      case MI_VALUE_TYPE_MEM64:
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_value_half(src, true));
         break;
      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_MEM32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
            sdi.Address = dst.addr;
#if GFX_VER >= 12
            sdi.ForceWriteCompletionCheck = true;
#endif
            sdi.ImmediateData = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 8
         mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
            cmm.DestinationMemoryAddress = dst.addr;
            cmm.SourceMemoryAddress = src.addr;
         }
#elif GFX_VERx10 == 75
         {
            struct mi_value tmp = mi_new_gpr(b);
            _mi_copy_no_unref(b, tmp, src);
            _mi_copy_no_unref(b, dst, tmp);
            mi_value_unref(b, tmp);
         }
#else
         unreachable("Cannot do mem <-> mem copy on IVB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
         mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
            struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
            srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            srm.AddCSMMIOStartOffset = reg.cs;
#endif
            srm.MemoryAddress = dst.addr;
         }
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_REG32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lri.RegisterOffset = reg.num;
#if GFX_VER >= 11
            lri.AddCSMMIOStartOffset = reg.cs;
#endif
            lri.DataDWord = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 7
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lrm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            lrm.AddCSMMIOStartOffset = reg.cs;
#endif
            lrm.MemoryAddress = src.addr;
         }
#else
         unreachable("Cannot do mem -> reg copy on SNB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
#if GFX_VERx10 >= 75
         if (src.reg != dst.reg) {
            mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
               struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
               lrr.SourceRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetSource = reg.cs;
#endif
               reg = mi_adjust_reg_num(dst.reg);
               lrr.DestinationRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetDestination = reg.cs;
#endif
            }
         }
#else
         unreachable("Cannot do reg <-> reg copy on IVB and earlier");
#endif
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   default:
      unreachable("Invalid mi_value type");
   }
}

#if GFX_VERx10 >= 75
static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src);
#endif

/** Store the value in src to the value represented by dst
 *
 * If the bit size of src and dst mismatch, this function does an unsigned
 * integer cast.  If src has more bits than dst, it takes the bottom bits.  If
 * src has fewer bits than dst, it fills the top bits with zeros.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
mi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   src = mi_resolve_invert(b, src);
#endif
   _mi_copy_no_unref(b, dst, src);
   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}
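
/* Usage sketches: store an immediate to memory, or snapshot a 64-bit
 * register into a buffer (0x2358 is the render engine's TIMESTAMP register;
 * both addresses are hypothetical):
 *
 *    mi_store(b, mi_mem32(addr), mi_imm(42));
 *    mi_store(b, mi_mem64(ts_addr), mi_reg64(0x2358));
 */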

static inline void
mi_memset(struct mi_builder *b, __gen_address_type dst,
          uint32_t value, uint32_t size)
{
#if GFX_VERx10 >= 75
   assert(b->num_math_dwords == 0);
#endif

   /* This memset operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      mi_store(b, mi_mem32(__gen_address_offset(dst, i)),
                  mi_imm(value));
   }
}

/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */
static inline void
mi_memcpy(struct mi_builder *b, __gen_address_type dst,
          __gen_address_type src, uint32_t size)
{
#if GFX_VERx10 >= 75
   assert(b->num_math_dwords == 0);
#endif

   /* This memcpy operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));
      struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));
#if GFX_VERx10 >= 75
      mi_store(b, dst_val, src_val);
#else
      /* IVB does not have a general purpose register for command streamer
       * commands.  Therefore, we use an alternate temporary register.
       */
      struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */
      mi_store(b, tmp_reg, src_val);
      mi_store(b, dst_val, tmp_reg);
#endif
   }
}

/*
 * MI_MATH Section.  Only available on Haswell+
 */

#if GFX_VERx10 >= 75

/**
 * Perform a predicated store (assuming the condition is already loaded
 * in the MI_PREDICATE_RESULT register) of the value in src to the memory
 * location specified by dst.  Non-memory destinations are not supported.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
   assert(!dst.invert && !src.invert);

   mi_builder_flush_math(b);

   /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
    * destination to be memory, and resolve the source to a temporary
    * register if it isn't in one already.
    */
   assert(dst.type == MI_VALUE_TYPE_MEM64 ||
          dst.type == MI_VALUE_TYPE_MEM32);

   if (src.type != MI_VALUE_TYPE_REG32 &&
       src.type != MI_VALUE_TYPE_REG64) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, tmp, src);
      src = tmp;
   }

   if (dst.type == MI_VALUE_TYPE_MEM64) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
         srm.PredicateEnable = true;
      }
   } else {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
   }

   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}

static inline void
_mi_builder_push_math(struct mi_builder *b,
                      const uint32_t *dwords,
                      unsigned num_dwords)
{
   assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);
   if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)
      mi_builder_flush_math(b);

   memcpy(&b->math_dwords[b->num_math_dwords],
          dwords, num_dwords * sizeof(*dwords));
   b->num_math_dwords += num_dwords;
}

static inline uint32_t
_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
{
   struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
      .Operand2 = operand2,
      .Operand1 = operand1,
      .ALUOpcode = opcode,
   };

   uint32_t dw;
   GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);

   return dw;
}
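
/* Each MI_MATH ALU instruction packs into a single dword.  A typical binop
 * becomes a four-instruction sequence: load both sources into SRCA/SRCB,
 * execute the operation, then store the accumulator (a sketch of the dwords
 * mi_math_binop below builds for an ADD):
 *
 *    dw[0] = _mi_pack_alu(MI_ALU_LOAD, MI_ALU_SRCA, gpr_of_src0);
 *    dw[1] = _mi_pack_alu(MI_ALU_LOAD, MI_ALU_SRCB, gpr_of_src1);
 *    dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
 *    dw[3] = _mi_pack_alu(MI_ALU_STORE, gpr_of_dst, MI_ALU_ACCU);
 */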

static inline struct mi_value
mi_value_to_gpr(struct mi_builder *b, struct mi_value val)
{
   if (mi_value_is_gpr(val))
      return val;

   /* Save off the invert flag because it makes copy() grumpy */
   bool invert = val.invert;
   val.invert = false;

   struct mi_value tmp = mi_new_gpr(b);
   _mi_copy_no_unref(b, tmp, val);
   tmp.invert = invert;

   return tmp;
}

static inline uint64_t
mi_value_to_u64(struct mi_value val)
{
   assert(val.type == MI_VALUE_TYPE_IMM);
   return val.invert ? ~val.imm : val.imm;
}

static inline uint32_t
_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)
{
   if (val->type == MI_VALUE_TYPE_IMM &&
       (val->imm == 0 || val->imm == UINT64_MAX)) {
      uint64_t imm = val->invert ? ~val->imm : val->imm;
      return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
   } else {
      *val = mi_value_to_gpr(b, *val);
      return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
                          src, _mi_value_as_gpr(*val));
   }
}

static inline struct mi_value
mi_math_binop(struct mi_builder *b, uint32_t opcode,
              struct mi_value src0, struct mi_value src1,
              uint32_t store_op, uint32_t store_src)
{
   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[4];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);
   dw[2] = _mi_pack_alu(opcode, 0, 0);
   dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);
   _mi_builder_push_math(b, dw, 4);

   mi_value_unref(b, src0);
   mi_value_unref(b, src1);

   return dst;
}

static inline struct mi_value
mi_inot(struct mi_builder *b, struct mi_value val)
{
   if (val.type == MI_VALUE_TYPE_IMM)
      return mi_imm(~mi_value_to_u64(val));

   val.invert = !val.invert;
   return val;
}

static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src)
{
   if (!src.invert)
      return src;

   assert(src.type != MI_VALUE_TYPE_IMM);
   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_ADD, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_iadd_imm(struct mi_builder *b,
            struct mi_value src, uint64_t N)
{
   if (N == 0)
      return src;

   return mi_iadd(b, src, mi_imm(N));
}

static inline struct mi_value
mi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}
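
/* Usage sketch: compute mem[c] = mem[a] + mem[b2] entirely on the command
 * streamer (a, b2, and c are hypothetical __gen_address_type values):
 *
 *    struct mi_value sum = mi_iadd(b, mi_mem64(a), mi_mem64(b2));
 *    mi_store(b, mi_mem64(c), sum);
 *
 * mi_iadd consumes both source references and returns a fresh GPR value,
 * which mi_store then consumes.
 */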

static inline struct mi_value
mi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "equal" by subtracting and storing the zero bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct mi_value
mi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "not equal" by subtracting and storing the inverse zero bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STOREINV, MI_ALU_ZF);
}

static inline struct mi_value
mi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "less than" by subtracting and storing the carry bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STORE, MI_ALU_CF);
}

static inline struct mi_value
mi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "greater than or equal" by subtracting and storing the inverse
    * carry bit
    */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STOREINV, MI_ALU_CF);
}

static inline struct mi_value
mi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_AND, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_nz(struct mi_builder *b, struct mi_value src)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);

   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                        MI_ALU_STOREINV, MI_ALU_ZF);
}

static inline struct mi_value
mi_z(struct mi_builder *b, struct mi_value src)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);

   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                        MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct mi_value
mi_ior(struct mi_builder *b,
       struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_OR, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

#if GFX_VERx10 >= 125
static inline struct mi_value
mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SHL, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SHR, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ushr(b, res, mi_imm(1 << bit));
   }

   return res;
}

static inline struct mi_value
mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SAR, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishr(b, res, mi_imm(1 << bit));
   }

   return res;
}
#endif /* if GFX_VERx10 >= 125 */

static inline struct mi_value
mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) * N);

   if (N == 0) {
      mi_value_unref(b, src);
      return mi_imm(0);
   }

   if (N == 1)
      return src;

   src = mi_value_to_gpr(b, src);

   struct mi_value res = mi_value_ref(b, src);

   unsigned top_bit = 31 - __builtin_clz(N);
   for (int i = top_bit - 1; i >= 0; i--) {
      res = mi_iadd(b, res, mi_value_ref(b, res));
      if (N & (1 << i))
         res = mi_iadd(b, res, mi_value_ref(b, src));
   }

   mi_value_unref(b, src);

   return res;
}
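
/* The multiply is decomposed into a binary shift-and-add ladder.  For
 * example, N = 5 (binary 101, top_bit = 2) unrolls to:
 *
 *    res = src;           // start at the top bit
 *    res = res + res;     // i = 1: res = 2 * src; bit 1 of N is 0
 *    res = res + res;     // i = 0: res = 4 * src
 *    res = res + src;     //        bit 0 of N is 1 -> res = 5 * src
 */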

static inline struct mi_value
mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) << shift);

   struct mi_value res = mi_value_to_gpr(b, src);

#if GFX_VERx10 >= 125
   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishl(b, res, mi_imm(1 << bit));
   }
#else
   for (unsigned i = 0; i < shift; i++)
      res = mi_iadd(b, res, mi_value_ref(b, res));
#endif

   return res;
}

static inline struct mi_value
mi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
    * of the result.
    */
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);

   if (shift > 32) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, mi_value_half(tmp, false),
                           mi_value_half(src, true));
      _mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));
      mi_value_unref(b, src);
      src = tmp;
      shift -= 32;
   }
   assert(shift <= 32);
   struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);
   struct mi_value dst = mi_new_gpr(b);
   _mi_copy_no_unref(b, mi_value_half(dst, false),
                        mi_value_half(tmp, true));
   _mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));
   mi_value_unref(b, tmp);
   return dst;
}

static inline struct mi_value
mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
{
   if (N.type == MI_VALUE_TYPE_IMM) {
      assert(mi_value_to_u64(N) <= UINT32_MAX);
      return mi_imm(mi_value_to_u64(N) / D);
   }

   /* We implicitly assume that N is only a 32-bit value */
   if (D == 0) {
      /* This is invalid but we should do something */
      return mi_imm(0);
   } else if (util_is_power_of_two_or_zero(D)) {
      return mi_ushr32_imm(b, N, util_logbase2(D));
   } else {
      struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
      assert(m.multiplier <= UINT32_MAX);

      if (m.pre_shift)
         N = mi_ushr32_imm(b, N, m.pre_shift);

      /* Do the 32x32 multiply into gpr0 */
      N = mi_imul_imm(b, N, m.multiplier);

      if (m.increment)
         N = mi_iadd(b, N, mi_imm(m.multiplier));

      N = mi_ushr32_imm(b, N, 32);

      if (m.post_shift)
         N = mi_ushr32_imm(b, N, m.post_shift);

      return N;
   }
}
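
/* A worked example of the fast-division rewrite: for D = 3,
 * util_compute_fast_udiv_info yields (assuming the usual reciprocal
 * constants) multiplier 0xaaaaaaab with a post-shift of 1, so the emitted
 * math computes
 *
 *    N / 3  ==  ((N * 0xaaaaaaab) >> 32) >> 1
 *
 * using mi_imul_imm() for the multiply and mi_ushr32_imm() for the shifts.
 */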

#endif /* MI_MATH section */

/* This assumes addresses of strictly more than 32 bits (i.e., Gfx8+). */
#if MI_BUILDER_CAN_WRITE_BATCH

struct mi_address_token {
   /* Pointers to address memory fields in the batch. */
   uint64_t *ptrs[2];
};

static inline struct mi_address_token
mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
{
   mi_builder_flush_math(b);

   assert(addr_reg.type == MI_VALUE_TYPE_REG64);

   struct mi_address_token token = {};

   for (unsigned i = 0; i < 2; i++) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = addr_reg.reg + (i * 4);

         const unsigned addr_dw =
            GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
         token.ptrs[i] = (void *)_dst + addr_dw;
      }
   }

   mi_value_unref(b, addr_reg);
   return token;
}

static inline void
mi_self_mod_barrier(struct mi_builder *b)
{
   /* First make sure all the memory writes from previous modifying commands
    * have landed.  We want to do this before going through the CS cache,
    * otherwise we could be fetching memory that hasn't been written to yet.
    */
   mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
      pc.CommandStreamerStallEnable = true;
   }
   /* Documentation says Gfx11+ should be able to invalidate the command
    * cache, but experiments show it doesn't work properly, so for now just
    * get past the CS prefetch.
    */
   for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++)
      mi_builder_emit(b, GENX(MI_NOOP), noop);
}

static inline void
_mi_resolve_address_token(struct mi_builder *b,
                          struct mi_address_token token,
                          void *batch_location)
{
   __gen_address_type addr = __gen_get_batch_address(b->user_data,
                                                     batch_location);
   uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,
                                                  addr, 0);
   *(token.ptrs[0]) = addr_addr_u64;
   *(token.ptrs[1]) = addr_addr_u64 + 4;
}

#endif /* MI_BUILDER_CAN_WRITE_BATCH */
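
/* Self-modification flow (a usage sketch): mi_store_address() emits two
 * MI_STORE_REGISTER_MEMs whose destination addresses are patched on the CPU
 * once the final location in the batch is known:
 *
 *    struct mi_address_token tok = mi_store_address(b, addr_reg);
 *    mi_self_mod_barrier(b);
 *    ...   // location_in_batch becomes known during building
 *    _mi_resolve_address_token(b, tok, location_in_batch);
 */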

#if GFX_VERx10 >= 125

/*
 * Indirect load/store.  Only available on XE_HP+
 */

MUST_CHECK static inline struct mi_value
mi_load_mem64_offset(struct mi_builder *b,
                     __gen_address_type addr, struct mi_value offset)
{
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);

   return dst;
}

static inline void
mi_store_mem64_offset(struct mi_builder *b,
                      __gen_address_type addr, struct mi_value offset,
                      struct mi_value data)
{
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   data = mi_value_to_gpr(b, mi_resolve_invert(b, data));

   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);
   mi_value_unref(b, data);

   /* This is the only math case with side-effects outside of regular
    * registers, so flush the math afterwards to avoid confusing anyone.
    */
   mi_builder_flush_math(b);
}
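
/* Usage sketch: scatter a 64-bit immediate into a buffer at a dynamically
 * computed qword offset (base and idx_addr are hypothetical addresses):
 *
 *    struct mi_value idx = mi_mem32(idx_addr);
 *    mi_store_mem64_offset(b, base, mi_imul_imm(b, idx, 8), mi_imm(0));
 */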

/*
 * Control-flow Section.  Only available on XE_HP+
 */

struct _mi_goto {
   bool predicated;
   void *mi_bbs;
};

struct mi_goto_target {
   bool placed;
   unsigned num_gotos;
   struct _mi_goto gotos[8];
   __gen_address_type addr;
};

#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})

#define MI_BUILDER_MI_PREDICATE_RESULT_num 0x2418

static inline void
mi_goto_if(struct mi_builder *b, struct mi_value cond,
           struct mi_goto_target *t)
{
   /* First, set up the predicate, if any */
   bool predicated;
   if (cond.type == MI_VALUE_TYPE_IMM) {
      /* If it's an immediate, the goto either doesn't happen or happens
       * unconditionally.
       */
      if (mi_value_to_u64(cond) == 0)
         return;

      assert(mi_value_to_u64(cond) == ~0ull);
      predicated = false;
   } else if (mi_value_is_reg(cond) &&
              cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {
      /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client
       * provided us with
       */
      assert(cond.type == MI_VALUE_TYPE_REG32);
      predicated = true;
   } else {
      mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);
      predicated = true;
   }

   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPOnResultClear;
      }
   }
   if (t->placed) {
      mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.PredicationEnable = predicated;
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = t->addr;
      }
   } else {
      assert(t->num_gotos < ARRAY_SIZE(t->gotos));
      struct _mi_goto g = {
         .predicated = predicated,
         .mi_bbs = __gen_get_batch_dwords(b->user_data,
                                          GENX(MI_BATCH_BUFFER_START_length)),
      };
      memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));
      t->gotos[t->num_gotos++] = g;
   }
   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPNever;
      }
   }
}

static inline void
mi_goto(struct mi_builder *b, struct mi_goto_target *t)
{
   mi_goto_if(b, mi_imm(-1), t);
}

static inline void
mi_goto_target(struct mi_builder *b, struct mi_goto_target *t)
{
   mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
      sp.PredicateEnable = NOOPNever;
      t->addr = __gen_get_batch_address(b->user_data, _dst);
   }
   t->placed = true;

   struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };
   bbs.AddressSpaceIndicator = ASI_PPGTT;
   bbs.BatchBufferStartAddress = t->addr;

   for (unsigned i = 0; i < t->num_gotos; i++) {
      bbs.PredicationEnable = t->gotos[i].predicated;
      GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);
   }
}

static inline struct mi_goto_target
mi_goto_target_init_and_place(struct mi_builder *b)
{
   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_target(b, &t);
   return t;
}

#define mi_loop(b) \
   for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \
        __continue = mi_goto_target_init_and_place(b); !__break.placed; \
        mi_goto(b, &__continue), mi_goto_target(b, &__break))

#define mi_break(b) mi_goto(b, &__break)
#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break)
#define mi_continue(b) mi_goto(b, &__continue)
#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue)
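
/* Usage sketch: a command-streamer-side countdown loop built with the macros
 * above (counter is a hypothetical mi_value held in a GPR; note the value is
 * stored back into the same register so each GPU iteration sees the update):
 *
 *    mi_loop(b) {
 *       mi_break_if(b, mi_z(b, mi_value_ref(b, counter)));
 *       ...                                        // loop body commands
 *       mi_store(b, mi_value_ref(b, counter),
 *                mi_iadd(b, mi_value_ref(b, counter),
 *                        mi_imm((uint64_t)-1)));   // counter--
 *    }
 */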
1373
1374
#endif /* GFX_VERx10 >= 125 */
1375
1376
#endif /* MI_BUILDER_H */
1377
1378