Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/compiler/brw_fs_cmod_propagation.cpp
4550 views
1
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24
#include "brw_fs.h"
25
#include "brw_cfg.h"
26
#include "brw_eu.h"
27
28
/** @file brw_fs_cmod_propagation.cpp
 *
 * Implements a pass that propagates the conditional modifier from a CMP x 0.0
 * instruction into the instruction that generated x.  For instance, in this
 * sequence
 *
 *    add(8)          g70<1>F    g69<8,8,1>F    4096F
 *    cmp.ge.f0(8)    null       g70<8,8,1>F    0F
 *
 * we can do the comparison as part of the ADD instruction directly:
 *
 *    add.ge.f0(8)    g70<1>F    g69<8,8,1>F    4096F
 *
 * If there had been a use of the flag register and another CMP using g70
 *
 *    add.ge.f0(8)    g70<1>F    g69<8,8,1>F    4096F
 *    (+f0) sel(8)    g71<F>     g72<8,8,1>F    g73<8,8,1>F
 *    cmp.ge.f0(8)    null       g70<8,8,1>F    0F
 *
 * we can recognize that the CMP is generating the flag value that already
 * exists and therefore remove the instruction.
 */
50
51
using namespace brw;
52
53
static bool
54
cmod_propagate_cmp_to_add(const intel_device_info *devinfo, bblock_t *block,
55
fs_inst *inst)
56
{
57
bool read_flag = false;
58
const unsigned flags_written = inst->flags_written(devinfo);
59
60
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
61
if (scan_inst->opcode == BRW_OPCODE_ADD &&
62
!scan_inst->is_partial_write() &&
63
scan_inst->exec_size == inst->exec_size) {
64
bool negate;
65
66
/* A CMP is basically a subtraction. The result of the
67
* subtraction must be the same as the result of the addition.
68
* This means that one of the operands must be negated. So (a +
69
* b) vs (a == -b) or (a + -b) vs (a == b).
70
*/
71
if ((inst->src[0].equals(scan_inst->src[0]) &&
72
inst->src[1].negative_equals(scan_inst->src[1])) ||
73
(inst->src[0].equals(scan_inst->src[1]) &&
74
inst->src[1].negative_equals(scan_inst->src[0]))) {
75
negate = false;
76
} else if ((inst->src[0].negative_equals(scan_inst->src[0]) &&
77
inst->src[1].equals(scan_inst->src[1])) ||
78
(inst->src[0].negative_equals(scan_inst->src[1]) &&
79
inst->src[1].equals(scan_inst->src[0]))) {
80
negate = true;
81
} else {
82
goto not_match;
83
}
84
85
/* If the scan instruction writes a different flag register than the
86
* instruction we're trying to propagate from, bail.
87
*
88
* FINISHME: The second part of the condition may be too strong.
89
* Perhaps (scan_inst->flags_written() & flags_written) !=
90
* flags_written?
91
*/
92
if (scan_inst->flags_written(devinfo) != 0 &&
93
scan_inst->flags_written(devinfo) != flags_written)
94
goto not_match;
95
96
/* From the Kaby Lake PRM Vol. 7 "Assigning Conditional Flags":
97
*
98
* * Note that the [post condition signal] bits generated at
99
* the output of a compute are before the .sat.
100
*
101
* Paragraph about post_zero does not mention saturation, but
102
* testing it on actual GPUs shows that conditional modifiers
103
* are applied after saturation.
104
*
105
* * post_zero bit: This bit reflects whether the final
106
* result is zero after all the clamping, normalizing,
107
* or format conversion logic.
108
*
109
* For signed types we don't care about saturation: it won't
110
* change the result of conditional modifier.
111
*
112
* For floating and unsigned types there two special cases,
113
* when we can remove inst even if scan_inst is saturated: G
114
* and LE. Since conditional modifiers are just comparations
115
* against zero, saturating positive values to the upper
116
* limit never changes the result of comparation.
117
*
118
* For negative values:
119
* (sat(x) > 0) == (x > 0) --- false
120
* (sat(x) <= 0) == (x <= 0) --- true
121
*/
122
const enum brw_conditional_mod cond =
123
negate ? brw_swap_cmod(inst->conditional_mod)
124
: inst->conditional_mod;
125
126
if (scan_inst->saturate &&
127
(brw_reg_type_is_floating_point(scan_inst->dst.type) ||
128
type_is_unsigned_int(scan_inst->dst.type)) &&
129
(cond != BRW_CONDITIONAL_G &&
130
cond != BRW_CONDITIONAL_LE))
131
goto not_match;
132
133
/* Otherwise, try propagating the conditional. */
134
if (scan_inst->can_do_cmod() &&
135
((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
136
scan_inst->conditional_mod == cond)) {
137
scan_inst->conditional_mod = cond;
138
inst->remove(block, true);
139
return true;
140
}
141
break;
142
}
143
144
not_match:
145
if ((scan_inst->flags_written(devinfo) & flags_written) != 0)
146
break;
147
148
read_flag = read_flag ||
149
(scan_inst->flags_read(devinfo) & flags_written) != 0;
150
}
151
152
return false;
153
}
154
155
/**
156
* Propagate conditional modifiers from NOT instructions
157
*
158
* Attempt to convert sequences like
159
*
160
* or(8) g78<8,8,1> g76<8,8,1>UD g77<8,8,1>UD
161
* ...
162
* not.nz.f0(8) null g78<8,8,1>UD
163
*
164
* into
165
*
166
* or.z.f0(8) g78<8,8,1> g76<8,8,1>UD g77<8,8,1>UD
167
*/
168
static bool
169
cmod_propagate_not(const intel_device_info *devinfo, bblock_t *block,
170
fs_inst *inst)
171
{
172
const enum brw_conditional_mod cond = brw_negate_cmod(inst->conditional_mod);
173
bool read_flag = false;
174
const unsigned flags_written = inst->flags_written(devinfo);
175
176
if (cond != BRW_CONDITIONAL_Z && cond != BRW_CONDITIONAL_NZ)
177
return false;
178
179
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
180
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
181
inst->src[0], inst->size_read(0))) {
182
if (scan_inst->opcode != BRW_OPCODE_OR &&
183
scan_inst->opcode != BRW_OPCODE_AND)
184
break;
185
186
if (scan_inst->is_partial_write() ||
187
scan_inst->dst.offset != inst->src[0].offset ||
188
scan_inst->exec_size != inst->exec_size)
189
break;
190
191
/* If the scan instruction writes a different flag register than the
192
* instruction we're trying to propagate from, bail.
193
*
194
* FINISHME: The second part of the condition may be too strong.
195
* Perhaps (scan_inst->flags_written() & flags_written) !=
196
* flags_written?
197
*/
198
if (scan_inst->flags_written(devinfo) != 0 &&
199
scan_inst->flags_written(devinfo) != flags_written)
200
break;
201
202
if (scan_inst->can_do_cmod() &&
203
((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
204
scan_inst->conditional_mod == cond)) {
205
scan_inst->conditional_mod = cond;
206
inst->remove(block, true);
207
return true;
208
}
209
break;
210
}
211
212
if ((scan_inst->flags_written(devinfo) & flags_written) != 0)
213
break;
214
215
read_flag = read_flag ||
216
(scan_inst->flags_read(devinfo) & flags_written) != 0;
217
}
218
219
return false;
220
}
221
222
static bool
223
opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block)
224
{
225
bool progress = false;
226
int ip = block->end_ip + 1;
227
228
foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
229
ip--;
230
231
if ((inst->opcode != BRW_OPCODE_AND &&
232
inst->opcode != BRW_OPCODE_CMP &&
233
inst->opcode != BRW_OPCODE_MOV &&
234
inst->opcode != BRW_OPCODE_NOT) ||
235
inst->predicate != BRW_PREDICATE_NONE ||
236
!inst->dst.is_null() ||
237
(inst->src[0].file != VGRF && inst->src[0].file != ATTR &&
238
inst->src[0].file != UNIFORM))
239
continue;
240
241
/* An ABS source modifier can only be handled when processing a compare
242
* with a value other than zero.
243
*/
244
if (inst->src[0].abs &&
245
(inst->opcode != BRW_OPCODE_CMP || inst->src[1].is_zero()))
246
continue;
247
248
/* Only an AND.NZ can be propagated. Many AND.Z instructions are
249
* generated (for ir_unop_not in fs_visitor::emit_bool_to_cond_code).
250
* Propagating those would require inverting the condition on the CMP.
251
* This changes both the flag value and the register destination of the
252
* CMP. That result may be used elsewhere, so we can't change its value
253
* on a whim.
254
*/
255
if (inst->opcode == BRW_OPCODE_AND &&
256
!(inst->src[1].is_one() &&
257
inst->conditional_mod == BRW_CONDITIONAL_NZ &&
258
!inst->src[0].negate))
259
continue;
260
261
if (inst->opcode == BRW_OPCODE_MOV &&
262
inst->conditional_mod != BRW_CONDITIONAL_NZ)
263
continue;
264
265
/* A CMP with a second source of zero can match with anything. A CMP
266
* with a second source that is not zero can only match with an ADD
267
* instruction.
268
*
269
* Only apply this optimization to float-point sources. It can fail for
270
* integers. For inputs a = 0x80000000, b = 4, int(0x80000000) < 4, but
271
* int(0x80000000) - 4 overflows and results in 0x7ffffffc. that's not
272
* less than zero, so the flags get set differently than for (a < b).
273
*/
274
if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) {
275
if (brw_reg_type_is_floating_point(inst->src[0].type) &&
276
cmod_propagate_cmp_to_add(devinfo, block, inst))
277
progress = true;
278
279
continue;
280
}
281
282
if (inst->opcode == BRW_OPCODE_NOT) {
283
progress = cmod_propagate_not(devinfo, block, inst) || progress;
284
continue;
285
}
286
287
bool read_flag = false;
288
const unsigned flags_written = inst->flags_written(devinfo);
289
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
290
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
291
inst->src[0], inst->size_read(0))) {
292
/* If the scan instruction writes a different flag register than
293
* the instruction we're trying to propagate from, bail.
294
*
295
* FINISHME: The second part of the condition may be too strong.
296
* Perhaps (scan_inst->flags_written() & flags_written) !=
297
* flags_written?
298
*/
299
if (scan_inst->flags_written(devinfo) != 0 &&
300
scan_inst->flags_written(devinfo) != flags_written)
301
break;
302
303
if (scan_inst->is_partial_write() ||
304
scan_inst->dst.offset != inst->src[0].offset ||
305
scan_inst->exec_size != inst->exec_size)
306
break;
307
308
/* CMP's result is the same regardless of dest type. */
309
if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
310
scan_inst->opcode == BRW_OPCODE_CMP &&
311
brw_reg_type_is_integer(inst->dst.type)) {
312
inst->remove(block, true);
313
progress = true;
314
break;
315
}
316
317
/* If the AND wasn't handled by the previous case, it isn't safe
318
* to remove it.
319
*/
320
if (inst->opcode == BRW_OPCODE_AND)
321
break;
322
323
/* Not safe to use inequality operators if the types are different
324
*/
325
if (scan_inst->dst.type != inst->src[0].type &&
326
inst->conditional_mod != BRW_CONDITIONAL_Z &&
327
inst->conditional_mod != BRW_CONDITIONAL_NZ)
328
break;
329
330
/* Comparisons operate differently for ints and floats */
331
if (scan_inst->dst.type != inst->dst.type) {
332
/* Comparison result may be altered if the bit-size changes
333
* since that affects range, denorms, etc
334
*/
335
if (type_sz(scan_inst->dst.type) != type_sz(inst->dst.type))
336
break;
337
338
/* We should propagate from a MOV to another instruction in a
339
* sequence like:
340
*
341
* and(16) g31<1>UD g20<8,8,1>UD g22<8,8,1>UD
342
* mov.nz.f0(16) null<1>F g31<8,8,1>D
343
*/
344
if (inst->opcode == BRW_OPCODE_MOV) {
345
if ((inst->src[0].type != BRW_REGISTER_TYPE_D &&
346
inst->src[0].type != BRW_REGISTER_TYPE_UD) ||
347
(scan_inst->dst.type != BRW_REGISTER_TYPE_D &&
348
scan_inst->dst.type != BRW_REGISTER_TYPE_UD)) {
349
break;
350
}
351
} else if (brw_reg_type_is_floating_point(scan_inst->dst.type) !=
352
brw_reg_type_is_floating_point(inst->dst.type)) {
353
break;
354
}
355
}
356
357
/* Knowing following:
358
* - CMP writes to flag register the result of
359
* applying cmod to the `src0 - src1`.
360
* After that it stores the same value to dst.
361
* Other instructions first store their result to
362
* dst, and then store cmod(dst) to the flag
363
* register.
364
* - inst is either CMP or MOV
365
* - inst->dst is null
366
* - inst->src[0] overlaps with scan_inst->dst
367
* - inst->src[1] is zero
368
* - scan_inst wrote to a flag register
369
*
370
* There can be three possible paths:
371
*
372
* - scan_inst is CMP:
373
*
374
* Considering that src0 is either 0x0 (false),
375
* or 0xffffffff (true), and src1 is 0x0:
376
*
377
* - If inst's cmod is NZ, we can always remove
378
* scan_inst: NZ is invariant for false and true. This
379
* holds even if src0 is NaN: .nz is the only cmod,
380
* that returns true for NaN.
381
*
382
* - .g is invariant if src0 has a UD type
383
*
384
* - .l is invariant if src0 has a D type
385
*
386
* - scan_inst and inst have the same cmod:
387
*
388
* If scan_inst is anything than CMP, it already
389
* wrote the appropriate value to the flag register.
390
*
391
* - else:
392
*
393
* We can change cmod of scan_inst to that of inst,
394
* and remove inst. It is valid as long as we make
395
* sure that no instruction uses the flag register
396
* between scan_inst and inst.
397
*/
398
if (!inst->src[0].negate &&
399
scan_inst->flags_written(devinfo)) {
400
if (scan_inst->opcode == BRW_OPCODE_CMP) {
401
if ((inst->conditional_mod == BRW_CONDITIONAL_NZ) ||
402
(inst->conditional_mod == BRW_CONDITIONAL_G &&
403
inst->src[0].type == BRW_REGISTER_TYPE_UD) ||
404
(inst->conditional_mod == BRW_CONDITIONAL_L &&
405
inst->src[0].type == BRW_REGISTER_TYPE_D)) {
406
inst->remove(block, true);
407
progress = true;
408
break;
409
}
410
} else if (scan_inst->conditional_mod == inst->conditional_mod) {
411
/* On Gfx4 and Gfx5 sel.cond will dirty the flags, but the
412
* flags value is not based on the result stored in the
413
* destination. On all other platforms sel.cond will not
414
* write the flags, so execution will not get to this point.
415
*/
416
if (scan_inst->opcode == BRW_OPCODE_SEL) {
417
assert(devinfo->ver <= 5);
418
} else {
419
inst->remove(block, true);
420
progress = true;
421
}
422
423
break;
424
} else if (!read_flag) {
425
scan_inst->conditional_mod = inst->conditional_mod;
426
inst->remove(block, true);
427
progress = true;
428
break;
429
}
430
}
431
432
/* The conditional mod of the CMP/CMPN instructions behaves
433
* specially because the flag output is not calculated from the
434
* result of the instruction, but the other way around, which
435
* means that even if the condmod to propagate and the condmod
436
* from the CMP instruction are the same they will in general give
437
* different results because they are evaluated based on different
438
* inputs.
439
*/
440
if (scan_inst->opcode == BRW_OPCODE_CMP ||
441
scan_inst->opcode == BRW_OPCODE_CMPN)
442
break;
443
444
/* From the Sky Lake PRM, Vol 2a, "Multiply":
445
*
446
* "When multiplying integer data types, if one of the sources
447
* is a DW, the resulting full precision data is stored in
448
* the accumulator. However, if the destination data type is
449
* either W or DW, the low bits of the result are written to
450
* the destination register and the remaining high bits are
451
* discarded. This results in undefined Overflow and Sign
452
* flags. Therefore, conditional modifiers and saturation
453
* (.sat) cannot be used in this case."
454
*
455
* We just disallow cmod propagation on all integer multiplies.
456
*/
457
if (!brw_reg_type_is_floating_point(scan_inst->dst.type) &&
458
scan_inst->opcode == BRW_OPCODE_MUL)
459
break;
460
461
enum brw_conditional_mod cond =
462
inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
463
: inst->conditional_mod;
464
465
/* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
466
*
467
* * Note that the [post condition signal] bits generated at
468
* the output of a compute are before the .sat.
469
*
470
* This limits the cases where we can propagate the conditional
471
* modifier. If scan_inst has a saturate modifier, then we can
472
* only propagate from inst if inst is 'scan_inst <= 0',
473
* 'scan_inst == 0', 'scan_inst != 0', or 'scan_inst > 0'. If
474
* inst is 'scan_inst == 0', the conditional modifier must be
475
* replace with LE. Likewise, if inst is 'scan_inst != 0', the
476
* conditional modifier must be replace with G.
477
*
478
* The only other cases are 'scan_inst < 0' (which is a
479
* contradiction) and 'scan_inst >= 0' (which is a tautology).
480
*/
481
if (scan_inst->saturate) {
482
if (scan_inst->dst.type != BRW_REGISTER_TYPE_F)
483
break;
484
485
if (cond != BRW_CONDITIONAL_Z &&
486
cond != BRW_CONDITIONAL_NZ &&
487
cond != BRW_CONDITIONAL_LE &&
488
cond != BRW_CONDITIONAL_G)
489
break;
490
491
if (inst->opcode != BRW_OPCODE_MOV &&
492
inst->opcode != BRW_OPCODE_CMP)
493
break;
494
495
/* inst->src[1].is_zero() was tested before, but be safe
496
* against possible future changes in this code.
497
*/
498
assert(inst->opcode != BRW_OPCODE_CMP || inst->src[1].is_zero());
499
500
if (cond == BRW_CONDITIONAL_Z)
501
cond = BRW_CONDITIONAL_LE;
502
else if (cond == BRW_CONDITIONAL_NZ)
503
cond = BRW_CONDITIONAL_G;
504
}
505
506
/* Otherwise, try propagating the conditional. */
507
if (scan_inst->can_do_cmod() &&
508
((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
509
scan_inst->conditional_mod == cond)) {
510
scan_inst->conditional_mod = cond;
511
scan_inst->flag_subreg = inst->flag_subreg;
512
inst->remove(block, true);
513
progress = true;
514
}
515
break;
516
}
517
518
if ((scan_inst->flags_written(devinfo) & flags_written) != 0)
519
break;
520
521
read_flag = read_flag ||
522
(scan_inst->flags_read(devinfo) & flags_written) != 0;
523
}
524
}
525
526
/* There is progress if and only if instructions were removed. */
527
assert(progress == (block->end_ip_delta != 0));
528
529
return progress;
530
}
531
532
bool
533
fs_visitor::opt_cmod_propagation()
534
{
535
bool progress = false;
536
537
foreach_block_reverse(block, cfg) {
538
progress = opt_cmod_propagation_local(devinfo, block) || progress;
539
}
540
541
if (progress) {
542
cfg->adjust_block_ips();
543
544
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
545
}
546
547
return progress;
548
}
549
550