Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
4574 views
1
/*
2
* Copyright (C) 2009 Nicolai Haehnle.
3
*
4
* All Rights Reserved.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining
7
* a copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sublicense, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
13
*
14
* The above copyright notice and this permission notice (including the
15
* next paragraph) shall be included in all copies or substantial
16
* portions of the Software.
17
*
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
*
26
*/
27
28
#include "radeon_program_pair.h"
29
30
#include <stdio.h>
31
32
#include "radeon_compiler.h"
33
#include "radeon_compiler_util.h"
34
#include "radeon_dataflow.h"
35
#include "radeon_list.h"
36
#include "radeon_variable.h"
37
38
#include "util/u_debug.h"
39
40
#define VERBOSE 0
41
42
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
43
44
struct schedule_instruction {
45
struct rc_instruction * Instruction;
46
47
/** Next instruction in the linked list of ready instructions. */
48
struct schedule_instruction *NextReady;
49
50
/** Values that this instruction reads and writes */
51
struct reg_value * WriteValues[4];
52
struct reg_value * ReadValues[12];
53
unsigned int NumWriteValues:3;
54
unsigned int NumReadValues:4;
55
56
/**
57
* Number of (read and write) dependencies that must be resolved before
58
* this instruction can be scheduled.
59
*/
60
unsigned int NumDependencies:5;
61
62
/** List of all readers (see rc_get_readers() for the definition of
63
* "all readers"), even those outside the basic block this instruction
64
* lives in. */
65
struct rc_reader_data GlobalReaders;
66
67
/** If the scheduler has paired an RGB and an Alpha instruction together,
68
* PairedInst references the alpha instruction's dependency information.
69
*/
70
struct schedule_instruction * PairedInst;
71
72
/** This scheduler uses the value of Score to determine which
73
* instruction to schedule. Instructions with a higher value of Score
74
* will be scheduled first. */
75
int Score;
76
77
/** The number of components that read from a TEX instruction. */
78
unsigned TexReadCount;
79
80
/** For TEX instructions a list of readers */
81
struct rc_list * TexReaders;
82
};
83
84
85
/**
 * Node of the singly linked list recording which instructions read a
 * given register value.
 */
struct reg_value_reader {
	/** Instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Following list node, or NULL at the end of the list. */
	struct reg_value_reader *Next;
};
92
93
/**
 * Describes one value held in a single component of an
 * RC_FILE_TEMPORARY register.
 */
struct reg_value {
	/** Instruction that produces this value. */
	struct schedule_instruction *Writer;

	/**
	 * Unordered linked list of the instructions reading this value.
	 * Once the writer is committed, each reader's dependency count
	 * is decreased.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Outstanding readers of this value; decremented every time one of
	 * them is committed.  When it reaches zero, the dependency count of
	 * the instruction that writes \ref Next is decremented.
	 */
	unsigned int NumReaders;

	/** The next value to be written to the same register component. */
	struct reg_value *Next;
};
117
118
/**
 * Current value of each of the four components of one register.
 */
struct register_state {
	struct reg_value *Values[4];
};
121
122
struct remap_reg {
123
struct rc_instruction * Inst;
124
unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
125
unsigned int OldSwizzle:3;
126
unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
127
unsigned int NewSwizzle:3;
128
unsigned int OnlyTexReads:1;
129
struct remap_reg * Next;
130
};
131
132
struct schedule_state {
133
struct radeon_compiler * C;
134
struct schedule_instruction * Current;
135
/** Array of the previous writers of Current's destination register
136
* indexed by channel. */
137
struct schedule_instruction * PrevWriter[4];
138
139
struct register_state Temporary[RC_REGISTER_MAX_INDEX];
140
141
/**
142
* Linked lists of instructions that can be scheduled right now,
143
* based on which ALU/TEX resources they require.
144
*/
145
/*@{*/
146
struct schedule_instruction *ReadyFullALU;
147
struct schedule_instruction *ReadyRGB;
148
struct schedule_instruction *ReadyAlpha;
149
struct schedule_instruction *ReadyTEX;
150
/*@}*/
151
struct rc_list *PendingTEX;
152
153
void (*CalcScore)(struct schedule_instruction *);
154
long max_tex_group;
155
unsigned PrevBlockHasTex:1;
156
unsigned TEXCount;
157
unsigned Opt:1;
158
};
159
160
static struct reg_value ** get_reg_valuep(struct schedule_state * s,
161
rc_register_file file, unsigned int index, unsigned int chan)
162
{
163
if (file != RC_FILE_TEMPORARY)
164
return 0;
165
166
if (index >= RC_REGISTER_MAX_INDEX) {
167
rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
168
return 0;
169
}
170
171
return &s->Temporary[index].Values[chan];
172
}
173
174
static unsigned get_tex_read_count(struct schedule_instruction * sinst)
175
{
176
unsigned tex_read_count = sinst->TexReadCount;
177
if (sinst->PairedInst) {
178
tex_read_count += sinst->PairedInst->TexReadCount;
179
}
180
return tex_read_count;
181
}
182
183
#if VERBOSE
184
static void print_list(struct schedule_instruction * sinst)
185
{
186
struct schedule_instruction * ptr;
187
for (ptr = sinst; ptr; ptr=ptr->NextReady) {
188
unsigned tex_read_count = get_tex_read_count(ptr);
189
unsigned score = sinst->Score;
190
fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
191
tex_read_count);
192
}
193
fprintf(stderr, "\n");
194
}
195
#endif
196
197
static void remove_inst_from_list(struct schedule_instruction ** list,
198
struct schedule_instruction * inst)
199
{
200
struct schedule_instruction * prev = NULL;
201
struct schedule_instruction * list_ptr;
202
for (list_ptr = *list; list_ptr; prev = list_ptr,
203
list_ptr = list_ptr->NextReady) {
204
if (list_ptr == inst) {
205
if (prev) {
206
prev->NextReady = inst->NextReady;
207
} else {
208
*list = inst->NextReady;
209
}
210
inst->NextReady = NULL;
211
break;
212
}
213
}
214
}
215
216
static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
217
{
218
inst->NextReady = *list;
219
*list = inst;
220
}
221
222
static void add_inst_to_list_score(struct schedule_instruction ** list,
223
struct schedule_instruction * inst)
224
{
225
struct schedule_instruction * temp;
226
struct schedule_instruction * prev;
227
if (!*list) {
228
*list = inst;
229
return;
230
}
231
temp = *list;
232
prev = NULL;
233
while(temp && inst->Score <= temp->Score) {
234
prev = temp;
235
temp = temp->NextReady;
236
}
237
238
if (!prev) {
239
inst->NextReady = temp;
240
*list = inst;
241
} else {
242
prev->NextReady = inst;
243
inst->NextReady = temp;
244
}
245
}
246
247
static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
248
{
249
DBG("%i is now ready\n", sinst->Instruction->IP);
250
251
/* Adding Ready TEX instructions to the end of the "Ready List" helps
252
* us emit TEX instructions in blocks without losing our place. */
253
if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
254
add_inst_to_list_score(&s->ReadyTEX, sinst);
255
else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
256
add_inst_to_list_score(&s->ReadyRGB, sinst);
257
else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
258
add_inst_to_list_score(&s->ReadyAlpha, sinst);
259
else
260
add_inst_to_list_score(&s->ReadyFullALU, sinst);
261
}
262
263
static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
264
{
265
assert(sinst->NumDependencies > 0);
266
sinst->NumDependencies--;
267
if (!sinst->NumDependencies)
268
instruction_ready(s, sinst);
269
}
270
271
/* These functions provide different heuristics for scheduling instructions.
272
* The default is calc_score_readers. */
273
274
#if 0
275
276
static void calc_score_zero(struct schedule_instruction * sinst)
277
{
278
sinst->Score = 0;
279
}
280
281
static void calc_score_deps(struct schedule_instruction * sinst)
282
{
283
int i;
284
sinst->Score = 0;
285
for (i = 0; i < sinst->NumWriteValues; i++) {
286
struct reg_value * v = sinst->WriteValues[i];
287
if (v->NumReaders) {
288
struct reg_value_reader * r;
289
for (r = v->Readers; r; r = r->Next) {
290
if (r->Reader->NumDependencies == 1) {
291
sinst->Score += 100;
292
}
293
sinst->Score += r->Reader->NumDependencies;
294
}
295
}
296
}
297
}
298
299
#endif
300
301
#define NO_OUTPUT_SCORE (1 << 24)
302
303
static void score_no_output(struct schedule_instruction * sinst)
304
{
305
assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
306
if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
307
!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
308
if (sinst->PairedInst) {
309
if (!sinst->PairedInst->Instruction->U.P.
310
RGB.OutputWriteMask
311
&& !sinst->PairedInst->Instruction->U.P.
312
Alpha.OutputWriteMask) {
313
sinst->Score |= NO_OUTPUT_SCORE;
314
}
315
316
} else {
317
sinst->Score |= NO_OUTPUT_SCORE;
318
}
319
}
320
}
321
322
#define PAIRED_SCORE (1 << 16)
323
324
static void calc_score_r300(struct schedule_instruction * sinst)
325
{
326
unsigned src_idx;
327
328
if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
329
sinst->Score = 0;
330
return;
331
}
332
333
score_no_output(sinst);
334
335
if (sinst->PairedInst) {
336
sinst->Score |= PAIRED_SCORE;
337
return;
338
}
339
340
for (src_idx = 0; src_idx < 4; src_idx++) {
341
sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
342
sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
343
}
344
}
345
346
#define NO_READ_TEX_SCORE (1 << 16)
347
348
static void calc_score_readers(struct schedule_instruction * sinst)
349
{
350
if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
351
sinst->Score = 0;
352
} else {
353
sinst->Score = sinst->NumReadValues;
354
if (sinst->PairedInst) {
355
sinst->Score += sinst->PairedInst->NumReadValues;
356
}
357
if (get_tex_read_count(sinst) == 0) {
358
sinst->Score |= NO_READ_TEX_SCORE;
359
}
360
score_no_output(sinst);
361
}
362
}
363
364
/**
365
* This function decreases the dependencies of the next instruction that
366
* wants to write to each of sinst's read values.
367
*/
368
static void commit_update_reads(struct schedule_state * s,
369
struct schedule_instruction * sinst){
370
unsigned int i;
371
for(i = 0; i < sinst->NumReadValues; ++i) {
372
struct reg_value * v = sinst->ReadValues[i];
373
assert(v->NumReaders > 0);
374
v->NumReaders--;
375
if (!v->NumReaders) {
376
if (v->Next) {
377
decrease_dependencies(s, v->Next->Writer);
378
}
379
}
380
}
381
if (sinst->PairedInst) {
382
commit_update_reads(s, sinst->PairedInst);
383
}
384
}
385
386
static void commit_update_writes(struct schedule_state * s,
387
struct schedule_instruction * sinst){
388
unsigned int i;
389
for(i = 0; i < sinst->NumWriteValues; ++i) {
390
struct reg_value * v = sinst->WriteValues[i];
391
if (v->NumReaders) {
392
for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
393
decrease_dependencies(s, r->Reader);
394
}
395
} else {
396
/* This happens in instruction sequences of the type
397
* OP r.x, ...;
398
* OP r.x, r.x, ...;
399
* See also the subtlety in how instructions that both
400
* read and write the same register are scanned.
401
*/
402
if (v->Next)
403
decrease_dependencies(s, v->Next->Writer);
404
}
405
}
406
if (sinst->PairedInst) {
407
commit_update_writes(s, sinst->PairedInst);
408
}
409
}
410
411
static void notify_sem_wait(struct schedule_state *s)
412
{
413
struct rc_list * pend_ptr;
414
for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
415
struct rc_list * read_ptr;
416
struct schedule_instruction * pending = pend_ptr->Item;
417
for (read_ptr = pending->TexReaders; read_ptr;
418
read_ptr = read_ptr->Next) {
419
struct schedule_instruction * reader = read_ptr->Item;
420
reader->TexReadCount--;
421
}
422
}
423
s->PendingTEX = NULL;
424
}
425
426
static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
427
{
428
DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
429
430
commit_update_reads(s, sinst);
431
432
commit_update_writes(s, sinst);
433
434
if (get_tex_read_count(sinst) > 0) {
435
sinst->Instruction->U.P.SemWait = 1;
436
notify_sem_wait(s);
437
}
438
}
439
440
/**
441
* Emit all ready texture instructions in a single block.
442
*
443
* Emit as a single block to (hopefully) sample many textures in parallel,
444
* and to avoid hardware indirections on R300.
445
*/
446
static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
447
{
448
struct schedule_instruction *readytex;
449
struct rc_instruction * inst_begin;
450
451
assert(s->ReadyTEX);
452
notify_sem_wait(s);
453
454
/* Node marker for R300 */
455
inst_begin = rc_insert_new_instruction(s->C, before->Prev);
456
inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
457
458
/* Link texture instructions back in */
459
readytex = s->ReadyTEX;
460
while(readytex) {
461
rc_insert_instruction(before->Prev, readytex->Instruction);
462
DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
463
464
/* All of the TEX instructions in the same TEX block have
465
* their source registers read from before any of the
466
* instructions in that block write to their destination
467
* registers. This means that when we commit a TEX
468
* instruction, any other TEX instruction that wants to write
469
* to one of the committed instruction's source register can be
470
* marked as ready and should be emitted in the same TEX
471
* block. This prevents the following sequence from being
472
* emitted in two different TEX blocks:
473
* 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
474
* 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
475
*/
476
commit_update_reads(s, readytex);
477
readytex = readytex->NextReady;
478
}
479
readytex = s->ReadyTEX;
480
s->ReadyTEX = 0;
481
while(readytex){
482
DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
483
commit_update_writes(s, readytex);
484
/* Set semaphore bits for last TEX instruction in the block */
485
if (!readytex->NextReady) {
486
readytex->Instruction->U.I.TexSemAcquire = 1;
487
readytex->Instruction->U.I.TexSemWait = 1;
488
}
489
rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
490
readytex = readytex->NextReady;
491
}
492
}
493
494
/* This is a helper function for destructive_merge_instructions(). It helps
495
* merge presubtract sources from two instructions and makes sure the
496
* presubtract sources end up in the correct spot. This function assumes that
497
* dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
498
* but no scalar instruction (alpha).
499
* @return 0 if merging the presubtract sources fails.
500
* @retrun 1 if merging the presubtract sources succeeds.
501
*/
502
static int merge_presub_sources(
503
struct rc_pair_instruction * dst_full,
504
struct rc_pair_sub_instruction src,
505
unsigned int type)
506
{
507
unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
508
struct rc_pair_sub_instruction * dst_sub;
509
const struct rc_opcode_info * info;
510
511
assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
512
513
switch(type) {
514
case RC_SOURCE_RGB:
515
is_rgb = 1;
516
is_alpha = 0;
517
dst_sub = &dst_full->RGB;
518
break;
519
case RC_SOURCE_ALPHA:
520
is_rgb = 0;
521
is_alpha = 1;
522
dst_sub = &dst_full->Alpha;
523
break;
524
default:
525
assert(0);
526
return 0;
527
}
528
529
info = rc_get_opcode_info(dst_full->RGB.Opcode);
530
531
if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
532
return 0;
533
534
srcp_regs = rc_presubtract_src_reg_count(
535
src.Src[RC_PAIR_PRESUB_SRC].Index);
536
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
537
unsigned int arg;
538
int free_source;
539
unsigned int one_way = 0;
540
struct rc_pair_instruction_source srcp = src.Src[srcp_src];
541
struct rc_pair_instruction_source temp;
542
543
free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
544
srcp.File, srcp.Index);
545
546
/* If free_source < 0 then there are no free source
547
* slots. */
548
if (free_source < 0)
549
return 0;
550
551
temp = dst_sub->Src[srcp_src];
552
dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
553
554
/* srcp needs src0 and src1 to be the same */
555
if (free_source < srcp_src) {
556
if (!temp.Used)
557
continue;
558
free_source = rc_pair_alloc_source(dst_full, is_rgb,
559
is_alpha, temp.File, temp.Index);
560
if (free_source < 0)
561
return 0;
562
one_way = 1;
563
} else {
564
dst_sub->Src[free_source] = temp;
565
}
566
567
/* If free_source == srcp_src, then the presubtract
568
* source is already in the correct place. */
569
if (free_source == srcp_src)
570
continue;
571
572
/* Shuffle the sources, so we can put the
573
* presubtract source in the correct place. */
574
for(arg = 0; arg < info->NumSrcRegs; arg++) {
575
/*If this arg does not read from an rgb source,
576
* do nothing. */
577
if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
578
& type)) {
579
continue;
580
}
581
582
if (dst_full->RGB.Arg[arg].Source == srcp_src)
583
dst_full->RGB.Arg[arg].Source = free_source;
584
/* We need to do this just in case register
585
* is one of the sources already, but in the
586
* wrong spot. */
587
else if(dst_full->RGB.Arg[arg].Source == free_source
588
&& !one_way) {
589
dst_full->RGB.Arg[arg].Source = srcp_src;
590
}
591
}
592
}
593
return 1;
594
}
595
596
597
/* This function assumes that rgb.Alpha and alpha.RGB are unused */
598
static int destructive_merge_instructions(
599
struct rc_pair_instruction * rgb,
600
struct rc_pair_instruction * alpha)
601
{
602
const struct rc_opcode_info * opcode;
603
604
assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
605
assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
606
607
/* Presubtract registers need to be merged first so that registers
608
* needed by the presubtract operation can be placed in src0 and/or
609
* src1. */
610
611
/* Merge the rgb presubtract registers. */
612
if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
613
if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
614
return 0;
615
}
616
}
617
/* Merge the alpha presubtract registers */
618
if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
619
if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){
620
return 0;
621
}
622
}
623
624
/* Copy alpha args into rgb */
625
opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
626
627
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
628
unsigned int srcrgb = 0;
629
unsigned int srcalpha = 0;
630
unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
631
rc_register_file file = 0;
632
unsigned int index = 0;
633
int source;
634
635
if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
636
srcrgb = 1;
637
file = alpha->RGB.Src[oldsrc].File;
638
index = alpha->RGB.Src[oldsrc].Index;
639
} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
640
srcalpha = 1;
641
file = alpha->Alpha.Src[oldsrc].File;
642
index = alpha->Alpha.Src[oldsrc].Index;
643
}
644
645
source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
646
if (source < 0)
647
return 0;
648
649
rgb->Alpha.Arg[arg].Source = source;
650
rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
651
rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
652
rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
653
}
654
655
/* Copy alpha opcode into rgb */
656
rgb->Alpha.Opcode = alpha->Alpha.Opcode;
657
rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
658
rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
659
rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
660
rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
661
rgb->Alpha.Saturate = alpha->Alpha.Saturate;
662
rgb->Alpha.Omod = alpha->Alpha.Omod;
663
664
/* Merge ALU result writing */
665
if (alpha->WriteALUResult) {
666
if (rgb->WriteALUResult)
667
return 0;
668
669
rgb->WriteALUResult = alpha->WriteALUResult;
670
rgb->ALUResultCompare = alpha->ALUResultCompare;
671
}
672
673
/* Copy SemWait */
674
rgb->SemWait |= alpha->SemWait;
675
676
return 1;
677
}
678
679
/**
680
* Try to merge the given instructions into the rgb instructions.
681
*
682
* Return true on success; on failure, return false, and keep
683
* the instructions untouched.
684
*/
685
static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
686
{
687
struct rc_pair_instruction backup;
688
689
/*Instructions can't write output registers and ALU result at the
690
* same time. */
691
if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
692
|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
693
return 0;
694
}
695
696
/* Writing output registers in the middle of shaders is slow, so
697
* we don't want to pair output writes with temp writes. */
698
if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
699
|| (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
700
return 0;
701
}
702
703
memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
704
705
if (destructive_merge_instructions(rgb, alpha))
706
return 1;
707
708
memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
709
return 0;
710
}
711
712
static void presub_nop(struct rc_instruction * emitted) {
713
int prev_rgb_index, prev_alpha_index, i, num_src;
714
715
/* We don't need a nop if the previous instruction is a TEX. */
716
if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
717
return;
718
}
719
if (emitted->Prev->U.P.RGB.WriteMask)
720
prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
721
else
722
prev_rgb_index = -1;
723
if (emitted->Prev->U.P.Alpha.WriteMask)
724
prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
725
else
726
prev_alpha_index = 1;
727
728
/* Check the previous rgb instruction */
729
if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
730
num_src = rc_presubtract_src_reg_count(
731
emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
732
for (i = 0; i < num_src; i++) {
733
unsigned int index = emitted->U.P.RGB.Src[i].Index;
734
if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
735
&& (index == prev_rgb_index
736
|| index == prev_alpha_index)) {
737
emitted->Prev->U.P.Nop = 1;
738
return;
739
}
740
}
741
}
742
743
/* Check the previous alpha instruction. */
744
if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
745
return;
746
747
num_src = rc_presubtract_src_reg_count(
748
emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
749
for (i = 0; i < num_src; i++) {
750
unsigned int index = emitted->U.P.Alpha.Src[i].Index;
751
if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
752
&& (index == prev_rgb_index || index == prev_alpha_index)) {
753
emitted->Prev->U.P.Nop = 1;
754
return;
755
}
756
}
757
}
758
759
static void rgb_to_alpha_remap (
760
struct rc_instruction * inst,
761
struct rc_pair_instruction_arg * arg,
762
rc_register_file old_file,
763
rc_swizzle old_swz,
764
unsigned int new_index)
765
{
766
int new_src_index;
767
unsigned int i;
768
769
for (i = 0; i < 3; i++) {
770
if (get_swz(arg->Swizzle, i) == old_swz) {
771
SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
772
}
773
}
774
new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
775
old_file, new_index);
776
/* This conversion is not possible, we must have made a mistake in
777
* is_rgb_to_alpha_possible. */
778
if (new_src_index < 0) {
779
assert(0);
780
return;
781
}
782
783
arg->Source = new_src_index;
784
}
785
786
static int can_remap(unsigned int opcode)
787
{
788
switch(opcode) {
789
case RC_OPCODE_DDX:
790
case RC_OPCODE_DDY:
791
return 0;
792
default:
793
return 1;
794
}
795
}
796
797
static int can_convert_opcode_to_alpha(unsigned int opcode)
798
{
799
switch(opcode) {
800
case RC_OPCODE_DDX:
801
case RC_OPCODE_DDY:
802
case RC_OPCODE_DP2:
803
case RC_OPCODE_DP3:
804
case RC_OPCODE_DP4:
805
case RC_OPCODE_DPH:
806
return 0;
807
default:
808
return 1;
809
}
810
}
811
812
static void is_rgb_to_alpha_possible(
813
void * userdata,
814
struct rc_instruction * inst,
815
struct rc_pair_instruction_arg * arg,
816
struct rc_pair_instruction_source * src)
817
{
818
unsigned int read_chan = RC_SWIZZLE_UNUSED;
819
unsigned int alpha_sources = 0;
820
unsigned int i;
821
struct rc_reader_data * reader_data = userdata;
822
823
if (!can_remap(inst->U.P.RGB.Opcode)
824
|| !can_remap(inst->U.P.Alpha.Opcode)) {
825
reader_data->Abort = 1;
826
return;
827
}
828
829
if (!src)
830
return;
831
832
/* XXX There are some cases where we can still do the conversion if
833
* a reader reads from a presubtract source, but for now we'll prevent
834
* it. */
835
if (arg->Source == RC_PAIR_PRESUB_SRC) {
836
reader_data->Abort = 1;
837
return;
838
}
839
840
/* Make sure the source only reads the register component that we
841
* are going to be convering from. It is OK if the instruction uses
842
* this component more than once.
843
* XXX If the index we will be converting to is the same as the
844
* current index, then it is OK to read from more than one component.
845
*/
846
for (i = 0; i < 3; i++) {
847
rc_swizzle swz = get_swz(arg->Swizzle, i);
848
switch(swz) {
849
case RC_SWIZZLE_X:
850
case RC_SWIZZLE_Y:
851
case RC_SWIZZLE_Z:
852
case RC_SWIZZLE_W:
853
if (read_chan == RC_SWIZZLE_UNUSED) {
854
read_chan = swz;
855
} else if (read_chan != swz) {
856
reader_data->Abort = 1;
857
return;
858
}
859
break;
860
default:
861
break;
862
}
863
}
864
865
/* Make sure there are enough alpha sources.
866
* XXX If we know what register all the readers are going
867
* to be remapped to, then in some situations we can still do
868
* the substitution, even if all 3 alpha sources are being used.*/
869
for (i = 0; i < 3; i++) {
870
if (inst->U.P.Alpha.Src[i].Used) {
871
alpha_sources++;
872
}
873
}
874
if (alpha_sources > 2) {
875
reader_data->Abort = 1;
876
return;
877
}
878
}
879
880
static int convert_rgb_to_alpha(
881
struct schedule_state * s,
882
struct schedule_instruction * sched_inst)
883
{
884
struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
885
unsigned int old_mask = pair_inst->RGB.WriteMask;
886
unsigned int old_swz = rc_mask_to_swizzle(old_mask);
887
const struct rc_opcode_info * info =
888
rc_get_opcode_info(pair_inst->RGB.Opcode);
889
int new_index = -1;
890
unsigned int i;
891
892
if (sched_inst->GlobalReaders.Abort)
893
return 0;
894
895
if (!pair_inst->RGB.WriteMask)
896
return 0;
897
898
if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
899
|| !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
900
return 0;
901
}
902
903
assert(sched_inst->NumWriteValues == 1);
904
905
if (!sched_inst->WriteValues[0]) {
906
assert(0);
907
return 0;
908
}
909
910
/* We start at the old index, because if we can reuse the same
911
* register and just change the swizzle then it is more likely we
912
* will be able to convert all the readers. */
913
for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
914
struct reg_value ** new_regvalp = get_reg_valuep(
915
s, RC_FILE_TEMPORARY, i, 3);
916
if (!*new_regvalp) {
917
struct reg_value ** old_regvalp =
918
get_reg_valuep(s,
919
RC_FILE_TEMPORARY,
920
pair_inst->RGB.DestIndex,
921
rc_mask_to_swizzle(old_mask));
922
new_index = i;
923
*new_regvalp = *old_regvalp;
924
*old_regvalp = NULL;
925
new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
926
break;
927
}
928
}
929
if (new_index < 0) {
930
return 0;
931
}
932
933
/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
934
* as the RGB opcode, then the Alpha instruction will already contain
935
* the correct opcode and instruction args, so we do not want to
936
* overwrite them.
937
*/
938
if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
939
pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
940
memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
941
sizeof(pair_inst->Alpha.Arg));
942
}
943
pair_inst->Alpha.DestIndex = new_index;
944
pair_inst->Alpha.WriteMask = RC_MASK_W;
945
pair_inst->Alpha.Target = pair_inst->RGB.Target;
946
pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
947
pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
948
pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
949
pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
950
/* Move the swizzles into the first chan */
951
for (i = 0; i < info->NumSrcRegs; i++) {
952
unsigned int j;
953
for (j = 0; j < 3; j++) {
954
unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
955
if (swz != RC_SWIZZLE_UNUSED) {
956
pair_inst->Alpha.Arg[i].Swizzle =
957
rc_init_swizzle(swz, 1);
958
break;
959
}
960
}
961
}
962
pair_inst->RGB.Opcode = RC_OPCODE_NOP;
963
pair_inst->RGB.DestIndex = 0;
964
pair_inst->RGB.WriteMask = 0;
965
pair_inst->RGB.Target = 0;
966
pair_inst->RGB.OutputWriteMask = 0;
967
pair_inst->RGB.DepthWriteMask = 0;
968
pair_inst->RGB.Saturate = 0;
969
memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
970
971
for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
972
struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
973
rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
974
RC_FILE_TEMPORARY, old_swz, new_index);
975
}
976
return 1;
977
}
978
979
static void try_convert_and_pair(
980
struct schedule_state *s,
981
struct schedule_instruction ** inst_list)
982
{
983
struct schedule_instruction * list_ptr = *inst_list;
984
while (list_ptr && *inst_list && (*inst_list)->NextReady) {
985
int paired = 0;
986
if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
987
&& list_ptr->Instruction->U.P.RGB.Opcode
988
!= RC_OPCODE_REPL_ALPHA) {
989
goto next;
990
}
991
if (list_ptr->NumWriteValues == 1
992
&& convert_rgb_to_alpha(s, list_ptr)) {
993
994
struct schedule_instruction * pair_ptr;
995
remove_inst_from_list(inst_list, list_ptr);
996
add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
997
998
for (pair_ptr = s->ReadyRGB; pair_ptr;
999
pair_ptr = pair_ptr->NextReady) {
1000
if (merge_instructions(&pair_ptr->Instruction->U.P,
1001
&list_ptr->Instruction->U.P)) {
1002
remove_inst_from_list(&s->ReadyAlpha, list_ptr);
1003
remove_inst_from_list(&s->ReadyRGB, pair_ptr);
1004
pair_ptr->PairedInst = list_ptr;
1005
1006
add_inst_to_list(&s->ReadyFullALU, pair_ptr);
1007
list_ptr = *inst_list;
1008
paired = 1;
1009
break;
1010
}
1011
1012
}
1013
}
1014
if (!paired) {
1015
next:
1016
list_ptr = list_ptr->NextReady;
1017
}
1018
}
1019
}
1020
1021
/**
1022
* This function attempts to merge RGB and Alpha instructions together.
1023
*/
1024
static void pair_instructions(struct schedule_state * s)
1025
{
1026
struct schedule_instruction *rgb_ptr;
1027
struct schedule_instruction *alpha_ptr;
1028
1029
/* Some pairings might fail because they require too
1030
* many source slots; try all possible pairings if necessary */
1031
rgb_ptr = s->ReadyRGB;
1032
while(rgb_ptr) {
1033
struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1034
alpha_ptr = s->ReadyAlpha;
1035
while(alpha_ptr) {
1036
struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1037
if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1038
/* Remove RGB and Alpha from their ready lists.
1039
*/
1040
remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1041
remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1042
rgb_ptr->PairedInst = alpha_ptr;
1043
add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1044
break;
1045
}
1046
alpha_ptr = alpha_next;
1047
}
1048
rgb_ptr = rgb_next;
1049
}
1050
1051
if (!s->Opt) {
1052
return;
1053
}
1054
1055
/* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1056
* slot can be converted into Alpha instructions. */
1057
try_convert_and_pair(s, &s->ReadyFullALU);
1058
1059
/* Try to convert some of the RGB instructions to Alpha and
1060
* try to pair it with another RGB. */
1061
try_convert_and_pair(s, &s->ReadyRGB);
1062
}
1063
1064
static void update_max_score(
1065
struct schedule_state * s,
1066
struct schedule_instruction ** list,
1067
int * max_score,
1068
struct schedule_instruction ** max_inst_out,
1069
struct schedule_instruction *** list_out)
1070
{
1071
struct schedule_instruction * list_ptr;
1072
for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1073
int score;
1074
s->CalcScore(list_ptr);
1075
score = list_ptr->Score;
1076
if (!*max_inst_out || score > *max_score) {
1077
*max_score = score;
1078
*max_inst_out = list_ptr;
1079
*list_out = list;
1080
}
1081
}
1082
}
1083
1084
static void emit_instruction(
1085
struct schedule_state * s,
1086
struct rc_instruction * before)
1087
{
1088
int max_score = -1;
1089
struct schedule_instruction * max_inst = NULL;
1090
struct schedule_instruction ** max_list = NULL;
1091
unsigned tex_count = 0;
1092
struct schedule_instruction * tex_ptr;
1093
1094
pair_instructions(s);
1095
#if VERBOSE
1096
fprintf(stderr, "Full:\n");
1097
print_list(s->ReadyFullALU);
1098
fprintf(stderr, "RGB:\n");
1099
print_list(s->ReadyRGB);
1100
fprintf(stderr, "Alpha:\n");
1101
print_list(s->ReadyAlpha);
1102
fprintf(stderr, "TEX:\n");
1103
print_list(s->ReadyTEX);
1104
#endif
1105
1106
for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1107
if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
1108
emit_all_tex(s, before);
1109
return;
1110
}
1111
tex_count++;
1112
}
1113
update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1114
update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1115
update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1116
1117
if (tex_count >= s->max_tex_group || max_score == -1
1118
|| (s->TEXCount > 0 && tex_count == s->TEXCount)
1119
|| (!s->C->is_r500 && tex_count > 0 && max_score == -1)) {
1120
emit_all_tex(s, before);
1121
} else {
1122
1123
1124
remove_inst_from_list(max_list, max_inst);
1125
rc_insert_instruction(before->Prev, max_inst->Instruction);
1126
commit_alu_instruction(s, max_inst);
1127
1128
presub_nop(before->Prev);
1129
}
1130
}
1131
1132
static void add_tex_reader(
1133
struct schedule_state * s,
1134
struct schedule_instruction * writer,
1135
struct schedule_instruction * reader)
1136
{
1137
if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1138
/*Not a TEX instructions */
1139
return;
1140
}
1141
reader->TexReadCount++;
1142
rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1143
}
1144
1145
static void scan_read(void * data, struct rc_instruction * inst,
1146
rc_register_file file, unsigned int index, unsigned int chan)
1147
{
1148
struct schedule_state * s = data;
1149
struct reg_value ** v = get_reg_valuep(s, file, index, chan);
1150
struct reg_value_reader * reader;
1151
1152
if (!v)
1153
return;
1154
1155
if (*v && (*v)->Writer == s->Current) {
1156
/* The instruction reads and writes to a register component.
1157
* In this case, we only want to increment dependencies by one.
1158
* Why?
1159
* Because each instruction depends on the writers of its source
1160
* registers _and_ the most recent writer of its destination
1161
* register. In this case, the current instruction (s->Current)
1162
* has a dependency that both writes to one of its source
1163
* registers and was the most recent writer to its destination
1164
* register. We have already marked this dependency in
1165
* scan_write(), so we don't need to do it again.
1166
*/
1167
1168
/* We need to make sure we are adding s->Current to the
1169
* previous writer's list of TexReaders, if the previous writer
1170
* was a TEX instruction.
1171
*/
1172
add_tex_reader(s, s->PrevWriter[chan], s->Current);
1173
1174
return;
1175
}
1176
1177
DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1178
1179
reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
1180
reader->Reader = s->Current;
1181
if (!*v) {
1182
/* In this situation, the instruction reads from a register
1183
* that hasn't been written to or read from in the current
1184
* block. */
1185
*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
1186
memset(*v, 0, sizeof(struct reg_value));
1187
(*v)->Readers = reader;
1188
} else {
1189
reader->Next = (*v)->Readers;
1190
(*v)->Readers = reader;
1191
/* Only update the current instruction's dependencies if the
1192
* register it reads from has been written to in this block. */
1193
if ((*v)->Writer) {
1194
add_tex_reader(s, (*v)->Writer, s->Current);
1195
s->Current->NumDependencies++;
1196
}
1197
}
1198
(*v)->NumReaders++;
1199
1200
if (s->Current->NumReadValues >= 12) {
1201
rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
1202
} else {
1203
s->Current->ReadValues[s->Current->NumReadValues++] = *v;
1204
}
1205
}
1206
1207
static void scan_write(void * data, struct rc_instruction * inst,
1208
rc_register_file file, unsigned int index, unsigned int chan)
1209
{
1210
struct schedule_state * s = data;
1211
struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
1212
struct reg_value * newv;
1213
1214
if (!pv)
1215
return;
1216
1217
DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1218
1219
newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
1220
memset(newv, 0, sizeof(*newv));
1221
1222
newv->Writer = s->Current;
1223
1224
if (*pv) {
1225
(*pv)->Next = newv;
1226
s->Current->NumDependencies++;
1227
/* Keep track of the previous writer to s->Current's destination
1228
* register */
1229
s->PrevWriter[chan] = (*pv)->Writer;
1230
}
1231
1232
*pv = newv;
1233
1234
if (s->Current->NumWriteValues >= 4) {
1235
rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
1236
} else {
1237
s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
1238
}
1239
}
1240
1241
static void is_rgb_to_alpha_possible_normal(
1242
void * userdata,
1243
struct rc_instruction * inst,
1244
struct rc_src_register * src)
1245
{
1246
struct rc_reader_data * reader_data = userdata;
1247
reader_data->Abort = 1;
1248
1249
}
1250
1251
static void schedule_block(struct schedule_state * s,
1252
struct rc_instruction * begin, struct rc_instruction * end)
1253
{
1254
unsigned int ip;
1255
1256
/* Scan instructions for data dependencies */
1257
ip = 0;
1258
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
1259
s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1260
memset(s->Current, 0, sizeof(struct schedule_instruction));
1261
1262
if (inst->Type == RC_INSTRUCTION_NORMAL) {
1263
const struct rc_opcode_info * info =
1264
rc_get_opcode_info(inst->U.I.Opcode);
1265
if (info->HasTexture) {
1266
s->TEXCount++;
1267
}
1268
}
1269
1270
/* XXX: This causes SemWait to be set for all instructions in
1271
* a block if the previous block contained a TEX instruction.
1272
* We can do better here, but it will take a lot of work. */
1273
if (s->PrevBlockHasTex) {
1274
s->Current->TexReadCount = 1;
1275
}
1276
1277
s->Current->Instruction = inst;
1278
inst->IP = ip++;
1279
1280
DBG("%i: Scanning\n", inst->IP);
1281
1282
/* The order of things here is subtle and maybe slightly
1283
* counter-intuitive, to account for the case where an
1284
* instruction writes to the same register as it reads
1285
* from. */
1286
rc_for_all_writes_chan(inst, &scan_write, s);
1287
rc_for_all_reads_chan(inst, &scan_read, s);
1288
1289
DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1290
1291
if (!s->Current->NumDependencies) {
1292
instruction_ready(s, s->Current);
1293
}
1294
1295
/* Get global readers for possible RGB->Alpha conversion. */
1296
s->Current->GlobalReaders.ExitOnAbort = 1;
1297
rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1298
is_rgb_to_alpha_possible_normal,
1299
is_rgb_to_alpha_possible, NULL);
1300
}
1301
1302
/* Temporarily unlink all instructions */
1303
begin->Prev->Next = end;
1304
end->Prev = begin->Prev;
1305
1306
/* Schedule instructions back */
1307
while(!s->C->Error &&
1308
(s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1309
emit_instruction(s, end);
1310
}
1311
}
1312
1313
static int is_controlflow(struct rc_instruction * inst)
1314
{
1315
if (inst->Type == RC_INSTRUCTION_NORMAL) {
1316
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1317
return opcode->IsFlowControl;
1318
}
1319
return 0;
1320
}
1321
1322
void rc_pair_schedule(struct radeon_compiler *cc, void *user)
1323
{
1324
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1325
struct schedule_state s;
1326
struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1327
unsigned int * opt = user;
1328
1329
memset(&s, 0, sizeof(s));
1330
s.Opt = *opt;
1331
s.C = &c->Base;
1332
if (s.C->is_r500) {
1333
s.CalcScore = calc_score_readers;
1334
} else {
1335
s.CalcScore = calc_score_r300;
1336
}
1337
s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1338
while(inst != &c->Base.Program.Instructions) {
1339
struct rc_instruction * first;
1340
1341
if (is_controlflow(inst)) {
1342
inst = inst->Next;
1343
continue;
1344
}
1345
1346
first = inst;
1347
1348
while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1349
inst = inst->Next;
1350
1351
DBG("Schedule one block\n");
1352
memset(s.Temporary, 0, sizeof(s.Temporary));
1353
s.TEXCount = 0;
1354
schedule_block(&s, first, inst);
1355
if (s.PendingTEX) {
1356
s.PrevBlockHasTex = 1;
1357
}
1358
}
1359
}
1360
1361