GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/gpu/drm/radeon/evergreen_blit_kms.c

/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *     Alex Deucher <[email protected]>
 */

#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon.h"

#include "evergreend.h"
#include "evergreen_blit_shaders.h"
#include "cayman_blit_shaders.h"

#define DI_PT_RECTLIST        0x11
#define DI_INDEX_SIZE_16_BIT  0x0
#define DI_SRC_SEL_AUTO_INDEX 0x2

#define FMT_8                 0x1
#define FMT_5_6_5             0x8
#define FMT_8_8_8_8           0x1a
#define COLOR_8               0x1
#define COLOR_5_6_5           0x8
#define COLOR_8_8_8_8         0x1a
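
/*
 * The blit path below copies memory by drawing textured rectangles:
 * the source region is bound as a texture (FMT_*), the destination as
 * a color render target (COLOR_*), and each chunk of the copy is one
 * RECTLIST draw.  The matching FMT_/COLOR_ pairs above encode the
 * same 8-bit and 32-bit formats on the texture and color-buffer
 * sides.
 */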

/* emits 17 */
static void
set_render_target(struct radeon_device *rdev, int format,
                  int w, int h, u64 gpu_addr)
{
        u32 cb_color_info;
        int pitch, slice;

        h = ALIGN(h, 8);
        if (h < 8)
                h = 8;

        cb_color_info = ((format << 2) | (1 << 24) | (1 << 8));
        pitch = (w / 8) - 1;
        slice = ((w * h) / 64) - 1;

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 15));
        radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, gpu_addr >> 8);
        radeon_ring_write(rdev, pitch);
        radeon_ring_write(rdev, slice);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, cb_color_info);
        radeon_ring_write(rdev, (1 << 4));
        radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
}
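
/*
 * A note on the units above: the pitch appears to be programmed in
 * 8-pixel units minus one and the slice in 64-pixel units minus one,
 * so a 64x8 target gives pitch = 64/8 - 1 = 7 and
 * slice = (64*8)/64 - 1 = 7.  The height is padded up to a multiple
 * of 8 to match that granularity.
 */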

/* emits 5dw */
static void
cp_set_surface_sync(struct radeon_device *rdev,
                    u32 sync_type, u32 size,
                    u64 mc_addr)
{
        u32 cp_coher_size;

        if (size == 0xffffffff)
                cp_coher_size = 0xffffffff;
        else
                cp_coher_size = ((size + 255) >> 8);

        radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(rdev, sync_type);
        radeon_ring_write(rdev, cp_coher_size);
        radeon_ring_write(rdev, mc_addr >> 8);
        radeon_ring_write(rdev, 10); /* poll interval */
}
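
/*
 * Surface-sync sizes and addresses are in 256-byte units: the size is
 * rounded up ((size + 255) >> 8) and the address shifted down by 8.
 * A 48-byte sync, for example, covers one 256-byte block.
 */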

/* emits 11dw + 1 surface sync = 16dw */
static void
set_shaders(struct radeon_device *rdev)
{
        u64 gpu_addr;

        /* VS */
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 3));
        radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, gpu_addr >> 8);
        radeon_ring_write(rdev, 2);
        radeon_ring_write(rdev, 0);

        /* PS */
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 4));
        radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, gpu_addr >> 8);
        radeon_ring_write(rdev, 1);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 2);

        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
        cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
}
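
/*
 * SQ_PGM_START_* take 256-byte-aligned addresses (hence the >> 8),
 * which is why evergreen_blit_init() aligns the VS/PS offsets to 256
 * bytes.  The trailing SH_ACTION_ENA sync presumably flushes the
 * shader cache so the freshly written shader code is visible to the
 * SQ.
 */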

/* emits 10 + 1 sync (5) = 15 */
static void
set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
{
        u32 sq_vtx_constant_word2, sq_vtx_constant_word3;

        /* high addr, stride */
        sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
#ifdef __BIG_ENDIAN
        sq_vtx_constant_word2 |= (2 << 30);
#endif
        /* xyzw swizzles */
        sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12);

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
        radeon_ring_write(rdev, 0x580);
        radeon_ring_write(rdev, gpu_addr & 0xffffffff);
        radeon_ring_write(rdev, 48 - 1); /* size */
        radeon_ring_write(rdev, sq_vtx_constant_word2);
        radeon_ring_write(rdev, sq_vtx_constant_word3);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);

        if ((rdev->family == CHIP_CEDAR) ||
            (rdev->family == CHIP_PALM) ||
            (rdev->family == CHIP_SUMO) ||
            (rdev->family == CHIP_SUMO2) ||
            (rdev->family == CHIP_CAICOS))
                cp_set_surface_sync(rdev,
                                    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
        else
                cp_set_surface_sync(rdev,
                                    PACKET3_VC_ACTION_ENA, 48, gpu_addr);
}
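
/*
 * The fetch constant describes the 48-byte vertex buffer written by
 * evergreen_kms_blit_copy(): 3 vertices x 4 floats (x, y, s, t) with
 * a 16-byte stride, matching the "48 - 1" size above.  The small
 * parts in the family check have no dedicated vertex cache, so their
 * vertex fetches go through the texture cache (TC) and must be
 * synced there rather than in the VC.
 */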

/* emits 10 */
static void
set_tex_resource(struct radeon_device *rdev,
                 int format, int w, int h, int pitch,
                 u64 gpu_addr)
{
        u32 sq_tex_resource_word0, sq_tex_resource_word1;
        u32 sq_tex_resource_word4, sq_tex_resource_word7;

        if (h < 1)
                h = 1;

        sq_tex_resource_word0 = (1 << 0); /* 2D */
        sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
                                  ((w - 1) << 18));
        sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28);
        /* xyzw swizzles */
        sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);

        sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30);

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, sq_tex_resource_word0);
        radeon_ring_write(rdev, sq_tex_resource_word1);
        radeon_ring_write(rdev, gpu_addr >> 8);
        radeon_ring_write(rdev, gpu_addr >> 8);
        radeon_ring_write(rdev, sq_tex_resource_word4);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, sq_tex_resource_word7);
}
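
/*
 * The two identical address dwords above are the texture base and,
 * presumably, the mip base; the blit texture has a single level, so
 * both point at the same 256-byte-aligned address (>> 8).
 */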

/* emits 12 */
static void
set_scissors(struct radeon_device *rdev, int x1, int y1,
             int x2, int y2)
{
        /* workaround some hw bugs */
        if (x2 == 0)
                x1 = 1;
        if (y2 == 0)
                y1 = 1;
        if (rdev->family == CHIP_CAYMAN) {
                if ((x2 == 1) && (y2 == 1))
                        x2 = 2;
        }

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
        radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
        radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
        radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
}
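
/*
 * All three scissor levels (screen, generic, window) are set to the
 * same box; bit 31 in the generic/window TL values is presumably
 * WINDOW_OFFSET_DISABLE, making the coordinates absolute.
 */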

/* emits 10 */
static void
draw_auto(struct radeon_device *rdev)
{
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(rdev, DI_PT_RECTLIST);

        radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
        radeon_ring_write(rdev,
#ifdef __BIG_ENDIAN
                          (2 << 2) |
#endif
                          DI_INDEX_SIZE_16_BIT);

        radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
        radeon_ring_write(rdev, 1);

        radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
        radeon_ring_write(rdev, 3);
        radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
}
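
/*
 * DRAW_INDEX_AUTO generates the three indices itself; for a RECTLIST
 * primitive the hardware infers the rectangle's fourth corner from
 * the three vertices that were emitted.
 */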

/* emits 39 */
static void
set_default_state(struct radeon_device *rdev)
{
        u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
        u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
        u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
        int num_ps_gprs, num_vs_gprs, num_temp_gprs;
        int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs;
        int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
        int num_hs_threads, num_ls_threads;
        int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
        int num_hs_stack_entries, num_ls_stack_entries;
        u64 gpu_addr;
        int dwords;

        /* set clear context state */
        radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(rdev, 0);

        if (rdev->family < CHIP_CAYMAN) {
                switch (rdev->family) {
                case CHIP_CEDAR:
                default:
                        num_ps_gprs = 93;
                        num_vs_gprs = 46;
                        num_temp_gprs = 4;
                        num_gs_gprs = 31;
                        num_es_gprs = 31;
                        num_hs_gprs = 23;
                        num_ls_gprs = 23;
                        num_ps_threads = 96;
                        num_vs_threads = 16;
                        num_gs_threads = 16;
                        num_es_threads = 16;
                        num_hs_threads = 16;
                        num_ls_threads = 16;
                        num_ps_stack_entries = 42;
                        num_vs_stack_entries = 42;
                        num_gs_stack_entries = 42;
                        num_es_stack_entries = 42;
                        num_hs_stack_entries = 42;
                        num_ls_stack_entries = 42;
                        break;
                case CHIP_REDWOOD:
                        num_ps_gprs = 93;
                        num_vs_gprs = 46;
                        num_temp_gprs = 4;
                        num_gs_gprs = 31;
                        num_es_gprs = 31;
                        num_hs_gprs = 23;
                        num_ls_gprs = 23;
                        num_ps_threads = 128;
                        num_vs_threads = 20;
                        num_gs_threads = 20;
                        num_es_threads = 20;
                        num_hs_threads = 20;
                        num_ls_threads = 20;
                        num_ps_stack_entries = 42;
                        num_vs_stack_entries = 42;
                        num_gs_stack_entries = 42;
                        num_es_stack_entries = 42;
                        num_hs_stack_entries = 42;
                        num_ls_stack_entries = 42;
                        break;
                case CHIP_JUNIPER:
                        num_ps_gprs = 93;
                        num_vs_gprs = 46;
                        num_temp_gprs = 4;
                        num_gs_gprs = 31;
                        num_es_gprs = 31;
                        num_hs_gprs = 23;
                        num_ls_gprs = 23;
                        num_ps_threads = 128;
                        num_vs_threads = 20;
                        num_gs_threads = 20;
                        num_es_threads = 20;
                        num_hs_threads = 20;
                        num_ls_threads = 20;
                        num_ps_stack_entries = 85;
                        num_vs_stack_entries = 85;
                        num_gs_stack_entries = 85;
                        num_es_stack_entries = 85;
                        num_hs_stack_entries = 85;
                        num_ls_stack_entries = 85;
                        break;
                case CHIP_CYPRESS:
                case CHIP_HEMLOCK:
                        num_ps_gprs = 93;
                        num_vs_gprs = 46;
                        num_temp_gprs = 4;
                        num_gs_gprs = 31;
                        num_es_gprs = 31;
                        num_hs_gprs = 23;
                        num_ls_gprs = 23;
                        num_ps_threads = 128;
                        num_vs_threads = 20;
                        num_gs_threads = 20;
                        num_es_threads = 20;
                        num_hs_threads = 20;
                        num_ls_threads = 20;
                        num_ps_stack_entries = 85;
                        num_vs_stack_entries = 85;
                        num_gs_stack_entries = 85;
                        num_es_stack_entries = 85;
                        num_hs_stack_entries = 85;
                        num_ls_stack_entries = 85;
                        break;
                case CHIP_PALM:
                        num_ps_gprs = 93;
                        num_vs_gprs = 46;
                        num_temp_gprs = 4;
                        num_gs_gprs = 31;
                        num_es_gprs = 31;
                        num_hs_gprs = 23;
                        num_ls_gprs = 23;
                        num_ps_threads = 96;
                        num_vs_threads = 16;
                        num_gs_threads = 16;
                        num_es_threads = 16;
                        num_hs_threads = 16;
                        num_ls_threads = 16;
                        num_ps_stack_entries = 42;
                        num_vs_stack_entries = 42;
                        num_gs_stack_entries = 42;
                        num_es_stack_entries = 42;
                        num_hs_stack_entries = 42;
                        num_ls_stack_entries = 42;
                        break;
                case CHIP_SUMO:
                        num_ps_gprs = 93;
                        num_vs_gprs = 46;
                        num_temp_gprs = 4;
                        num_gs_gprs = 31;
                        num_es_gprs = 31;
                        num_hs_gprs = 23;
                        num_ls_gprs = 23;
                        num_ps_threads = 96;
                        num_vs_threads = 25;
                        num_gs_threads = 25;
                        num_es_threads = 25;
                        num_hs_threads = 25;
                        num_ls_threads = 25;
                        num_ps_stack_entries = 42;
                        num_vs_stack_entries = 42;
                        num_gs_stack_entries = 42;
                        num_es_stack_entries = 42;
                        num_hs_stack_entries = 42;
                        num_ls_stack_entries = 42;
                        break;
                case CHIP_SUMO2:
                        num_ps_gprs = 93;
                        num_vs_gprs = 46;
                        num_temp_gprs = 4;
                        num_gs_gprs = 31;
                        num_es_gprs = 31;
                        num_hs_gprs = 23;
                        num_ls_gprs = 23;
                        num_ps_threads = 96;
                        num_vs_threads = 25;
                        num_gs_threads = 25;
                        num_es_threads = 25;
                        num_hs_threads = 25;
                        num_ls_threads = 25;
                        num_ps_stack_entries = 85;
                        num_vs_stack_entries = 85;
                        num_gs_stack_entries = 85;
                        num_es_stack_entries = 85;
                        num_hs_stack_entries = 85;
                        num_ls_stack_entries = 85;
                        break;
                case CHIP_BARTS:
                        num_ps_gprs = 93;
                        num_vs_gprs = 46;
                        num_temp_gprs = 4;
                        num_gs_gprs = 31;
                        num_es_gprs = 31;
                        num_hs_gprs = 23;
                        num_ls_gprs = 23;
                        num_ps_threads = 128;
                        num_vs_threads = 20;
                        num_gs_threads = 20;
                        num_es_threads = 20;
                        num_hs_threads = 20;
                        num_ls_threads = 20;
                        num_ps_stack_entries = 85;
                        num_vs_stack_entries = 85;
                        num_gs_stack_entries = 85;
                        num_es_stack_entries = 85;
                        num_hs_stack_entries = 85;
                        num_ls_stack_entries = 85;
                        break;
                case CHIP_TURKS:
                        num_ps_gprs = 93;
                        num_vs_gprs = 46;
                        num_temp_gprs = 4;
                        num_gs_gprs = 31;
                        num_es_gprs = 31;
                        num_hs_gprs = 23;
                        num_ls_gprs = 23;
                        num_ps_threads = 128;
                        num_vs_threads = 20;
                        num_gs_threads = 20;
                        num_es_threads = 20;
                        num_hs_threads = 20;
                        num_ls_threads = 20;
                        num_ps_stack_entries = 42;
                        num_vs_stack_entries = 42;
                        num_gs_stack_entries = 42;
                        num_es_stack_entries = 42;
                        num_hs_stack_entries = 42;
                        num_ls_stack_entries = 42;
                        break;
                case CHIP_CAICOS:
                        num_ps_gprs = 93;
                        num_vs_gprs = 46;
                        num_temp_gprs = 4;
                        num_gs_gprs = 31;
                        num_es_gprs = 31;
                        num_hs_gprs = 23;
                        num_ls_gprs = 23;
                        num_ps_threads = 128;
                        num_vs_threads = 10;
                        num_gs_threads = 10;
                        num_es_threads = 10;
                        num_hs_threads = 10;
                        num_ls_threads = 10;
                        num_ps_stack_entries = 42;
                        num_vs_stack_entries = 42;
                        num_gs_stack_entries = 42;
                        num_es_stack_entries = 42;
                        num_hs_stack_entries = 42;
                        num_ls_stack_entries = 42;
                        break;
                }
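
                /*
                 * The table above partitions each ASIC's GPRs, threads
                 * and stack entries among the PS/VS/GS/ES/HS/LS stages;
                 * the totals differ per family (Cedar-class parts are
                 * smaller than Cypress-class ones), which is the only
                 * reason for the per-chip cases.
                 */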

                if ((rdev->family == CHIP_CEDAR) ||
                    (rdev->family == CHIP_PALM) ||
                    (rdev->family == CHIP_SUMO) ||
                    (rdev->family == CHIP_SUMO2) ||
                    (rdev->family == CHIP_CAICOS))
                        sq_config = 0;
                else
                        sq_config = VC_ENABLE;

                sq_config |= (EXPORT_SRC_C |
                              CS_PRIO(0) |
                              LS_PRIO(0) |
                              HS_PRIO(0) |
                              PS_PRIO(0) |
                              VS_PRIO(1) |
                              GS_PRIO(2) |
                              ES_PRIO(3));

                sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
                                          NUM_VS_GPRS(num_vs_gprs) |
                                          NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
                sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
                                          NUM_ES_GPRS(num_es_gprs));
                sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
                                          NUM_LS_GPRS(num_ls_gprs));
                sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
                                           NUM_VS_THREADS(num_vs_threads) |
                                           NUM_GS_THREADS(num_gs_threads) |
                                           NUM_ES_THREADS(num_es_threads));
                sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
                                             NUM_LS_THREADS(num_ls_threads));
                sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
                                            NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
                sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
                                            NUM_ES_STACK_ENTRIES(num_es_stack_entries));
                sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
                                            NUM_LS_STACK_ENTRIES(num_ls_stack_entries));

                /* disable dyn gprs */
                radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
                radeon_ring_write(rdev, 0);

                /* setup LDS */
                radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                radeon_ring_write(rdev, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2);
                radeon_ring_write(rdev, 0x10001000);

                /* SQ config */
                radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
                radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
                radeon_ring_write(rdev, sq_config);
                radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
                radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
                radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
                radeon_ring_write(rdev, 0);
                radeon_ring_write(rdev, 0);
                radeon_ring_write(rdev, sq_thread_resource_mgmt);
                radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
                radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
                radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
                radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
        }
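
        /*
         * The raw words below are hand-encoded PACKET3 headers.
         * Assuming the (3 << 30) | (count << 16) | (opcode << 8)
         * layout used by the PACKET3() macro elsewhere in this file:
         *   0xc0012800 = count 1, opcode 0x28 (CONTEXT_CONTROL)
         *   0xc0026f00 = count 2, opcode 0x6f (SET_CTL_CONST)
         *   0xc0036e00 = count 3, opcode 0x6e (SET_SAMPLER)
         */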

        /* CONTEXT_CONTROL */
        radeon_ring_write(rdev, 0xc0012800);
        radeon_ring_write(rdev, 0x80000000);
        radeon_ring_write(rdev, 0x80000000);

        /* SQ_VTX_BASE_VTX_LOC */
        radeon_ring_write(rdev, 0xc0026f00);
        radeon_ring_write(rdev, 0x00000000);
        radeon_ring_write(rdev, 0x00000000);
        radeon_ring_write(rdev, 0x00000000);

        /* SET_SAMPLER */
        radeon_ring_write(rdev, 0xc0036e00);
        radeon_ring_write(rdev, 0x00000000);
        radeon_ring_write(rdev, 0x00000012);
        radeon_ring_write(rdev, 0x00000000);
        radeon_ring_write(rdev, 0x00000000);

        /* set to DX10/11 mode */
        radeon_ring_write(rdev, PACKET3(PACKET3_MODE_CONTROL, 0));
        radeon_ring_write(rdev, 1);

        /* emit an IB pointing at default state */
        dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
        radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
        radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
        radeon_ring_write(rdev, dwords);
}

static inline uint32_t i2f(uint32_t input)
{
        u32 result, i, exponent, fraction;

        if ((input & 0x3fff) == 0)
                result = 0; /* 0 is a special case */
        else {
                exponent = 140; /* exponent biased by 127; */
                fraction = (input & 0x3fff) << 10; /* cheat and only
                                                      handle numbers below 2^14 */
                for (i = 0; i < 14; i++) {
                        if (fraction & 0x800000)
                                break;
                        else {
                                fraction = fraction << 1; /* keep
                                                             shifting left until top bit = 1 */
                                exponent = exponent - 1;
                        }
                }
                result = exponent << 23 | (fraction & 0x7fffff); /* mask
                                                                    off top bit; assumed 1 */
        }
        return result;
}
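
/*
 * Worked example: i2f(1) loads fraction = 1 << 10 = 0x400, shifts it
 * left 13 times to reach 0x800000, leaving exponent = 140 - 13 = 127,
 * so the result is 127 << 23 = 0x3f800000, i.e. 1.0f.  Only inputs
 * below 2^14 (the 0x3fff mask) are converted, which is enough for the
 * 8192-texel coordinates used by the blit code.
 */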

int evergreen_blit_init(struct radeon_device *rdev)
{
        u32 obj_size;
        int i, r, dwords;
        void *ptr;
        u32 packet2s[16];
        int num_packet2s = 0;

        /* pin copy shader into vram if already initialized */
        if (rdev->r600_blit.shader_obj)
                goto done;

        mutex_init(&rdev->r600_blit.mutex);
        rdev->r600_blit.state_offset = 0;

        if (rdev->family < CHIP_CAYMAN)
                rdev->r600_blit.state_len = evergreen_default_size;
        else
                rdev->r600_blit.state_len = cayman_default_size;

        dwords = rdev->r600_blit.state_len;
        while (dwords & 0xf) {
                packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
                dwords++;
        }

        obj_size = dwords * 4;
        obj_size = ALIGN(obj_size, 256);

        rdev->r600_blit.vs_offset = obj_size;
        if (rdev->family < CHIP_CAYMAN)
                obj_size += evergreen_vs_size * 4;
        else
                obj_size += cayman_vs_size * 4;
        obj_size = ALIGN(obj_size, 256);

        rdev->r600_blit.ps_offset = obj_size;
        if (rdev->family < CHIP_CAYMAN)
                obj_size += evergreen_ps_size * 4;
        else
                obj_size += cayman_ps_size * 4;
        obj_size = ALIGN(obj_size, 256);

        r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
                             &rdev->r600_blit.shader_obj);
        if (r) {
                DRM_ERROR("evergreen failed to allocate shader\n");
                return r;
        }

        DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n",
                  obj_size,
                  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);

        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (unlikely(r != 0))
                return r;
        r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);
        if (r) {
                DRM_ERROR("failed to map blit object %d\n", r);
                return r;
        }

        if (rdev->family < CHIP_CAYMAN) {
                memcpy_toio(ptr + rdev->r600_blit.state_offset,
                            evergreen_default_state, rdev->r600_blit.state_len * 4);

                if (num_packet2s)
                        memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
                                    packet2s, num_packet2s * 4);
                for (i = 0; i < evergreen_vs_size; i++)
                        *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
                for (i = 0; i < evergreen_ps_size; i++)
                        *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
        } else {
                memcpy_toio(ptr + rdev->r600_blit.state_offset,
                            cayman_default_state, rdev->r600_blit.state_len * 4);

                if (num_packet2s)
                        memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
                                    packet2s, num_packet2s * 4);
                for (i = 0; i < cayman_vs_size; i++)
                        *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);
                for (i = 0; i < cayman_ps_size; i++)
                        *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);
        }
        radeon_bo_kunmap(rdev->r600_blit.shader_obj);
        radeon_bo_unreserve(rdev->r600_blit.shader_obj);

done:
        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (unlikely(r != 0))
                return r;
        r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
                          &rdev->r600_blit.shader_gpu_addr);
        radeon_bo_unreserve(rdev->r600_blit.shader_obj);
        if (r) {
                dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
                return r;
        }
        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
        return 0;
}
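
/*
 * Resulting layout of the shader BO, each piece aligned to 256 bytes:
 * the default-state IB first (padded with PACKET2 NOPs to a multiple
 * of 16 dwords, presumably to satisfy the CP's indirect-buffer fetch
 * granularity), then the vertex shader at vs_offset, then the pixel
 * shader at ps_offset.
 */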

void evergreen_blit_fini(struct radeon_device *rdev)
{
        int r;

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
        if (rdev->r600_blit.shader_obj == NULL)
                return;
        /* If we can't reserve the bo, unref should be enough to destroy
         * it when it becomes idle.
         */
        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (!r) {
                radeon_bo_unpin(rdev->r600_blit.shader_obj);
                radeon_bo_unreserve(rdev->r600_blit.shader_obj);
        }
        radeon_bo_unref(&rdev->r600_blit.shader_obj);
}

static int evergreen_vb_ib_get(struct radeon_device *rdev)
{
        int r;
        r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
        if (r) {
                DRM_ERROR("failed to get IB for vertex buffer\n");
                return r;
        }

        rdev->r600_blit.vb_total = 64*1024;
        rdev->r600_blit.vb_used = 0;
        return 0;
}

static void evergreen_vb_ib_put(struct radeon_device *rdev)
{
        radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
        radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
}

int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
{
        int r;
        int ring_size, line_size;
        int max_size;
        /* loops of emits + fence emit possible */
        int dwords_per_loop = 74, num_loops;

        r = evergreen_vb_ib_get(rdev);
        if (r)
                return r;

        /* 8 bpp vs 32 bpp for xfer unit */
        if (size_bytes & 3)
                line_size = 8192;
        else
                line_size = 8192 * 4;

        max_size = 8192 * line_size;

        /* major loops cover the max size transfer */
        num_loops = ((size_bytes + max_size) / max_size);
        /* minor loops cover the extra non aligned bits */
        num_loops += ((size_bytes % line_size) ? 1 : 0);
        /* calculate number of loops correctly */
        ring_size = num_loops * dwords_per_loop;
        /* set default + shaders */
        ring_size += 55; /* shaders + def state */
        ring_size += 10; /* fence emit for VB IB */
        ring_size += 5; /* done copy */
        ring_size += 10; /* fence emit for done copy */
        r = radeon_ring_lock(rdev, ring_size);
        if (r)
                return r;

        set_default_state(rdev); /* 39 */
        set_shaders(rdev); /* 16 */
        return 0;
}
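
/*
 * The budgets above follow from the per-function emit counts: one
 * copy loop is 10 (tex) + 5 (sync) + 17 (render target) + 12
 * (scissors) + 15 (vtx) + 10 (draw) + 5 (sync) = 74 dwords, and the
 * one-time setup is 39 (set_default_state) + 16 (set_shaders) = 55.
 */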

void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
{
        int r;

        if (rdev->r600_blit.vb_ib)
                evergreen_vb_ib_put(rdev);

        if (fence)
                r = radeon_fence_emit(rdev, fence);

        radeon_ring_unlock_commit(rdev);
}
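
/*
 * The copy below treats memory as rows of at most 8192 texels: 8192
 * bytes when the size or either address is not dword aligned (the
 * 8 bpp path), otherwise 8192 four-byte pixels.  Addresses are
 * aligned down to 256 bytes, the remainder becoming the src_x/dst_x
 * offset within a row, and fully aligned chunks are drawn as h rows
 * at a time.
 */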

void evergreen_kms_blit_copy(struct radeon_device *rdev,
                             u64 src_gpu_addr, u64 dst_gpu_addr,
                             int size_bytes)
{
        int max_bytes;
        u64 vb_gpu_addr;
        u32 *vb;

        DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
                  size_bytes, rdev->r600_blit.vb_used);
        vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
        if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
                max_bytes = 8192;

                while (size_bytes) {
                        int cur_size = size_bytes;
                        int src_x = src_gpu_addr & 255;
                        int dst_x = dst_gpu_addr & 255;
                        int h = 1;
                        src_gpu_addr = src_gpu_addr & ~255ULL;
                        dst_gpu_addr = dst_gpu_addr & ~255ULL;

                        if (!src_x && !dst_x) {
                                h = (cur_size / max_bytes);
                                if (h > 8192)
                                        h = 8192;
                                if (h == 0)
                                        h = 1;
                                else
                                        cur_size = max_bytes;
                        } else {
                                if (cur_size > max_bytes)
                                        cur_size = max_bytes;
                                if (cur_size > (max_bytes - dst_x))
                                        cur_size = (max_bytes - dst_x);
                                if (cur_size > (max_bytes - src_x))
                                        cur_size = (max_bytes - src_x);
                        }

                        if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
                                WARN_ON(1);
                        }

                        vb[0] = i2f(dst_x);
                        vb[1] = 0;
                        vb[2] = i2f(src_x);
                        vb[3] = 0;

                        vb[4] = i2f(dst_x);
                        vb[5] = i2f(h);
                        vb[6] = i2f(src_x);
                        vb[7] = i2f(h);

                        vb[8] = i2f(dst_x + cur_size);
                        vb[9] = i2f(h);
                        vb[10] = i2f(src_x + cur_size);
                        vb[11] = i2f(h);

                        /* src 10 */
                        set_tex_resource(rdev, FMT_8,
                                         src_x + cur_size, h, src_x + cur_size,
                                         src_gpu_addr);

                        /* 5 */
                        cp_set_surface_sync(rdev,
                                            PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);

                        /* dst 17 */
                        set_render_target(rdev, COLOR_8,
                                          dst_x + cur_size, h,
                                          dst_gpu_addr);

                        /* scissors 12 */
                        set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);

                        /* 15 */
                        vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
                        set_vtx_resource(rdev, vb_gpu_addr);

                        /* draw 10 */
                        draw_auto(rdev);

                        /* 5 */
                        cp_set_surface_sync(rdev,
                                            PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
                                            cur_size * h, dst_gpu_addr);

                        vb += 12;
                        rdev->r600_blit.vb_used += 12 * 4;

                        src_gpu_addr += cur_size * h;
                        dst_gpu_addr += cur_size * h;
                        size_bytes -= cur_size * h;
                }
        } else {
                max_bytes = 8192 * 4;

                while (size_bytes) {
                        int cur_size = size_bytes;
                        int src_x = (src_gpu_addr & 255);
                        int dst_x = (dst_gpu_addr & 255);
                        int h = 1;
                        src_gpu_addr = src_gpu_addr & ~255ULL;
                        dst_gpu_addr = dst_gpu_addr & ~255ULL;

                        if (!src_x && !dst_x) {
                                h = (cur_size / max_bytes);
                                if (h > 8192)
                                        h = 8192;
                                if (h == 0)
                                        h = 1;
                                else
                                        cur_size = max_bytes;
                        } else {
                                if (cur_size > max_bytes)
                                        cur_size = max_bytes;
                                if (cur_size > (max_bytes - dst_x))
                                        cur_size = (max_bytes - dst_x);
                                if (cur_size > (max_bytes - src_x))
                                        cur_size = (max_bytes - src_x);
                        }

                        if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
                                WARN_ON(1);
                        }

                        vb[0] = i2f(dst_x / 4);
                        vb[1] = 0;
                        vb[2] = i2f(src_x / 4);
                        vb[3] = 0;

                        vb[4] = i2f(dst_x / 4);
                        vb[5] = i2f(h);
                        vb[6] = i2f(src_x / 4);
                        vb[7] = i2f(h);

                        vb[8] = i2f((dst_x + cur_size) / 4);
                        vb[9] = i2f(h);
                        vb[10] = i2f((src_x + cur_size) / 4);
                        vb[11] = i2f(h);

                        /* src 10 */
                        set_tex_resource(rdev, FMT_8_8_8_8,
                                         (src_x + cur_size) / 4,
                                         h, (src_x + cur_size) / 4,
                                         src_gpu_addr);
                        /* 5 */
                        cp_set_surface_sync(rdev,
                                            PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);

                        /* dst 17 */
                        set_render_target(rdev, COLOR_8_8_8_8,
                                          (dst_x + cur_size) / 4, h,
                                          dst_gpu_addr);

                        /* scissors 12 */
                        set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size) / 4, h);

                        /* Vertex buffer setup 15 */
                        vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
                        set_vtx_resource(rdev, vb_gpu_addr);

                        /* draw 10 */
                        draw_auto(rdev);

                        /* 5 */
                        cp_set_surface_sync(rdev,
                                            PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
                                            cur_size * h, dst_gpu_addr);

                        /* 74 ring dwords per loop */
                        vb += 12;
                        rdev->r600_blit.vb_used += 12 * 4;

                        src_gpu_addr += cur_size * h;
                        dst_gpu_addr += cur_size * h;
                        size_bytes -= cur_size * h;
                }
        }
}
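
/*
 * Typical call sequence, roughly as used by the driver's copy entry
 * point (a sketch; error handling and the exact caller are elided):
 *
 *      r = evergreen_blit_prepare_copy(rdev, num_pages * PAGE_SIZE);
 *      if (r)
 *              return r;
 *      evergreen_kms_blit_copy(rdev, src_offset, dst_offset,
 *                              num_pages * PAGE_SIZE);
 *      evergreen_blit_done_copy(rdev, fence);
 */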