Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/gpu/drm/radeon/r600_cp.c
15113 views
1
/*
2
* Copyright 2008-2009 Advanced Micro Devices, Inc.
3
* Copyright 2008 Red Hat Inc.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
14
* Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
* THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22
* DEALINGS IN THE SOFTWARE.
23
*
24
* Authors:
25
* Dave Airlie <[email protected]>
26
* Alex Deucher <[email protected]>
27
*/
28
29
#include "drmP.h"
30
#include "drm.h"
31
#include "radeon_drm.h"
32
#include "radeon_drv.h"
33
34
#define PFP_UCODE_SIZE 576
35
#define PM4_UCODE_SIZE 1792
36
#define R700_PFP_UCODE_SIZE 848
37
#define R700_PM4_UCODE_SIZE 1360
38
39
/* Firmware Names */
40
MODULE_FIRMWARE("radeon/R600_pfp.bin");
41
MODULE_FIRMWARE("radeon/R600_me.bin");
42
MODULE_FIRMWARE("radeon/RV610_pfp.bin");
43
MODULE_FIRMWARE("radeon/RV610_me.bin");
44
MODULE_FIRMWARE("radeon/RV630_pfp.bin");
45
MODULE_FIRMWARE("radeon/RV630_me.bin");
46
MODULE_FIRMWARE("radeon/RV620_pfp.bin");
47
MODULE_FIRMWARE("radeon/RV620_me.bin");
48
MODULE_FIRMWARE("radeon/RV635_pfp.bin");
49
MODULE_FIRMWARE("radeon/RV635_me.bin");
50
MODULE_FIRMWARE("radeon/RV670_pfp.bin");
51
MODULE_FIRMWARE("radeon/RV670_me.bin");
52
MODULE_FIRMWARE("radeon/RS780_pfp.bin");
53
MODULE_FIRMWARE("radeon/RS780_me.bin");
54
MODULE_FIRMWARE("radeon/RV770_pfp.bin");
55
MODULE_FIRMWARE("radeon/RV770_me.bin");
56
MODULE_FIRMWARE("radeon/RV730_pfp.bin");
57
MODULE_FIRMWARE("radeon/RV730_me.bin");
58
MODULE_FIRMWARE("radeon/RV710_pfp.bin");
59
MODULE_FIRMWARE("radeon/RV710_me.bin");
60
61
62
int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
63
unsigned family, u32 *ib, int *l);
64
void r600_cs_legacy_init(void);
65
66
67
# define ATI_PCIGART_PAGE_SIZE 4096 /**< PCI GART page size */
68
# define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1))
69
70
#define R600_PTE_VALID (1 << 0)
71
#define R600_PTE_SYSTEM (1 << 1)
72
#define R600_PTE_SNOOPED (1 << 2)
73
#define R600_PTE_READABLE (1 << 5)
74
#define R600_PTE_WRITEABLE (1 << 6)
75
76
/* MAX values used for gfx init */
77
#define R6XX_MAX_SH_GPRS 256
78
#define R6XX_MAX_TEMP_GPRS 16
79
#define R6XX_MAX_SH_THREADS 256
80
#define R6XX_MAX_SH_STACK_ENTRIES 4096
81
#define R6XX_MAX_BACKENDS 8
82
#define R6XX_MAX_BACKENDS_MASK 0xff
83
#define R6XX_MAX_SIMDS 8
84
#define R6XX_MAX_SIMDS_MASK 0xff
85
#define R6XX_MAX_PIPES 8
86
#define R6XX_MAX_PIPES_MASK 0xff
87
88
#define R7XX_MAX_SH_GPRS 256
89
#define R7XX_MAX_TEMP_GPRS 16
90
#define R7XX_MAX_SH_THREADS 256
91
#define R7XX_MAX_SH_STACK_ENTRIES 4096
92
#define R7XX_MAX_BACKENDS 8
93
#define R7XX_MAX_BACKENDS_MASK 0xff
94
#define R7XX_MAX_SIMDS 16
95
#define R7XX_MAX_SIMDS_MASK 0xffff
96
#define R7XX_MAX_PIPES 8
97
#define R7XX_MAX_PIPES_MASK 0xff
98
99
/*
 * Wait until the CP command FIFO reports at least @entries free slots.
 *
 * Polls R600_GRBM_STATUS once per microsecond up to the driver's
 * usec_timeout.  RV770 and newer report FIFO availability in a
 * different bit field than earlier R6xx parts.
 *
 * Returns 0 on success, -EBUSY if the FIFO never drained in time.
 */
static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries)
{
	u32 avail_mask;
	int t;

	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

	/* The family check is loop-invariant; pick the mask once. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
		avail_mask = R700_CMDFIFO_AVAIL_MASK;
	else
		avail_mask = R600_CMDFIFO_AVAIL_MASK;

	for (t = 0; t < dev_priv->usec_timeout; t++) {
		int free_slots = RADEON_READ(R600_GRBM_STATUS) & avail_mask;

		if (free_slots >= entries)
			return 0;
		DRM_UDELAY(1);
	}
	DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",
		 RADEON_READ(R600_GRBM_STATUS),
		 RADEON_READ(R600_GRBM_STATUS2));

	return -EBUSY;
}
123
124
/*
 * Wait for the graphics engine to go idle.
 *
 * First drains the CP command FIFO (8 slots on RV770+, 16 on older
 * R6xx), then polls the GUI_ACTIVE bit of GRBM_STATUS until it clears
 * or usec_timeout microseconds elapse.
 *
 * Returns 0 when idle, -EBUSY on timeout.
 */
static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv)
{
	int fifo_entries, rc, t;

	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

	fifo_entries = ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
		? 8 : 16;
	rc = r600_do_wait_for_fifo(dev_priv, fifo_entries);
	if (rc)
		return rc;

	for (t = 0; t < dev_priv->usec_timeout; t++) {
		if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE))
			return 0;
		DRM_UDELAY(1);
	}
	DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",
		 RADEON_READ(R600_GRBM_STATUS),
		 RADEON_READ(R600_GRBM_STATUS2));

	return -EBUSY;
}
147
148
void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info)
149
{
150
struct drm_sg_mem *entry = dev->sg;
151
int max_pages;
152
int pages;
153
int i;
154
155
if (!entry)
156
return;
157
158
if (gart_info->bus_addr) {
159
max_pages = (gart_info->table_size / sizeof(u64));
160
pages = (entry->pages <= max_pages)
161
? entry->pages : max_pages;
162
163
for (i = 0; i < pages; i++) {
164
if (!entry->busaddr[i])
165
break;
166
pci_unmap_page(dev->pdev, entry->busaddr[i],
167
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
168
}
169
if (gart_info->gart_table_location == DRM_ATI_GART_MAIN)
170
gart_info->bus_addr = 0;
171
}
172
}
173
174
/* R600 has page table setup */
/*
 * Build the R600 GART page table.
 *
 * Maps every scatter/gather page for DMA and writes one 64-bit PTE per
 * 4K ATI GART page (a CPU PAGE_SIZE page may cover several GART pages)
 * into the table at gart_info->mapping.
 *
 * Returns 1 on success, 0 on failure (note: NOT the usual 0/-errno
 * kernel convention).  On failure all mappings made so far are undone
 * via r600_page_table_cleanup().
 */
int r600_page_table_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info;
	struct drm_local_map *map = &gart_info->mapping;
	struct drm_sg_mem *entry = dev->sg;
	int ret = 0;
	int i, j;
	int pages;
	u64 page_base;
	dma_addr_t entry_addr;
	int max_ati_pages, max_real_pages, gart_idx;

	/* okay page table is available - lets rock */
	/* PTEs are u64; one CPU page holds PAGE_SIZE/4K GART entries. */
	max_ati_pages = (gart_info->table_size / sizeof(u64));
	max_real_pages = max_ati_pages / (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE);

	/* Clamp to however many s/g pages the table can describe. */
	pages = (entry->pages <= max_real_pages) ?
		entry->pages : max_real_pages;

	/* Table may be in VRAM: use the iomem-safe memset. */
	memset_io((void __iomem *)map->handle, 0, max_ati_pages * sizeof(u64));

	gart_idx = 0;
	for (i = 0; i < pages; i++) {
		entry->busaddr[i] = pci_map_page(dev->pdev,
						 entry->pagelist[i], 0,
						 PAGE_SIZE,
						 PCI_DMA_BIDIRECTIONAL);
		if (pci_dma_mapping_error(dev->pdev, entry->busaddr[i])) {
			DRM_ERROR("unable to map PCIGART pages!\n");
			/* Undo the mappings already made, then bail. */
			r600_page_table_cleanup(dev, gart_info);
			goto done;
		}
		entry_addr = entry->busaddr[i];
		/* Emit one PTE per 4K GART page inside this CPU page. */
		for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {
			page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK;
			page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
			page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE;

			DRM_WRITE64(map, gart_idx * sizeof(u64), page_base);

			gart_idx++;

			if ((i % 128) == 0)
				DRM_DEBUG("page entry %d: 0x%016llx\n",
					  i, (unsigned long long)page_base);
			entry_addr += ATI_PCIGART_PAGE_SIZE;
		}
	}
	ret = 1;
done:
	return ret;
}
228
229
/*
 * Invalidate VM context 0's TLB entries over the GART aperture.
 *
 * Programs the invalidation window (in 4K-page units) and issues the
 * request, then polls the response register until the hardware acks
 * (any of bits 4-7 set) or ~1000 polls at 1us apart have elapsed.
 * Times out silently: a stuck flush is not treated as fatal here.
 */
static void r600_vm_flush_gart_range(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 resp, countdown = 1000;
	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2);

	do {
		resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE);
		countdown--;
		DRM_UDELAY(1);
	} while (((resp & 0xf0) == 0) && countdown);
}
243
244
/*
 * Bring up the R6xx virtual-memory block so context 0 uses the flat
 * page table built by r600_page_table_init().
 *
 * Sequence: system aperture -> per-client MCD L1 TLB controls ->
 * VM L2 cache -> enable context 0 / disable contexts 1-7 ->
 * page table base/start/end -> TLB flush.
 */
static void r600_vm_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	/* initialise the VM to use the page table we constructed up there */
	u32 vm_c0, i;
	u32 mc_rd_a;
	u32 vm_l2_cntl, vm_l2_cntl3;
	/* okay set up the PCIE aperture type thingo */
	/* Aperture bounds are in 4K-page units. */
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

	/* setup MC RD a */
	/* Common L1 TLB configuration, reused for all MCD clients below. */
	mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS |
		R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) |
		R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY;

	RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a);

	/* HDP reads additionally require strict ordering; writes do not
	 * (the commented-out flag was deliberately left disabled). */
	RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING);
	RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/);

	RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE);
	RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a);

	/* VM L2 cache setup. */
	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
	vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7);
	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
	vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) |
		       R600_VM_L2_CNTL3_BANK_SELECT_1(1) |
		       R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2));
	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

	/* Context 0 enabled with a flat (single-level) page table. */
	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

	/* disable all other contexts */
	for (i = 1; i < 8; i++)
		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

	/* Page table location and the VA range it translates (4K units). */
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

	/* Flush stale translations before anything uses the new table. */
	r600_vm_flush_gart_range(dev);
}
308
309
/*
 * Fetch the CP microcode (PFP + ME) for the detected ASIC family via
 * the kernel firmware loader and stash it in dev_priv.
 *
 * A throwaway platform device is registered only to give
 * request_firmware() a struct device to hang uevents off; it is
 * unregistered before returning.
 *
 * Returns 0 on success, -EINVAL on bad firmware size or registration
 * failure, or the request_firmware() error.  On any failure both
 * firmware handles are released and NULLed.
 */
static int r600_cp_init_microcode(drm_radeon_private_t *dev_priv)
{
	struct platform_device *pdev;
	const char *chip_name;
	size_t pfp_req_size, me_req_size;
	char fw_name[30];
	int err;

	pdev = platform_device_register_simple("r600_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "r600_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	/* Map the ASIC family to the firmware file prefix.  Some
	 * families share an image (RS880->RS780, RV740->RV730). */
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_R600: chip_name = "R600"; break;
	case CHIP_RV610: chip_name = "RV610"; break;
	case CHIP_RV630: chip_name = "RV630"; break;
	case CHIP_RV620: chip_name = "RV620"; break;
	case CHIP_RV635: chip_name = "RV635"; break;
	case CHIP_RV670: chip_name = "RV670"; break;
	case CHIP_RS780:
	case CHIP_RS880: chip_name = "RS780"; break;
	case CHIP_RV770: chip_name = "RV770"; break;
	case CHIP_RV730:
	case CHIP_RV740: chip_name = "RV730"; break;
	case CHIP_RV710: chip_name = "RV710"; break;
	default: BUG();
	}

	/* Expected image sizes in bytes.  The R6xx ME image holds 3
	 * dwords per PM4 entry (see the *3 load loop), hence *12. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
		pfp_req_size = R700_PFP_UCODE_SIZE * 4;
		me_req_size = R700_PM4_UCODE_SIZE * 4;
	} else {
		pfp_req_size = PFP_UCODE_SIZE * 4;
		me_req_size = PM4_UCODE_SIZE * 12;
	}

	DRM_INFO("Loading %s CP Microcode\n", chip_name);

	/* Pre-fetch parser (PFP) microcode. */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&dev_priv->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (dev_priv->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
		       dev_priv->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	/* Micro-engine (ME) microcode. */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&dev_priv->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (dev_priv->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
		       dev_priv->me_fw->size, fw_name);
		err = -EINVAL;
	}
out:
	platform_device_unregister(pdev);

	if (err) {
		/* -EINVAL already logged a specific message above. */
		if (err != -EINVAL)
			printk(KERN_ERR
			       "r600_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(dev_priv->pfp_fw);
		dev_priv->pfp_fw = NULL;
		release_firmware(dev_priv->me_fw);
		dev_priv->me_fw = NULL;
	}
	return err;
}
387
388
/*
 * Upload the previously-fetched PFP and ME microcode into the R6xx CP.
 *
 * No-op if r600_cp_init_microcode() has not populated both images.
 * The CP is stopped and soft-reset first; firmware words are stored
 * big-endian on disk and byte-swapped as they are written.  ME first,
 * then PFP, then all upload address registers are rewound to 0.
 */
static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
	const __be32 *fw_data;
	int i;

	if (!dev_priv->me_fw || !dev_priv->pfp_fw)
		return;

	r600_do_cp_stop(dev_priv);

	RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
		     R600_BUF_SWAP_32BIT |
#endif
		     R600_RB_NO_UPDATE |
		     R600_RB_BLKSZ(15) |
		     R600_RB_BUFSZ(3));

	/* Soft-reset the CP; read back to post the write, then settle. */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(15000);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

	/* ME image: 3 dwords per PM4 entry on R6xx. */
	fw_data = (const __be32 *)dev_priv->me_fw->data;
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	for (i = 0; i < PM4_UCODE_SIZE * 3; i++)
		RADEON_WRITE(R600_CP_ME_RAM_DATA,
			     be32_to_cpup(fw_data++));

	fw_data = (const __be32 *)dev_priv->pfp_fw->data;
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < PFP_UCODE_SIZE; i++)
		RADEON_WRITE(R600_CP_PFP_UCODE_DATA,
			     be32_to_cpup(fw_data++));

	/* Reset the autoincrementing upload pointers. */
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);

}
428
429
/*
 * RV770-family counterpart of r600_vm_init(): point VM context 0 at
 * the flat GART page table.
 *
 * Same overall sequence as R6xx but with the R700 register layout:
 * the per-client MCD controls are replaced by seven MD/MB L1 TLB
 * control registers sharing one configuration word.
 */
static void r700_vm_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	/* initialise the VM to use the page table we constructed up there */
	u32 vm_c0, i;
	u32 mc_vm_md_l1;
	u32 vm_l2_cntl, vm_l2_cntl3;
	/* okay set up the PCIE aperture type thingo */
	/* Aperture bounds are in 4K-page units. */
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

	/* One L1 TLB configuration shared by all MD/MB clients. */
	mc_vm_md_l1 = R700_ENABLE_L1_TLB |
	    R700_ENABLE_L1_FRAGMENT_PROCESSING |
	    R700_SYSTEM_ACCESS_MODE_IN_SYS |
	    R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
	    R700_EFFECTIVE_L1_TLB_SIZE(5) |
	    R700_EFFECTIVE_L1_QUEUE_SIZE(5);

	RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1);

	/* VM L2 cache setup. */
	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
	vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7);
	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
	vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2);
	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

	/* Context 0 enabled with a flat (single-level) page table. */
	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

	/* disable all other contexts */
	for (i = 1; i < 8; i++)
		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

	/* Page table location and the VA range it translates (4K units). */
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

	/* Flush stale translations before anything uses the new table. */
	r600_vm_flush_gart_range(dev);
}
480
481
/*
 * Upload PFP and ME microcode into the RV770-family CP.
 *
 * Mirrors r600_cp_load_microcode() but uses the R700 image sizes and
 * loads PFP before ME; R700 ME images are one dword per entry (no *3
 * factor).  No-op if either firmware image is missing.
 */
static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
	const __be32 *fw_data;
	int i;

	if (!dev_priv->me_fw || !dev_priv->pfp_fw)
		return;

	r600_do_cp_stop(dev_priv);

	RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
		     R600_BUF_SWAP_32BIT |
#endif
		     R600_RB_NO_UPDATE |
		     R600_RB_BLKSZ(15) |
		     R600_RB_BUFSZ(3));

	/* Soft-reset the CP; read back to post the write, then settle. */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(15000);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

	fw_data = (const __be32 *)dev_priv->pfp_fw->data;
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)dev_priv->me_fw->data;
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
		RADEON_WRITE(R600_CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);

	/* Reset the autoincrementing upload pointers. */
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);

}
521
522
/*
 * Probe whether scratch-register writeback to system memory works on
 * this board, and record the result in dev_priv->writeback_works.
 *
 * Writes a sentinel to a scratch register and polls the ring-rptr
 * writeback slot for it to appear.  If writeback fails (or is forced
 * off by the radeon_no_wb module option), it is disabled in hardware
 * to avoid pointless bus-master traffic.
 */
static void r600_test_writeback(drm_radeon_private_t *dev_priv)
{
	u32 tmp;

	/* Start with assuming that writeback doesn't work */
	dev_priv->writeback_works = 0;

	/* Writeback doesn't seem to work everywhere, test it here and possibly
	 * enable it if it appears to work
	 */
	/* Clear the writeback slot so a stale value can't pass the test. */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);

	RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef);

	for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) {
		u32 val;

		val = radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1));
		if (val == 0xdeadbeef)
			break;
		DRM_UDELAY(1);
	}

	/* tmp < timeout means the loop exited via break: sentinel seen. */
	if (tmp < dev_priv->usec_timeout) {
		dev_priv->writeback_works = 1;
		DRM_INFO("writeback test succeeded in %d usecs\n", tmp);
	} else {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback test failed\n");
	}
	/* Module parameter overrides the probe result. */
	if (radeon_no_wb == 1) {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback forced off\n");
	}

	if (!dev_priv->writeback_works) {
		/* Disable writeback to avoid unnecessary bus master transfer */
		RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
			     R600_BUF_SWAP_32BIT |
#endif
			     RADEON_READ(R600_CP_RB_CNTL) |
			     R600_RB_NO_UPDATE);
		RADEON_WRITE(R600_SCRATCH_UMSK, 0);
	}
}
568
569
/*
 * Soft-reset the whole graphics engine and restore CP ring state.
 *
 * Saves the ring write pointer and ME control, halts the CP, pulses
 * GRBM_SOFT_RESET across all blocks, then rewrites rptr/wptr so the
 * ring resumes where it left off.  Afterwards the CP is marked as not
 * running and all pending indirect buffers are recycled.
 *
 * Always returns 0.
 */
int r600_do_engine_reset(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 cp_ptr, cp_me_cntl, cp_rb_cntl;

	DRM_INFO("Resetting GPU\n");

	/* Save ring state, then halt the micro-engine. */
	cp_ptr = RADEON_READ(R600_CP_RB_WPTR);
	cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL);
	RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT);

	/* Pulse soft reset on all GRBM blocks; reads post the writes. */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(50);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
	RADEON_READ(R600_GRBM_SOFT_RESET);

	/* Temporarily allow direct rptr writes to resync the ring. */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
	cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL);
	RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
		     R600_BUF_SWAP_32BIT |
#endif
		     R600_RB_RPTR_WR_ENA);

	RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr);
	RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr);
	RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl);
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl);

	/* Reset the CP ring */
	r600_do_cp_reset(dev_priv);

	/* The CP is no longer running after an engine reset */
	dev_priv->cp_running = 0;

	/* Reset any pending vertex, indirect buffers */
	radeon_freelist_reset(dev);

	return 0;

}
611
612
/*
 * Compute the tile-pipe to render-backend routing word for R6xx.
 *
 * Clamps the requested pipe/backend counts to hardware limits, derives
 * the set of usable backends from @backend_disable_mask (falling back
 * to backend 0 if everything is fused off), then assigns the enabled
 * backends round-robin to the tile pipes, packing a 2-bit backend id
 * per pipe at a position given by a fixed per-pipe-count swizzle.
 */
static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
					     u32 num_backends,
					     u32 backend_disable_mask)
{
	/* Pipe visitation order, one row per pipe count 1..8;
	 * unused tail entries stay 0 (matches the original memset). */
	static const u32 swizzle_tbl[R6XX_MAX_PIPES][R6XX_MAX_PIPES] = {
		{ 0, 0, 0, 0, 0, 0, 0, 0 },
		{ 0, 1, 0, 0, 0, 0, 0, 0 },
		{ 0, 1, 2, 0, 0, 0, 0, 0 },
		{ 0, 1, 2, 3, 0, 0, 0, 0 },
		{ 0, 1, 2, 3, 4, 0, 0, 0 },
		{ 0, 2, 4, 5, 1, 3, 0, 0 },
		{ 0, 2, 4, 6, 1, 3, 5, 0 },
		{ 0, 2, 4, 6, 1, 3, 5, 7 },
	};
	u32 live_mask = 0;
	u32 live_count = 0;
	u32 rb = 0;
	u32 map = 0;
	u32 pipe, bit;

	/* Clamp the requested counts to what the silicon supports. */
	if (num_tile_pipes > R6XX_MAX_PIPES)
		num_tile_pipes = R6XX_MAX_PIPES;
	if (num_tile_pipes < 1)
		num_tile_pipes = 1;
	if (num_backends > R6XX_MAX_BACKENDS)
		num_backends = R6XX_MAX_BACKENDS;
	if (num_backends < 1)
		num_backends = 1;

	/* Gather up to num_backends backends that are not disabled. */
	for (bit = 0; bit < R6XX_MAX_BACKENDS; ++bit) {
		if (!((backend_disable_mask >> bit) & 1)) {
			live_mask |= 1 << bit;
			++live_count;
		}
		if (live_count == num_backends)
			break;
	}

	/* Nothing enabled at all: fall back to backend 0 alone. */
	if (live_count == 0) {
		live_mask = 1;
		live_count = 1;
	}

	if (live_count != num_backends)
		num_backends = live_count;

	/* Round-robin the live backends across the swizzled pipes. */
	for (pipe = 0; pipe < num_tile_pipes; ++pipe) {
		while (!((1 << rb) & live_mask))
			rb = (rb + 1) % R6XX_MAX_BACKENDS;

		map |= (u32)((rb & 3) << (swizzle_tbl[num_tile_pipes - 1][pipe] * 2));

		rb = (rb + 1) % R6XX_MAX_BACKENDS;
	}

	return map;
}
720
721
/*
 * Population count: number of set bits in a 32-bit pipe/backend mask.
 */
static int r600_count_pipe_bits(uint32_t val)
{
	int bits;

	/* Kernighan's trick: each iteration clears the lowest set bit. */
	for (bits = 0; val; bits++)
		val &= val - 1;
	return bits;
}
730
731
static void r600_gfx_init(struct drm_device *dev,
732
drm_radeon_private_t *dev_priv)
733
{
734
int i, j, num_qd_pipes;
735
u32 sx_debug_1;
736
u32 tc_cntl;
737
u32 arb_pop;
738
u32 num_gs_verts_per_thread;
739
u32 vgt_gs_per_es;
740
u32 gs_prim_buffer_depth = 0;
741
u32 sq_ms_fifo_sizes;
742
u32 sq_config;
743
u32 sq_gpr_resource_mgmt_1 = 0;
744
u32 sq_gpr_resource_mgmt_2 = 0;
745
u32 sq_thread_resource_mgmt = 0;
746
u32 sq_stack_resource_mgmt_1 = 0;
747
u32 sq_stack_resource_mgmt_2 = 0;
748
u32 hdp_host_path_cntl;
749
u32 backend_map;
750
u32 gb_tiling_config = 0;
751
u32 cc_rb_backend_disable;
752
u32 cc_gc_shader_pipe_config;
753
u32 ramcfg;
754
755
/* setup chip specs */
756
switch (dev_priv->flags & RADEON_FAMILY_MASK) {
757
case CHIP_R600:
758
dev_priv->r600_max_pipes = 4;
759
dev_priv->r600_max_tile_pipes = 8;
760
dev_priv->r600_max_simds = 4;
761
dev_priv->r600_max_backends = 4;
762
dev_priv->r600_max_gprs = 256;
763
dev_priv->r600_max_threads = 192;
764
dev_priv->r600_max_stack_entries = 256;
765
dev_priv->r600_max_hw_contexts = 8;
766
dev_priv->r600_max_gs_threads = 16;
767
dev_priv->r600_sx_max_export_size = 128;
768
dev_priv->r600_sx_max_export_pos_size = 16;
769
dev_priv->r600_sx_max_export_smx_size = 128;
770
dev_priv->r600_sq_num_cf_insts = 2;
771
break;
772
case CHIP_RV630:
773
case CHIP_RV635:
774
dev_priv->r600_max_pipes = 2;
775
dev_priv->r600_max_tile_pipes = 2;
776
dev_priv->r600_max_simds = 3;
777
dev_priv->r600_max_backends = 1;
778
dev_priv->r600_max_gprs = 128;
779
dev_priv->r600_max_threads = 192;
780
dev_priv->r600_max_stack_entries = 128;
781
dev_priv->r600_max_hw_contexts = 8;
782
dev_priv->r600_max_gs_threads = 4;
783
dev_priv->r600_sx_max_export_size = 128;
784
dev_priv->r600_sx_max_export_pos_size = 16;
785
dev_priv->r600_sx_max_export_smx_size = 128;
786
dev_priv->r600_sq_num_cf_insts = 2;
787
break;
788
case CHIP_RV610:
789
case CHIP_RS780:
790
case CHIP_RS880:
791
case CHIP_RV620:
792
dev_priv->r600_max_pipes = 1;
793
dev_priv->r600_max_tile_pipes = 1;
794
dev_priv->r600_max_simds = 2;
795
dev_priv->r600_max_backends = 1;
796
dev_priv->r600_max_gprs = 128;
797
dev_priv->r600_max_threads = 192;
798
dev_priv->r600_max_stack_entries = 128;
799
dev_priv->r600_max_hw_contexts = 4;
800
dev_priv->r600_max_gs_threads = 4;
801
dev_priv->r600_sx_max_export_size = 128;
802
dev_priv->r600_sx_max_export_pos_size = 16;
803
dev_priv->r600_sx_max_export_smx_size = 128;
804
dev_priv->r600_sq_num_cf_insts = 1;
805
break;
806
case CHIP_RV670:
807
dev_priv->r600_max_pipes = 4;
808
dev_priv->r600_max_tile_pipes = 4;
809
dev_priv->r600_max_simds = 4;
810
dev_priv->r600_max_backends = 4;
811
dev_priv->r600_max_gprs = 192;
812
dev_priv->r600_max_threads = 192;
813
dev_priv->r600_max_stack_entries = 256;
814
dev_priv->r600_max_hw_contexts = 8;
815
dev_priv->r600_max_gs_threads = 16;
816
dev_priv->r600_sx_max_export_size = 128;
817
dev_priv->r600_sx_max_export_pos_size = 16;
818
dev_priv->r600_sx_max_export_smx_size = 128;
819
dev_priv->r600_sq_num_cf_insts = 2;
820
break;
821
default:
822
break;
823
}
824
825
/* Initialize HDP */
826
j = 0;
827
for (i = 0; i < 32; i++) {
828
RADEON_WRITE((0x2c14 + j), 0x00000000);
829
RADEON_WRITE((0x2c18 + j), 0x00000000);
830
RADEON_WRITE((0x2c1c + j), 0x00000000);
831
RADEON_WRITE((0x2c20 + j), 0x00000000);
832
RADEON_WRITE((0x2c24 + j), 0x00000000);
833
j += 0x18;
834
}
835
836
RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
837
838
/* setup tiling, simd, pipe config */
839
ramcfg = RADEON_READ(R600_RAMCFG);
840
841
switch (dev_priv->r600_max_tile_pipes) {
842
case 1:
843
gb_tiling_config |= R600_PIPE_TILING(0);
844
break;
845
case 2:
846
gb_tiling_config |= R600_PIPE_TILING(1);
847
break;
848
case 4:
849
gb_tiling_config |= R600_PIPE_TILING(2);
850
break;
851
case 8:
852
gb_tiling_config |= R600_PIPE_TILING(3);
853
break;
854
default:
855
break;
856
}
857
858
gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK);
859
860
gb_tiling_config |= R600_GROUP_SIZE(0);
861
862
if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) {
863
gb_tiling_config |= R600_ROW_TILING(3);
864
gb_tiling_config |= R600_SAMPLE_SPLIT(3);
865
} else {
866
gb_tiling_config |=
867
R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
868
gb_tiling_config |=
869
R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
870
}
871
872
gb_tiling_config |= R600_BANK_SWAPS(1);
873
874
cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
875
cc_rb_backend_disable |=
876
R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);
877
878
cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
879
cc_gc_shader_pipe_config |=
880
R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK);
881
cc_gc_shader_pipe_config |=
882
R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK);
883
884
backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
885
(R6XX_MAX_BACKENDS -
886
r600_count_pipe_bits((cc_rb_backend_disable &
887
R6XX_MAX_BACKENDS_MASK) >> 16)),
888
(cc_rb_backend_disable >> 16));
889
gb_tiling_config |= R600_BACKEND_MAP(backend_map);
890
891
RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config);
892
RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
893
RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
894
if (gb_tiling_config & 0xc0) {
895
dev_priv->r600_group_size = 512;
896
} else {
897
dev_priv->r600_group_size = 256;
898
}
899
dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
900
if (gb_tiling_config & 0x30) {
901
dev_priv->r600_nbanks = 8;
902
} else {
903
dev_priv->r600_nbanks = 4;
904
}
905
906
RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
907
RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
908
RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
909
910
num_qd_pipes =
911
R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
912
RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
913
RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
914
915
/* set HW defaults for 3D engine */
916
RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
917
R600_ROQ_IB2_START(0x2b)));
918
919
RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) |
920
R600_ROQ_END(0x40)));
921
922
RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
923
R600_SYNC_GRADIENT |
924
R600_SYNC_WALKER |
925
R600_SYNC_ALIGNER));
926
927
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670)
928
RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021);
929
930
sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1);
931
sx_debug_1 |= R600_SMX_EVENT_RELEASE;
932
if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600))
933
sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS;
934
RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1);
935
936
if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
937
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
938
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
939
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
940
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
941
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
942
RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE);
943
else
944
RADEON_WRITE(R600_DB_DEBUG, 0);
945
946
RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) |
947
R600_DEPTH_FLUSH(16) |
948
R600_DEPTH_PENDING_FREE(4) |
949
R600_DEPTH_CACHELINE_FREE(16)));
950
RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
951
RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0);
952
953
RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
954
RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0));
955
956
sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES);
957
if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
958
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
959
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
960
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
961
sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) |
962
R600_FETCH_FIFO_HIWATER(0xa) |
963
R600_DONE_FIFO_HIWATER(0xe0) |
964
R600_ALU_UPDATE_FIFO_HIWATER(0x8));
965
} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
966
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) {
967
sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff);
968
sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4);
969
}
970
RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
971
972
/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
973
* should be adjusted as needed by the 2D/3D drivers. This just sets default values
974
*/
975
sq_config = RADEON_READ(R600_SQ_CONFIG);
976
sq_config &= ~(R600_PS_PRIO(3) |
977
R600_VS_PRIO(3) |
978
R600_GS_PRIO(3) |
979
R600_ES_PRIO(3));
980
sq_config |= (R600_DX9_CONSTS |
981
R600_VC_ENABLE |
982
R600_PS_PRIO(0) |
983
R600_VS_PRIO(1) |
984
R600_GS_PRIO(2) |
985
R600_ES_PRIO(3));
986
987
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) {
988
sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) |
989
R600_NUM_VS_GPRS(124) |
990
R600_NUM_CLAUSE_TEMP_GPRS(4));
991
sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) |
992
R600_NUM_ES_GPRS(0));
993
sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) |
994
R600_NUM_VS_THREADS(48) |
995
R600_NUM_GS_THREADS(4) |
996
R600_NUM_ES_THREADS(4));
997
sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) |
998
R600_NUM_VS_STACK_ENTRIES(128));
999
sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) |
1000
R600_NUM_ES_STACK_ENTRIES(0));
1001
} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
1002
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
1003
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
1004
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
1005
/* no vertex cache */
1006
sq_config &= ~R600_VC_ENABLE;
1007
1008
sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
1009
R600_NUM_VS_GPRS(44) |
1010
R600_NUM_CLAUSE_TEMP_GPRS(2));
1011
sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
1012
R600_NUM_ES_GPRS(17));
1013
sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
1014
R600_NUM_VS_THREADS(78) |
1015
R600_NUM_GS_THREADS(4) |
1016
R600_NUM_ES_THREADS(31));
1017
sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
1018
R600_NUM_VS_STACK_ENTRIES(40));
1019
sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
1020
R600_NUM_ES_STACK_ENTRIES(16));
1021
} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
1022
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) {
1023
sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
1024
R600_NUM_VS_GPRS(44) |
1025
R600_NUM_CLAUSE_TEMP_GPRS(2));
1026
sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) |
1027
R600_NUM_ES_GPRS(18));
1028
sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
1029
R600_NUM_VS_THREADS(78) |
1030
R600_NUM_GS_THREADS(4) |
1031
R600_NUM_ES_THREADS(31));
1032
sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
1033
R600_NUM_VS_STACK_ENTRIES(40));
1034
sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
1035
R600_NUM_ES_STACK_ENTRIES(16));
1036
} else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) {
1037
sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
1038
R600_NUM_VS_GPRS(44) |
1039
R600_NUM_CLAUSE_TEMP_GPRS(2));
1040
sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
1041
R600_NUM_ES_GPRS(17));
1042
sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
1043
R600_NUM_VS_THREADS(78) |
1044
R600_NUM_GS_THREADS(4) |
1045
R600_NUM_ES_THREADS(31));
1046
sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) |
1047
R600_NUM_VS_STACK_ENTRIES(64));
1048
sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) |
1049
R600_NUM_ES_STACK_ENTRIES(64));
1050
}
1051
1052
RADEON_WRITE(R600_SQ_CONFIG, sq_config);
1053
RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, sq_gpr_resource_mgmt_1);
1054
RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, sq_gpr_resource_mgmt_2);
1055
RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
1056
RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
1057
RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
1058
1059
if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
1060
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
1061
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
1062
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
1063
RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY));
1064
else
1065
RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC));
1066
1067
RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) |
1068
R600_S0_Y(0x4) |
1069
R600_S1_X(0x4) |
1070
R600_S1_Y(0xc)));
1071
RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) |
1072
R600_S0_Y(0xe) |
1073
R600_S1_X(0x2) |
1074
R600_S1_Y(0x2) |
1075
R600_S2_X(0xa) |
1076
R600_S2_Y(0x6) |
1077
R600_S3_X(0x6) |
1078
R600_S3_Y(0xa)));
1079
RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) |
1080
R600_S0_Y(0xb) |
1081
R600_S1_X(0x4) |
1082
R600_S1_Y(0xc) |
1083
R600_S2_X(0x1) |
1084
R600_S2_Y(0x6) |
1085
R600_S3_X(0xa) |
1086
R600_S3_Y(0xe)));
1087
RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) |
1088
R600_S4_Y(0x1) |
1089
R600_S5_X(0x0) |
1090
R600_S5_Y(0x0) |
1091
R600_S6_X(0xb) |
1092
R600_S6_Y(0x4) |
1093
R600_S7_X(0x7) |
1094
R600_S7_Y(0x8)));
1095
1096
1097
switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1098
case CHIP_R600:
1099
case CHIP_RV630:
1100
case CHIP_RV635:
1101
gs_prim_buffer_depth = 0;
1102
break;
1103
case CHIP_RV610:
1104
case CHIP_RS780:
1105
case CHIP_RS880:
1106
case CHIP_RV620:
1107
gs_prim_buffer_depth = 32;
1108
break;
1109
case CHIP_RV670:
1110
gs_prim_buffer_depth = 128;
1111
break;
1112
default:
1113
break;
1114
}
1115
1116
num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
1117
vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
1118
/* Max value for this is 256 */
1119
if (vgt_gs_per_es > 256)
1120
vgt_gs_per_es = 256;
1121
1122
RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
1123
RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
1124
RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
1125
RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
1126
1127
/* more default values. 2D/3D driver should adjust as needed */
1128
RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
1129
RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
1130
RADEON_WRITE(R600_SX_MISC, 0);
1131
RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
1132
RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
1133
RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
1134
RADEON_WRITE(R600_SPI_INPUT_Z, 0);
1135
RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
1136
RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
1137
1138
/* clear render buffer base addresses */
1139
RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
1140
RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
1141
RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
1142
RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
1143
RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
1144
RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
1145
RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
1146
RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
1147
1148
switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1149
case CHIP_RV610:
1150
case CHIP_RS780:
1151
case CHIP_RS880:
1152
case CHIP_RV620:
1153
tc_cntl = R600_TC_L2_SIZE(8);
1154
break;
1155
case CHIP_RV630:
1156
case CHIP_RV635:
1157
tc_cntl = R600_TC_L2_SIZE(4);
1158
break;
1159
case CHIP_R600:
1160
tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT;
1161
break;
1162
default:
1163
tc_cntl = R600_TC_L2_SIZE(0);
1164
break;
1165
}
1166
1167
RADEON_WRITE(R600_TC_CNTL, tc_cntl);
1168
1169
hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
1170
RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1171
1172
arb_pop = RADEON_READ(R600_ARB_POP);
1173
arb_pop |= R600_ENABLE_TC128;
1174
RADEON_WRITE(R600_ARB_POP, arb_pop);
1175
1176
RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1177
RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
1178
R600_NUM_CLIP_SEQ(3)));
1179
RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095));
1180
1181
}
1182
1183
/*
 * Build the R7xx tile-pipe -> render-backend map.
 *
 * Each of the num_tile_pipes pipes gets a 2-bit backend id in the returned
 * word (bit position = pipe index * 2).  Backends listed in
 * backend_disable_mask are skipped; enabled backends are assigned to pipes
 * round-robin.  On RV770/RV730 the pipe order is swizzled (interleaved) to
 * spread traffic; RV710/RV740 use the identity order.
 */
static u32 r700_get_tile_pipe_to_backend_map(drm_radeon_private_t *dev_priv,
					     u32 num_tile_pipes,
					     u32 num_backends,
					     u32 backend_disable_mask)
{
	/* Swizzled pipe orders indexed by (num_tile_pipes - 1); identical to
	 * the per-count switch in the original implementation. */
	static const u32 swizzle_table[R7XX_MAX_PIPES][R7XX_MAX_PIPES] = {
		{ 0 },
		{ 0, 1 },
		{ 0, 2, 1 },
		{ 0, 2, 3, 1 },
		{ 0, 2, 4, 1, 3 },
		{ 0, 2, 4, 5, 3, 1 },
		{ 0, 2, 4, 6, 3, 1, 5 },
		{ 0, 2, 4, 6, 3, 1, 7, 5 },
	};
	u32 backend_map = 0;
	u32 enabled_backends_mask = 0;
	u32 enabled_backends_count = 0;
	u32 cur_backend = 0;
	u32 cur_pipe;
	u32 i;
	bool force_no_swizzle;

	/* Clamp the requested counts to the hardware limits. */
	if (num_tile_pipes > R7XX_MAX_PIPES)
		num_tile_pipes = R7XX_MAX_PIPES;
	if (num_tile_pipes < 1)
		num_tile_pipes = 1;
	if (num_backends > R7XX_MAX_BACKENDS)
		num_backends = R7XX_MAX_BACKENDS;
	if (num_backends < 1)
		num_backends = 1;

	/* Collect up to num_backends enabled backends (lowest indices first). */
	for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
		if (!((backend_disable_mask >> i) & 1)) {
			enabled_backends_mask |= 1 << i;
			if (++enabled_backends_count == num_backends)
				break;
		}
	}

	/* Everything disabled would leave nothing to map; fall back to
	 * backend 0 alone. */
	if (enabled_backends_count == 0) {
		enabled_backends_mask = 1;
		enabled_backends_count = 1;
	}

	if (enabled_backends_count != num_backends)
		num_backends = enabled_backends_count;

	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
	case CHIP_RV730:
		force_no_swizzle = false;
		break;
	case CHIP_RV710:
	case CHIP_RV740:
	default:
		force_no_swizzle = true;
		break;
	}

	/* Walk the pipes in (possibly swizzled) order, assigning the next
	 * enabled backend to each. */
	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
		u32 pipe = force_no_swizzle ?
			cur_pipe : swizzle_table[num_tile_pipes - 1][cur_pipe];

		/* Skip over disabled backends. */
		while (!((1 << cur_backend) & enabled_backends_mask))
			cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;

		backend_map |= (u32)((cur_backend & 3) << (pipe * 2));

		cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
	}

	return backend_map;
}
1356
1357
/*
 * Program the R7xx-family (RV770/RV730/RV710/RV740) 3D engine defaults:
 * per-ASIC limits, tiling/backend configuration, FIFO sizes, SQ resource
 * partitioning and miscellaneous default register state.  Most SQ/VGT values
 * are just sane defaults the 2D/3D drivers are expected to adjust later.
 */
static void r700_gfx_init(struct drm_device *dev,
			  drm_radeon_private_t *dev_priv)
{
	int i, j, num_qd_pipes;
	u32 ta_aux_cntl;
	u32 sx_debug_1;
	u32 smx_dc_ctl0;
	u32 db_debug3;
	u32 num_gs_verts_per_thread;
	u32 vgt_gs_per_es;
	u32 gs_prim_buffer_depth = 0;
	u32 sq_ms_fifo_sizes;
	u32 sq_config;
	u32 sq_thread_resource_mgmt;
	u32 hdp_host_path_cntl;
	u32 sq_dyn_gpr_size_simd_ab_0;
	u32 backend_map;
	u32 gb_tiling_config = 0;
	u32 cc_rb_backend_disable;
	u32 cc_gc_shader_pipe_config;
	u32 mc_arb_ramcfg;
	u32 db_debug4;

	/* setup chip specs: per-ASIC pipe/SIMD/GPR/thread limits and
	 * SX export / SC FIFO sizing used by the register programming below.
	 * (Note: "fifo_fize" is a pre-existing typo in the struct field name.) */
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
		dev_priv->r600_max_pipes = 4;
		dev_priv->r600_max_tile_pipes = 8;
		dev_priv->r600_max_simds = 10;
		dev_priv->r600_max_backends = 4;
		dev_priv->r600_max_gprs = 256;
		dev_priv->r600_max_threads = 248;
		dev_priv->r600_max_stack_entries = 512;
		dev_priv->r600_max_hw_contexts = 8;
		dev_priv->r600_max_gs_threads = 16 * 2;
		dev_priv->r600_sx_max_export_size = 128;
		dev_priv->r600_sx_max_export_pos_size = 16;
		dev_priv->r600_sx_max_export_smx_size = 112;
		dev_priv->r600_sq_num_cf_insts = 2;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0xF9;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
		break;
	case CHIP_RV730:
		dev_priv->r600_max_pipes = 2;
		dev_priv->r600_max_tile_pipes = 4;
		dev_priv->r600_max_simds = 8;
		dev_priv->r600_max_backends = 2;
		dev_priv->r600_max_gprs = 128;
		dev_priv->r600_max_threads = 248;
		dev_priv->r600_max_stack_entries = 256;
		dev_priv->r600_max_hw_contexts = 8;
		dev_priv->r600_max_gs_threads = 16 * 2;
		dev_priv->r600_sx_max_export_size = 256;
		dev_priv->r600_sx_max_export_pos_size = 32;
		dev_priv->r600_sx_max_export_smx_size = 224;
		dev_priv->r600_sq_num_cf_insts = 2;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0xf9;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
		/* rebalance the export buffer: give 16 entries from the
		 * position buffer to the SMX buffer */
		if (dev_priv->r600_sx_max_export_pos_size > 16) {
			dev_priv->r600_sx_max_export_pos_size -= 16;
			dev_priv->r600_sx_max_export_smx_size += 16;
		}
		break;
	case CHIP_RV710:
		dev_priv->r600_max_pipes = 2;
		dev_priv->r600_max_tile_pipes = 2;
		dev_priv->r600_max_simds = 2;
		dev_priv->r600_max_backends = 1;
		dev_priv->r600_max_gprs = 256;
		dev_priv->r600_max_threads = 192;
		dev_priv->r600_max_stack_entries = 256;
		dev_priv->r600_max_hw_contexts = 4;
		dev_priv->r600_max_gs_threads = 8 * 2;
		dev_priv->r600_sx_max_export_size = 128;
		dev_priv->r600_sx_max_export_pos_size = 16;
		dev_priv->r600_sx_max_export_smx_size = 112;
		dev_priv->r600_sq_num_cf_insts = 1;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0x40;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
		break;
	case CHIP_RV740:
		dev_priv->r600_max_pipes = 4;
		dev_priv->r600_max_tile_pipes = 4;
		dev_priv->r600_max_simds = 8;
		dev_priv->r600_max_backends = 4;
		dev_priv->r600_max_gprs = 256;
		dev_priv->r600_max_threads = 248;
		dev_priv->r600_max_stack_entries = 512;
		dev_priv->r600_max_hw_contexts = 8;
		dev_priv->r600_max_gs_threads = 16 * 2;
		dev_priv->r600_sx_max_export_size = 256;
		dev_priv->r600_sx_max_export_pos_size = 32;
		dev_priv->r600_sx_max_export_smx_size = 224;
		dev_priv->r600_sq_num_cf_insts = 2;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0x100;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;

		/* same position->SMX rebalance as RV730 */
		if (dev_priv->r600_sx_max_export_pos_size > 16) {
			dev_priv->r600_sx_max_export_pos_size -= 16;
			dev_priv->r600_sx_max_export_smx_size += 16;
		}
		break;
	default:
		break;
	}

	/* Initialize HDP: zero a bank of 32 register groups at 0x2c14,
	 * stride 0x18 */
	j = 0;
	for (i = 0; i < 32; i++) {
		RADEON_WRITE((0x2c14 + j), 0x00000000);
		RADEON_WRITE((0x2c18 + j), 0x00000000);
		RADEON_WRITE((0x2c1c + j), 0x00000000);
		RADEON_WRITE((0x2c20 + j), 0x00000000);
		RADEON_WRITE((0x2c24 + j), 0x00000000);
		j += 0x18;
	}

	RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));

	/* setup tiling, simd, pipe config */
	mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG);

	/* encode pipe count (1/2/4/8) into the tiling config */
	switch (dev_priv->r600_max_tile_pipes) {
	case 1:
		gb_tiling_config |= R600_PIPE_TILING(0);
		break;
	case 2:
		gb_tiling_config |= R600_PIPE_TILING(1);
		break;
	case 4:
		gb_tiling_config |= R600_PIPE_TILING(2);
		break;
	case 8:
		gb_tiling_config |= R600_PIPE_TILING(3);
		break;
	default:
		break;
	}

	/* bank tiling: fixed on RV770, otherwise derived from the memory
	 * controller's RAM config */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
		gb_tiling_config |= R600_BANK_TILING(1);
	else
		gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK);

	gb_tiling_config |= R600_GROUP_SIZE(0);

	/* row tiling / sample split from the RAM row count, capped at 3 */
	if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) {
		gb_tiling_config |= R600_ROW_TILING(3);
		gb_tiling_config |= R600_SAMPLE_SPLIT(3);
	} else {
		gb_tiling_config |=
			R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
		gb_tiling_config |=
			R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
	}

	gb_tiling_config |= R600_BANK_SWAPS(1);

	/* disable the backends beyond r600_max_backends (preserve the RO
	 * bits above bit 16) */
	cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
	cc_rb_backend_disable |=
		R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);

	/* likewise mark the unused QD pipes and SIMDs inactive */
	cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
	cc_gc_shader_pipe_config |=
		R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);
	cc_gc_shader_pipe_config |=
		R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);

	/* RV740 uses a fixed backend map; others compute one from the
	 * enabled-backend mask */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)
		backend_map = 0x28;
	else
		backend_map = r700_get_tile_pipe_to_backend_map(dev_priv,
								dev_priv->r600_max_tile_pipes,
								(R7XX_MAX_BACKENDS -
								 r600_count_pipe_bits((cc_rb_backend_disable &
										       R7XX_MAX_BACKENDS_MASK) >> 16)),
								(cc_rb_backend_disable >> 16));
	gb_tiling_config |= R600_BACKEND_MAP(backend_map);

	RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config);
	RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
	RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
	/* cache decoded tiling parameters for later use by the driver */
	if (gb_tiling_config & 0xc0) {
		dev_priv->r600_group_size = 512;
	} else {
		dev_priv->r600_group_size = 256;
	}
	dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
	if (gb_tiling_config & 0x30) {
		dev_priv->r600_nbanks = 8;
	} else {
		dev_priv->r600_nbanks = 4;
	}

	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);

	RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);

	/* size VGT dealloc/reuse depths from the number of active QD pipes */
	num_qd_pipes =
		R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);

	/* set HW defaults for 3D engine */
	RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
						R600_ROQ_IB2_START(0x2b)));

	RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));

	ta_aux_cntl = RADEON_READ(R600_TA_CNTL_AUX);
	RADEON_WRITE(R600_TA_CNTL_AUX, ta_aux_cntl | R600_DISABLE_CUBE_ANISO);

	sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);
	sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;
	RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1);

	/* SMX cache depth scales with the number of SX sets (64 lines/set) */
	smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0);
	smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff);
	smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);
	RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);

	if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV740)
		RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
						  R700_GS_FLUSH_CTL(4) |
						  R700_ACK_FLUSH_CTL(3) |
						  R700_SYNC_FLUSH_CTL));

	db_debug3 = RADEON_READ(R700_DB_DEBUG3);
	db_debug3 &= ~R700_DB_CLK_OFF_DELAY(0x1f);
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
	case CHIP_RV740:
		db_debug3 |= R700_DB_CLK_OFF_DELAY(0x1f);
		break;
	case CHIP_RV710:
	case CHIP_RV730:
	default:
		db_debug3 |= R700_DB_CLK_OFF_DELAY(2);
		break;
	}
	RADEON_WRITE(R700_DB_DEBUG3, db_debug3);

	if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV770) {
		db_debug4 = RADEON_READ(RV700_DB_DEBUG4);
		db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;
		RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);
	}

	/* export buffer sizes are programmed in units of 4 */
	RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) |
						   R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) |
						   R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1)));

	RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) |
						 R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) |
						 R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize)));

	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);

	RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1);

	RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));

	RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4));

	RADEON_WRITE(R600_CP_PERFMON_CNTL, 0);

	sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) |
			    R600_DONE_FIFO_HIWATER(0xe0) |
			    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
	case CHIP_RV730:
	case CHIP_RV710:
		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
		break;
	case CHIP_RV740:
	default:
		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
		break;
	}
	RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);

	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
	 * should be adjusted as needed by the 2D/3D drivers. This just sets default values
	 */
	sq_config = RADEON_READ(R600_SQ_CONFIG);
	sq_config &= ~(R600_PS_PRIO(3) |
		       R600_VS_PRIO(3) |
		       R600_GS_PRIO(3) |
		       R600_ES_PRIO(3));
	sq_config |= (R600_DX9_CONSTS |
		      R600_VC_ENABLE |
		      R600_EXPORT_SRC_C |
		      R600_PS_PRIO(0) |
		      R600_VS_PRIO(1) |
		      R600_GS_PRIO(2) |
		      R600_ES_PRIO(3));
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
		/* no vertex cache */
		sq_config &= ~R600_VC_ENABLE;

	RADEON_WRITE(R600_SQ_CONFIG, sq_config);

	/* split the GPR pool: 24/64 each to PS and VS, half of that for
	 * clause temporaries, 7/64 each to GS and ES */
	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
						    R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
						    R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2)));

	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) |
						    R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64)));

	/* thread split: 4/8 PS, 2/8 VS, 1/8 ES; GS gets 1/8 of max_threads
	 * capped at max_gs_threads */
	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) |
				   R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) |
				   R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8));
	/* NOTE(review): the else branch divides max_gs_threads (not
	 * max_threads) by 8, which looks asymmetric with the condition —
	 * verify against upstream before changing. */
	if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads)
		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads);
	else
		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8);
	RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);

	/* stack entries: a quarter of the pool to each of PS/VS/GS/ES */
	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
						     R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));

	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
						     R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));

	/* same dynamic GPR sizing (38/64 of the pool) for every SIMD pair */
	sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64));

	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);

	RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) |
						     R700_FORCE_EOV_MAX_REZ_CNT(255)));

	/* RV710 has no vertex cache, so only invalidate the texture cache */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) |
							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
	else
		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) |
							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));

	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
	case CHIP_RV730:
	case CHIP_RV740:
		gs_prim_buffer_depth = 384;
		break;
	case CHIP_RV710:
		gs_prim_buffer_depth = 128;
		break;
	default:
		break;
	}

	num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
	/* Max value for this is 256 */
	if (vgt_gs_per_es > 256)
		vgt_gs_per_es = 256;

	RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
	RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
	RADEON_WRITE(R600_VGT_GS_PER_VS, 2);

	/* more default values. 2D/3D driver should adjust as needed */
	RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
	RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
	RADEON_WRITE(R600_SX_MISC, 0);
	RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
	RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa);
	RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
	RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff);
	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
	RADEON_WRITE(R600_SPI_INPUT_Z, 0);
	RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
	RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);

	/* clear render buffer base addresses */
	RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR7_BASE, 0);

	RADEON_WRITE(R700_TCP_CNTL, 0);

	hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
	RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);

	RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
					  R600_NUM_CLIP_SEQ(3)));

}
1782
1783
/*
 * Bring up the R6xx/R7xx CP ring buffer: run the per-family GFX init,
 * soft-reset the CP, program ring size/base and the read-pointer writeback
 * address (AGP or PCI(E) GART), set up the scratch-register writeback area,
 * enable bus mastering, and zero the frame/dispatch/clear bookkeeping in
 * both the scratch area and the SAREA.
 */
static void r600_cp_init_ring_buffer(struct drm_device *dev,
				     drm_radeon_private_t *dev_priv,
				     struct drm_file *file_priv)
{
	struct drm_radeon_master_private *master_priv;
	u32 ring_start;
	u64 rptr_addr;

	/* per-family 3D engine defaults first */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
		r700_gfx_init(dev, dev_priv);
	else
		r600_gfx_init(dev, dev_priv);

	/* soft-reset the CP; the read flushes the write before the delay */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(15000);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);


	/* Set ring buffer size */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4);

	/* Set the write pointer delay */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);

	/* re-write RB_CNTL with RPTR_WR enabled so the pointers below can
	 * be forced to zero */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     RADEON_RB_NO_UPDATE |
		     RADEON_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     RADEON_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	/* Initialize the ring buffer's read and write pointers */
	RADEON_WRITE(R600_CP_RB_RPTR_WR, 0);
	RADEON_WRITE(R600_CP_RB_WPTR, 0);
	SET_RING_HEAD(dev_priv, 0);
	dev_priv->ring.tail = 0;

	/* GPU-visible address of the rptr writeback page: translate the
	 * map offset into the GART aperture (AGP or SG backed) */
#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		rptr_addr = dev_priv->ring_rptr->offset
			- dev->agp->base +
			dev_priv->gart_vm_start;
	} else
#endif
	{
		rptr_addr = dev_priv->ring_rptr->offset
			- ((unsigned long) dev->sg->virtual)
			+ dev_priv->gart_vm_start;
	}
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR,
#ifdef __BIG_ENDIAN
		     (2 << 0) |		/* byte-swap the writeback on BE hosts */
#endif
		     (rptr_addr & 0xfffffffc));
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI,
		     upper_32_bits(rptr_addr));

	/* final RB_CNTL: writeback enabled (NO_UPDATE and RPTR_WR cleared) */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* XXX */
		radeon_write_agp_base(dev_priv, dev->agp->base);

		/* XXX */
		radeon_write_agp_location(dev_priv,
			     (((dev_priv->gart_vm_start - 1 +
				dev_priv->gart_size) & 0xffff0000) |
			      (dev_priv->gart_vm_start >> 16)));

		ring_start = (dev_priv->cp_ring->offset
			      - dev->agp->base
			      + dev_priv->gart_vm_start);
	} else
#endif
		ring_start = (dev_priv->cp_ring->offset
			      - (unsigned long)dev->sg->virtual
			      + dev_priv->gart_vm_start);

	/* ring base is programmed in 256-byte units */
	RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8);

	RADEON_WRITE(R600_CP_ME_CNTL, 0xff);

	RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28));

	/* Initialize the scratch register pointer.  This will cause
	 * the scratch register values to be written out to memory
	 * whenever they are updated.
	 *
	 * We simply put this behind the ring read pointer, this works
	 * with PCI GART as well as (whatever kind of) AGP GART
	 */
	{
		u64 scratch_addr;

		/* rebuild the 64-bit rptr address, then offset and convert
		 * to the 256-byte-unit form SCRATCH_ADDR expects */
		scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR) & 0xFFFFFFFC;
		scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;
		scratch_addr += R600_SCRATCH_REG_OFFSET;
		scratch_addr >>= 8;
		scratch_addr &= 0xffffffff;

		RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr);
	}

	/* enable writeback for scratch registers 0-2 */
	RADEON_WRITE(R600_SCRATCH_UMSK, 0x7);

	/* Turn on bus mastering */
	radeon_enable_bm(dev_priv);

	/* zero both the writeback copies and the registers for the
	 * frame/dispatch/clear counters */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0);
	RADEON_WRITE(R600_LAST_FRAME_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);
	RADEON_WRITE(R600_LAST_DISPATCH_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0);
	RADEON_WRITE(R600_LAST_CLEAR_REG, 0);

	/* reset sarea copies of these */
	master_priv = file_priv->master->driver_priv;
	if (master_priv->sarea_priv) {
		master_priv->sarea_priv->last_frame = 0;
		master_priv->sarea_priv->last_dispatch = 0;
		master_priv->sarea_priv->last_clear = 0;
	}

	r600_do_wait_for_idle(dev_priv);

}
1943
1944
/* Tear down CP state set up by r600_do_init_cp().
 *
 * Releases the AGP ioremaps (AGP path) or the PCI GART page table and
 * its framebuffer mapping (PCI path), then wipes dev_priv up to (but
 * not including) the 'flags' member so chip-identification bits survive
 * for a later re-init.  Always returns 0.
 */
int r600_do_cleanup_cp(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	/* Make sure interrupts are disabled here because the uninstall ioctl
	 * may not have been called from userspace and after dev_private
	 * is freed, it's too late.
	 */
	if (dev->irq_enabled)
		drm_irq_uninstall(dev);

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* AGP path: the ring, read-pointer page and DMA buffers were
		 * ioremapped in r600_do_init_cp(); unmap each one and clear
		 * the pointer so a double cleanup is harmless.
		 */
		if (dev_priv->cp_ring != NULL) {
			drm_core_ioremapfree(dev_priv->cp_ring, dev);
			dev_priv->cp_ring = NULL;
		}
		if (dev_priv->ring_rptr != NULL) {
			drm_core_ioremapfree(dev_priv->ring_rptr, dev);
			dev_priv->ring_rptr = NULL;
		}
		if (dev->agp_buffer_map != NULL) {
			drm_core_ioremapfree(dev->agp_buffer_map, dev);
			dev->agp_buffer_map = NULL;
		}
	} else
#endif
	{
		/* PCI(e) GART path: free the GART page table and, when the
		 * table lives in the framebuffer, its ioremap as well.
		 */
		if (dev_priv->gart_info.bus_addr)
			r600_page_table_cleanup(dev, &dev_priv->gart_info);

		if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) {
			drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev);
			dev_priv->gart_info.addr = NULL;
		}
	}
	/* only clear to the start of flags */
	memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags));

	return 0;
}
1987
1988
/* One-time CP initialization for R6xx/R7xx in the legacy (UMS) path.
 *
 * Validates the userspace-supplied init parameters, locates the SAREA
 * and the ring/read-pointer/DMA-buffer maps, computes the VRAM and GART
 * address layout, sets up the GART (AGP or PCI page table), loads the
 * CP microcode and finally brings up the ring buffer.
 *
 * On any failure the partially-initialized state is torn down with
 * r600_do_cleanup_cp() and a negative errno is returned; 0 on success.
 */
int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
		    struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;

	DRM_DEBUG("\n");

	mutex_init(&dev_priv->cs_mutex);
	r600_cs_legacy_init();
	/* if we require new memory map but we don't have it fail */
	if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
		DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) {
		DRM_DEBUG("Forcing AGP card to PCI mode\n");
		dev_priv->flags &= ~RADEON_IS_AGP;
		/* The writeback test succeeds, but when writeback is enabled,
		 * the ring buffer read ptr update fails after first 128 bytes.
		 */
		radeon_no_wb = 1;
	} else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE))
		   && !init->is_pci) {
		DRM_DEBUG("Restoring AGP flag\n");
		dev_priv->flags |= RADEON_IS_AGP;
	}

	dev_priv->usec_timeout = init->usec_timeout;
	if (dev_priv->usec_timeout < 1 ||
	    dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) {
		DRM_DEBUG("TIMEOUT problem!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	/* Enable vblank on CRTC1 for older X servers
	 */
	dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
	dev_priv->do_boxes = 0;
	dev_priv->cp_mode = init->cp_mode;

	/* We don't support anything other than bus-mastering ring mode,
	 * but the ring can be in either AGP or PCI space for the ring
	 * read pointer.
	 */
	if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) &&
	    (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) {
		DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode);
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	/* Record the color format used by the blit/clear paths. */
	switch (init->fb_bpp) {
	case 16:
		dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565;
		break;
	case 32:
	default:
		dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
		break;
	}
	dev_priv->front_offset = init->front_offset;
	dev_priv->front_pitch = init->front_pitch;
	dev_priv->back_offset = init->back_offset;
	dev_priv->back_pitch = init->back_pitch;

	dev_priv->ring_offset = init->ring_offset;
	dev_priv->ring_rptr_offset = init->ring_rptr_offset;
	dev_priv->buffers_offset = init->buffers_offset;
	dev_priv->gart_textures_offset = init->gart_textures_offset;

	/* Resolve the userspace-provided map handles into DRM maps. */
	master_priv->sarea = drm_getsarea(dev);
	if (!master_priv->sarea) {
		DRM_ERROR("could not find sarea!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset);
	if (!dev_priv->cp_ring) {
		DRM_ERROR("could not find cp ring region!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}
	dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset);
	if (!dev_priv->ring_rptr) {
		DRM_ERROR("could not find ring read pointer!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}
	dev->agp_buffer_token = init->buffers_offset;
	dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);
	if (!dev->agp_buffer_map) {
		DRM_ERROR("could not find dma buffer region!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	if (init->gart_textures_offset) {
		dev_priv->gart_textures =
		    drm_core_findmap(dev, init->gart_textures_offset);
		if (!dev_priv->gart_textures) {
			DRM_ERROR("could not find GART texture region!\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}
	}

#if __OS_HAS_AGP
	/* XXX */
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* AGP: map the ring, read-pointer page and DMA buffers
		 * write-combined; all three must succeed.
		 */
		drm_core_ioremap_wc(dev_priv->cp_ring, dev);
		drm_core_ioremap_wc(dev_priv->ring_rptr, dev);
		drm_core_ioremap_wc(dev->agp_buffer_map, dev);
		if (!dev_priv->cp_ring->handle ||
		    !dev_priv->ring_rptr->handle ||
		    !dev->agp_buffer_map->handle) {
			DRM_ERROR("could not find ioremap agp regions!\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}
	} else
#endif
	{
		/* PCI: the maps are already kernel-addressable; the handle is
		 * just the map offset reinterpreted as a kernel pointer.
		 */
		dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset;
		dev_priv->ring_rptr->handle =
			(void *)(unsigned long)dev_priv->ring_rptr->offset;
		dev->agp_buffer_map->handle =
			(void *)(unsigned long)dev->agp_buffer_map->offset;

		DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
			  dev_priv->cp_ring->handle);
		DRM_DEBUG("dev_priv->ring_rptr->handle %p\n",
			  dev_priv->ring_rptr->handle);
		DRM_DEBUG("dev->agp_buffer_map->handle %p\n",
			  dev->agp_buffer_map->handle);
	}

	/* Decode the MC register: low 16 bits are the VRAM base (in 16M
	 * units -> shifted to bytes), high 16 bits give the aperture end.
	 */
	dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24;
	dev_priv->fb_size =
		(((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000)
		- dev_priv->fb_location;

	/* Pack pitch (in units of 64 bytes) and 1KB-aligned GPU offset
	 * into the PITCH_OFFSET register layout used by 2D operations.
	 */
	dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |
					((dev_priv->front_offset
					  + dev_priv->fb_location) >> 10));

	dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) |
				       ((dev_priv->back_offset
					 + dev_priv->fb_location) >> 10));

	dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) |
					((dev_priv->depth_offset
					  + dev_priv->fb_location) >> 10));

	dev_priv->gart_size = init->gart_size;

	/* New let's set the memory map ... */
	if (dev_priv->new_memmap) {
		u32 base = 0;

		DRM_INFO("Setting GART location based on new memory map\n");

		/* If using AGP, try to locate the AGP aperture at the same
		 * location in the card and on the bus, though we have to
		 * align it down.
		 */
#if __OS_HAS_AGP
		/* XXX */
		if (dev_priv->flags & RADEON_IS_AGP) {
			base = dev->agp->base;
			/* Check if valid */
			if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location &&
			    base < (dev_priv->fb_location + dev_priv->fb_size - 1)) {
				DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",
					 dev->agp->base);
				base = 0;
			}
		}
#endif
		/* If not or if AGP is at 0 (Macs), try to put it elsewhere */
		if (base == 0) {
			base = dev_priv->fb_location + dev_priv->fb_size;
			if (base < dev_priv->fb_location ||
			    ((base + dev_priv->gart_size) & 0xfffffffful) < base)
				base = dev_priv->fb_location
					- dev_priv->gart_size;
		}
		/* GART base must be 4MB aligned. */
		dev_priv->gart_vm_start = base & 0xffc00000u;
		if (dev_priv->gart_vm_start != base)
			DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",
				 base, dev_priv->gart_vm_start);
	}

#if __OS_HAS_AGP
	/* XXX */
	if (dev_priv->flags & RADEON_IS_AGP)
		/* Translate the buffer map's bus address into the GPU's
		 * GART address space.
		 */
		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
						 - dev->agp->base
						 + dev_priv->gart_vm_start);
	else
#endif
		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
						 - (unsigned long)dev->sg->virtual
						 + dev_priv->gart_vm_start);

	DRM_DEBUG("fb 0x%08x size %d\n",
		  (unsigned int) dev_priv->fb_location,
		  (unsigned int) dev_priv->fb_size);
	DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size);
	DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n",
		  (unsigned int) dev_priv->gart_vm_start);
	DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n",
		  dev_priv->gart_buffers_offset);

	/* Ring software state; sizes are stored as log2 of quad-words
	 * as expected by the CP_RB_CNTL register programming.
	 */
	dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle;
	dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
			      + init->ring_size / sizeof(u32));
	dev_priv->ring.size = init->ring_size;
	dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8);

	dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
	dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8);

	dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
	dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16);

	dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;

	dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* XXX turn off pcie gart */
	} else
#endif
	{
		dev_priv->gart_info.table_mask = DMA_BIT_MASK(32);
		/* if we have an offset set from userspace */
		if (!dev_priv->pcigart_offset_set) {
			DRM_ERROR("Need gart offset from userspace\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}

		DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset);

		/* The GART page table lives in VRAM at pcigart_offset;
		 * map it through the framebuffer aperture.
		 */
		dev_priv->gart_info.bus_addr =
			dev_priv->pcigart_offset + dev_priv->fb_location;
		dev_priv->gart_info.mapping.offset =
			dev_priv->pcigart_offset + dev_priv->fb_aper_offset;
		dev_priv->gart_info.mapping.size =
			dev_priv->gart_info.table_size;

		drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);
		if (!dev_priv->gart_info.mapping.handle) {
			DRM_ERROR("ioremap failed.\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}

		dev_priv->gart_info.addr =
			dev_priv->gart_info.mapping.handle;

		DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n",
			  dev_priv->gart_info.addr,
			  dev_priv->pcigart_offset);

		if (!r600_page_table_init(dev)) {
			DRM_ERROR("Failed to init GART table\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}

		if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
			r700_vm_init(dev);
		else
			r600_vm_init(dev);
	}

	/* Fetch PFP/ME microcode from userspace firmware loader once. */
	if (!dev_priv->me_fw || !dev_priv->pfp_fw) {
		int err = r600_cp_init_microcode(dev_priv);
		if (err) {
			DRM_ERROR("Failed to load firmware!\n");
			r600_do_cleanup_cp(dev);
			return err;
		}
	}
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
		r700_cp_load_microcode(dev_priv);
	else
		r600_cp_load_microcode(dev_priv);

	r600_cp_init_ring_buffer(dev, dev_priv, file_priv);

	dev_priv->last_buf = 0;

	r600_do_engine_reset(dev);
	r600_test_writeback(dev_priv);

	return 0;
}
2293
2294
int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv)
2295
{
2296
drm_radeon_private_t *dev_priv = dev->dev_private;
2297
2298
DRM_DEBUG("\n");
2299
if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) {
2300
r700_vm_init(dev);
2301
r700_cp_load_microcode(dev_priv);
2302
} else {
2303
r600_vm_init(dev);
2304
r600_cp_load_microcode(dev_priv);
2305
}
2306
r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
2307
r600_do_engine_reset(dev);
2308
2309
return 0;
2310
}
2311
2312
/* Wait for the CP to go idle.
 *
 * Emits a cache flush/invalidate event plus a WAIT_UNTIL for 3D idle
 * clean, then polls until the ring drains.  Returns 0 on success or a
 * negative errno from r600_do_wait_for_idle() on timeout.
 */
int r600_do_cp_idle(drm_radeon_private_t *dev_priv)
{
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(5);
	/* flush & invalidate GPU caches before idling */
	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
	/* wait for 3D idle clean */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
	/* register offset is expressed in dwords relative to config space */
	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);

	ADVANCE_RING();
	COMMIT_RING();

	return r600_do_wait_for_idle(dev_priv);
}
2332
2333
/* Start the Command Processor.
 *
 * Emits the ME_INITIALIZE packet that configures the micro-engine
 * (context count, device id), then clears the CP halt bit so the CP
 * begins fetching from the ring.
 */
void r600_do_cp_start(drm_radeon_private_t *dev_priv)
{
	u32 cp_me;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(7);
	OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5));
	OUT_RING(0x00000001);
	/* second init dword differs between R6xx and R7xx families */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770))
		OUT_RING(0x00000003);
	else
		OUT_RING(0x00000000);
	OUT_RING((dev_priv->r600_max_hw_contexts - 1));
	OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1));
	OUT_RING(0x00000000);
	OUT_RING(0x00000000);
	ADVANCE_RING();
	COMMIT_RING();

	/* set the mux and reset the halt bit */
	cp_me = 0xff;
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);

	dev_priv->cp_running = 1;

}
2362
2363
/* Reset the ring to an empty state: make the write pointer (and the
 * driver's cached head/tail) coincide with the hardware read pointer,
 * so no stale commands remain queued.
 */
void r600_do_cp_reset(drm_radeon_private_t *dev_priv)
{
	u32 rptr;

	DRM_DEBUG("\n");

	rptr = RADEON_READ(R600_CP_RB_RPTR);
	RADEON_WRITE(R600_CP_RB_WPTR, rptr);
	SET_RING_HEAD(dev_priv, rptr);
	dev_priv->ring.tail = rptr;
}
2373
2374
/* Halt the Command Processor by setting the halt bit while keeping the
 * mux bits (0xff) programmed, and record that the CP is stopped.
 */
void r600_do_cp_stop(drm_radeon_private_t *dev_priv)
{
	DRM_DEBUG("\n");

	RADEON_WRITE(R600_CP_ME_CNTL, (uint32_t)(0xff | R600_CP_ME_HALT));

	dev_priv->cp_running = 0;
}
2386
2387
/* Dispatch the [start, end) byte range of a DMA buffer as an indirect
 * buffer (IB).
 *
 * The hardware requires IB sizes that are a multiple of 16 dwords, so
 * the tail is padded in place with Type-2 NOP packets before the
 * INDIRECT_BUFFER packet (64-bit GART address + dword count) is
 * emitted on the ring.  Always returns 0; an empty range is a no-op.
 */
int r600_cp_dispatch_indirect(struct drm_device *dev,
			      struct drm_buf *buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	if (start != end) {
		/* GPU (GART) address of the IB start */
		unsigned long offset = (dev_priv->gart_buffers_offset
					+ buf->offset + start);
		/* round byte length up to whole dwords */
		int dwords = (end - start + 3) / sizeof(u32);

		DRM_DEBUG("dwords:%d\n", dwords);
		DRM_DEBUG("offset 0x%lx\n", offset);


		/* Indirect buffer data must be a multiple of 16 dwords.
		 * pad the data with a Type-2 CP packet.
		 */
		while (dwords & 0xf) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(4);
		OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2));
		/* low address dword must be 4-byte aligned */
		OUT_RING((offset & 0xfffffffc));
		/* upper 8 address bits */
		OUT_RING((upper_32_bits(offset) & 0xff));
		OUT_RING(dwords);
		ADVANCE_RING();
	}

	return 0;
}
2423
2424
/* Perform a back->front (or front->back, after a page flip) buffer
 * swap by blitting each clip rectangle from the SAREA, then bump the
 * frame-age scratch value that clients throttle on.
 *
 * Failure to set up the blit state is logged and the swap is silently
 * skipped (the function returns void).
 */
void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_master *master = file_priv->master;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	int i, cpp, src_pitch, dst_pitch;
	uint64_t src, dst;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* bytes per pixel from the configured color format */
	if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888)
		cpp = 4;
	else
		cpp = 2;

	/* pfCurrentPage tracks page flipping: it decides which buffer is
	 * currently the visible "front" and which is the render target.
	 */
	if (sarea_priv->pfCurrentPage == 0) {
		src_pitch = dev_priv->back_pitch;
		dst_pitch = dev_priv->front_pitch;
		src = dev_priv->back_offset + dev_priv->fb_location;
		dst = dev_priv->front_offset + dev_priv->fb_location;
	} else {
		src_pitch = dev_priv->front_pitch;
		dst_pitch = dev_priv->back_pitch;
		src = dev_priv->front_offset + dev_priv->fb_location;
		dst = dev_priv->back_offset + dev_priv->fb_location;
	}

	if (r600_prepare_blit_copy(dev, file_priv)) {
		DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
		return;
	}
	/* blit every clip rectangle individually */
	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);

		r600_blit_swap(dev,
			       src, dst,
			       x, y, x, y, w, h,
			       src_pitch, dst_pitch, cpp);
	}
	r600_done_blit_copy(dev);

	/* Increment the frame counter. The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	sarea_priv->last_frame++;

	BEGIN_RING(3);
	R600_FRAME_AGE(sarea_priv->last_frame);
	ADVANCE_RING();
}
2483
2484
int r600_cp_dispatch_texture(struct drm_device *dev,
2485
struct drm_file *file_priv,
2486
drm_radeon_texture_t *tex,
2487
drm_radeon_tex_image_t *image)
2488
{
2489
drm_radeon_private_t *dev_priv = dev->dev_private;
2490
struct drm_buf *buf;
2491
u32 *buffer;
2492
const u8 __user *data;
2493
int size, pass_size;
2494
u64 src_offset, dst_offset;
2495
2496
if (!radeon_check_offset(dev_priv, tex->offset)) {
2497
DRM_ERROR("Invalid destination offset\n");
2498
return -EINVAL;
2499
}
2500
2501
/* this might fail for zero-sized uploads - are those illegal? */
2502
if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) {
2503
DRM_ERROR("Invalid final destination offset\n");
2504
return -EINVAL;
2505
}
2506
2507
size = tex->height * tex->pitch;
2508
2509
if (size == 0)
2510
return 0;
2511
2512
dst_offset = tex->offset;
2513
2514
if (r600_prepare_blit_copy(dev, file_priv)) {
2515
DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
2516
return -EAGAIN;
2517
}
2518
do {
2519
data = (const u8 __user *)image->data;
2520
pass_size = size;
2521
2522
buf = radeon_freelist_get(dev);
2523
if (!buf) {
2524
DRM_DEBUG("EAGAIN\n");
2525
if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
2526
return -EFAULT;
2527
return -EAGAIN;
2528
}
2529
2530
if (pass_size > buf->total)
2531
pass_size = buf->total;
2532
2533
/* Dispatch the indirect buffer.
2534
*/
2535
buffer =
2536
(u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
2537
2538
if (DRM_COPY_FROM_USER(buffer, data, pass_size)) {
2539
DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size);
2540
return -EFAULT;
2541
}
2542
2543
buf->file_priv = file_priv;
2544
buf->used = pass_size;
2545
src_offset = dev_priv->gart_buffers_offset + buf->offset;
2546
2547
r600_blit_copy(dev, src_offset, dst_offset, pass_size);
2548
2549
radeon_cp_discard_buffer(dev, file_priv->master, buf);
2550
2551
/* Update the input parameters for next time */
2552
image->data = (const u8 __user *)image->data + pass_size;
2553
dst_offset += pass_size;
2554
size -= pass_size;
2555
} while (size > 0);
2556
r600_done_blit_copy(dev);
2557
2558
return 0;
2559
}
2560
2561
/*
2562
* Legacy cs ioctl
2563
*/
2564
static u32 radeon_cs_id_get(struct drm_radeon_private *radeon)
2565
{
2566
/* FIXME: check if wrap affect last reported wrap & sequence */
2567
radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF;
2568
if (!radeon->cs_id_scnt) {
2569
/* increment wrap counter */
2570
radeon->cs_id_wcnt += 0x01000000;
2571
/* valid sequence counter start at 1 */
2572
radeon->cs_id_scnt = 1;
2573
}
2574
return (radeon->cs_id_scnt | radeon->cs_id_wcnt);
2575
}
2576
2577
/* Allocate the next cs id and emit it to the age scratch register so
 * clients can poll for command-stream completion.  The new id is
 * returned through *id.
 */
static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id)
{
	RING_LOCALS;

	*id = radeon_cs_id_get(dev_priv);

	/* SCRATCH 2 */
	BEGIN_RING(3);
	R600_CLEAR_AGE(*id);
	ADVANCE_RING();
	COMMIT_RING();
}
2589
2590
static int r600_ib_get(struct drm_device *dev,
2591
struct drm_file *fpriv,
2592
struct drm_buf **buffer)
2593
{
2594
struct drm_buf *buf;
2595
2596
*buffer = NULL;
2597
buf = radeon_freelist_get(dev);
2598
if (!buf) {
2599
return -EBUSY;
2600
}
2601
buf->file_priv = fpriv;
2602
*buffer = buf;
2603
return 0;
2604
}
2605
2606
static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf,
2607
struct drm_file *fpriv, int l, int r)
2608
{
2609
drm_radeon_private_t *dev_priv = dev->dev_private;
2610
2611
if (buf) {
2612
if (!r)
2613
r600_cp_dispatch_indirect(dev, buf, 0, l * 4);
2614
radeon_cp_discard_buffer(dev, fpriv->master, buf);
2615
COMMIT_RING();
2616
}
2617
}
2618
2619
int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)
2620
{
2621
struct drm_radeon_private *dev_priv = dev->dev_private;
2622
struct drm_radeon_cs *cs = data;
2623
struct drm_buf *buf;
2624
unsigned family;
2625
int l, r = 0;
2626
u32 *ib, cs_id = 0;
2627
2628
if (dev_priv == NULL) {
2629
DRM_ERROR("called with no initialization\n");
2630
return -EINVAL;
2631
}
2632
family = dev_priv->flags & RADEON_FAMILY_MASK;
2633
if (family < CHIP_R600) {
2634
DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n");
2635
return -EINVAL;
2636
}
2637
mutex_lock(&dev_priv->cs_mutex);
2638
/* get ib */
2639
r = r600_ib_get(dev, fpriv, &buf);
2640
if (r) {
2641
DRM_ERROR("ib_get failed\n");
2642
goto out;
2643
}
2644
ib = dev->agp_buffer_map->handle + buf->offset;
2645
/* now parse command stream */
2646
r = r600_cs_legacy(dev, data, fpriv, family, ib, &l);
2647
if (r) {
2648
goto out;
2649
}
2650
2651
out:
2652
r600_ib_free(dev, buf, fpriv, l, r);
2653
/* emit cs id sequence */
2654
r600_cs_id_emit(dev_priv, &cs_id);
2655
cs->cs_id = cs_id;
2656
mutex_unlock(&dev_priv->cs_mutex);
2657
return r;
2658
}
2659
2660
/* Report the tiling configuration (pipe count, bank count, group size)
 * recorded in the device private state, for use by the CS checker.
 */
void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size)
{
	struct drm_radeon_private *priv = dev->dev_private;

	*npipes     = priv->r600_npipes;
	*nbanks     = priv->r600_nbanks;
	*group_size = priv->r600_group_size;
}
2668
2669