Path: blob/master/drivers/gpu/drm/radeon/r600_cp.c
15113 views
/*1* Copyright 2008-2009 Advanced Micro Devices, Inc.2* Copyright 2008 Red Hat Inc.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR19* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,20* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER21* DEALINGS IN THE SOFTWARE.22*23* Authors:24* Dave Airlie <[email protected]>25* Alex Deucher <[email protected]>26*/2728#include "drmP.h"29#include "drm.h"30#include "radeon_drm.h"31#include "radeon_drv.h"3233#define PFP_UCODE_SIZE 57634#define PM4_UCODE_SIZE 179235#define R700_PFP_UCODE_SIZE 84836#define R700_PM4_UCODE_SIZE 13603738/* Firmware Names */39MODULE_FIRMWARE("radeon/R600_pfp.bin");40MODULE_FIRMWARE("radeon/R600_me.bin");41MODULE_FIRMWARE("radeon/RV610_pfp.bin");42MODULE_FIRMWARE("radeon/RV610_me.bin");43MODULE_FIRMWARE("radeon/RV630_pfp.bin");44MODULE_FIRMWARE("radeon/RV630_me.bin");45MODULE_FIRMWARE("radeon/RV620_pfp.bin");46MODULE_FIRMWARE("radeon/RV620_me.bin");47MODULE_FIRMWARE("radeon/RV635_pfp.bin");48MODULE_FIRMWARE("radeon/RV635_me.bin");49MODULE_FIRMWARE("radeon/RV670_pfp.bin");50MODULE_FIRMWARE("radeon/RV670_me.bin");51MODULE_FIRMWARE("radeon/RS780_pfp.bin");52MODULE_FIRMWARE("radeon/RS780_me.bin");53MODULE_FIRMWARE("radeon/RV770_pfp.bin");54MODULE_FIRMWARE("radeon/RV770_me.bin");55MODULE_FIRMWARE("radeon/RV730_pfp.bin");56MODULE_FIRMWARE("radeon/RV730_me.bin");57MODULE_FIRMWARE("radeon/RV710_pfp.bin");58MODULE_FIRMWARE("radeon/RV710_me.bin");596061int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,62unsigned family, u32 *ib, int *l);63void r600_cs_legacy_init(void);646566# define ATI_PCIGART_PAGE_SIZE 4096 /**< PCI GART page size */67# define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1))6869#define R600_PTE_VALID (1 << 0)70#define R600_PTE_SYSTEM (1 << 1)71#define R600_PTE_SNOOPED (1 << 2)72#define R600_PTE_READABLE (1 << 5)73#define R600_PTE_WRITEABLE (1 << 6)7475/* MAX values used for gfx init */76#define R6XX_MAX_SH_GPRS 25677#define R6XX_MAX_TEMP_GPRS 1678#define R6XX_MAX_SH_THREADS 25679#define R6XX_MAX_SH_STACK_ENTRIES 409680#define R6XX_MAX_BACKENDS 881#define R6XX_MAX_BACKENDS_MASK 0xff82#define R6XX_MAX_SIMDS 883#define R6XX_MAX_SIMDS_MASK 0xff84#define R6XX_MAX_PIPES 885#define R6XX_MAX_PIPES_MASK 0xff8687#define R7XX_MAX_SH_GPRS 25688#define R7XX_MAX_TEMP_GPRS 1689#define R7XX_MAX_SH_THREADS 25690#define R7XX_MAX_SH_STACK_ENTRIES 409691#define R7XX_MAX_BACKENDS 892#define R7XX_MAX_BACKENDS_MASK 0xff93#define R7XX_MAX_SIMDS 1694#define R7XX_MAX_SIMDS_MASK 0xffff95#define R7XX_MAX_PIPES 896#define R7XX_MAX_PIPES_MASK 0xff9798static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries)99{100int i;101102dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;103104for (i = 0; i < dev_priv->usec_timeout; i++) {105int slots;106if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)107slots = (RADEON_READ(R600_GRBM_STATUS)108& R700_CMDFIFO_AVAIL_MASK);109else110slots = (RADEON_READ(R600_GRBM_STATUS)111& R600_CMDFIFO_AVAIL_MASK);112if (slots >= entries)113return 0;114DRM_UDELAY(1);115}116DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",117RADEON_READ(R600_GRBM_STATUS),118RADEON_READ(R600_GRBM_STATUS2));119120return -EBUSY;121}122123static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv)124{125int i, ret;126127dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;128129if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)130ret = r600_do_wait_for_fifo(dev_priv, 8);131else132ret = r600_do_wait_for_fifo(dev_priv, 16);133if (ret)134return ret;135for (i = 0; i < dev_priv->usec_timeout; i++) {136if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE))137return 0;138DRM_UDELAY(1);139}140DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",141RADEON_READ(R600_GRBM_STATUS),142RADEON_READ(R600_GRBM_STATUS2));143144return -EBUSY;145}146147void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info)148{149struct drm_sg_mem *entry = dev->sg;150int max_pages;151int pages;152int i;153154if (!entry)155return;156157if (gart_info->bus_addr) {158max_pages = (gart_info->table_size / sizeof(u64));159pages = (entry->pages <= max_pages)160? entry->pages : max_pages;161162for (i = 0; i < pages; i++) {163if (!entry->busaddr[i])164break;165pci_unmap_page(dev->pdev, entry->busaddr[i],166PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);167}168if (gart_info->gart_table_location == DRM_ATI_GART_MAIN)169gart_info->bus_addr = 0;170}171}172173/* R600 has page table setup */174int r600_page_table_init(struct drm_device *dev)175{176drm_radeon_private_t *dev_priv = dev->dev_private;177struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info;178struct drm_local_map *map = &gart_info->mapping;179struct drm_sg_mem *entry = dev->sg;180int ret = 0;181int i, j;182int pages;183u64 page_base;184dma_addr_t entry_addr;185int max_ati_pages, max_real_pages, gart_idx;186187/* okay page table is available - lets rock */188max_ati_pages = (gart_info->table_size / sizeof(u64));189max_real_pages = max_ati_pages / (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE);190191pages = (entry->pages <= max_real_pages) ?192entry->pages : max_real_pages;193194memset_io((void __iomem *)map->handle, 0, max_ati_pages * sizeof(u64));195196gart_idx = 0;197for (i = 0; i < pages; i++) {198entry->busaddr[i] = pci_map_page(dev->pdev,199entry->pagelist[i], 0,200PAGE_SIZE,201PCI_DMA_BIDIRECTIONAL);202if (pci_dma_mapping_error(dev->pdev, entry->busaddr[i])) {203DRM_ERROR("unable to map PCIGART pages!\n");204r600_page_table_cleanup(dev, gart_info);205goto done;206}207entry_addr = entry->busaddr[i];208for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {209page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK;210page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;211page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE;212213DRM_WRITE64(map, gart_idx * sizeof(u64), page_base);214215gart_idx++;216217if ((i % 128) == 0)218DRM_DEBUG("page entry %d: 0x%016llx\n",219i, (unsigned long long)page_base);220entry_addr += ATI_PCIGART_PAGE_SIZE;221}222}223ret = 1;224done:225return ret;226}227228static void r600_vm_flush_gart_range(struct drm_device *dev)229{230drm_radeon_private_t *dev_priv = dev->dev_private;231u32 resp, countdown = 1000;232RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12);233RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);234RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2);235236do {237resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE);238countdown--;239DRM_UDELAY(1);240} while (((resp & 0xf0) == 0) && countdown);241}242243static void r600_vm_init(struct drm_device *dev)244{245drm_radeon_private_t *dev_priv = dev->dev_private;246/* initialise the VM to use the page table we constructed up there */247u32 vm_c0, i;248u32 mc_rd_a;249u32 vm_l2_cntl, vm_l2_cntl3;250/* okay set up the PCIE aperture type thingo */251RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);252RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);253RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);254255/* setup MC RD a */256mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS |257R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) |258R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY;259260RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a);261RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a);262263RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a);264RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a);265266RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a);267RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a);268269RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a);270RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a);271272RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING);273RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/);274275RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a);276RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a);277278RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE);279RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a);280281vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;282vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7);283RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);284285RADEON_WRITE(R600_VM_L2_CNTL2, 0);286vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) |287R600_VM_L2_CNTL3_BANK_SELECT_1(1) |288R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2));289RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);290291vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;292293RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);294295vm_c0 &= ~R600_VM_ENABLE_CONTEXT;296297/* disable all other contexts */298for (i = 1; i < 8; i++)299RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);300301RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);302RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);303RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);304305r600_vm_flush_gart_range(dev);306}307308static int r600_cp_init_microcode(drm_radeon_private_t *dev_priv)309{310struct platform_device *pdev;311const char *chip_name;312size_t pfp_req_size, me_req_size;313char fw_name[30];314int err;315316pdev = platform_device_register_simple("r600_cp", 0, NULL, 0);317err = IS_ERR(pdev);318if (err) {319printk(KERN_ERR "r600_cp: Failed to register firmware\n");320return -EINVAL;321}322323switch (dev_priv->flags & RADEON_FAMILY_MASK) {324case CHIP_R600: chip_name = "R600"; break;325case CHIP_RV610: chip_name = "RV610"; break;326case CHIP_RV630: chip_name = "RV630"; break;327case CHIP_RV620: chip_name = "RV620"; break;328case CHIP_RV635: chip_name = "RV635"; break;329case CHIP_RV670: chip_name = "RV670"; break;330case CHIP_RS780:331case CHIP_RS880: chip_name = "RS780"; break;332case CHIP_RV770: chip_name = "RV770"; break;333case CHIP_RV730:334case CHIP_RV740: chip_name = "RV730"; break;335case CHIP_RV710: chip_name = "RV710"; break;336default: BUG();337}338339if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {340pfp_req_size = R700_PFP_UCODE_SIZE * 4;341me_req_size = R700_PM4_UCODE_SIZE * 4;342} else {343pfp_req_size = PFP_UCODE_SIZE * 4;344me_req_size = PM4_UCODE_SIZE * 12;345}346347DRM_INFO("Loading %s CP Microcode\n", chip_name);348349snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);350err = request_firmware(&dev_priv->pfp_fw, fw_name, &pdev->dev);351if (err)352goto out;353if (dev_priv->pfp_fw->size != pfp_req_size) {354printk(KERN_ERR355"r600_cp: Bogus length %zu in firmware \"%s\"\n",356dev_priv->pfp_fw->size, fw_name);357err = -EINVAL;358goto out;359}360361snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);362err = request_firmware(&dev_priv->me_fw, fw_name, &pdev->dev);363if (err)364goto out;365if (dev_priv->me_fw->size != me_req_size) {366printk(KERN_ERR367"r600_cp: Bogus length %zu in firmware \"%s\"\n",368dev_priv->me_fw->size, fw_name);369err = -EINVAL;370}371out:372platform_device_unregister(pdev);373374if (err) {375if (err != -EINVAL)376printk(KERN_ERR377"r600_cp: Failed to load firmware \"%s\"\n",378fw_name);379release_firmware(dev_priv->pfp_fw);380dev_priv->pfp_fw = NULL;381release_firmware(dev_priv->me_fw);382dev_priv->me_fw = NULL;383}384return err;385}386387static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv)388{389const __be32 *fw_data;390int i;391392if (!dev_priv->me_fw || !dev_priv->pfp_fw)393return;394395r600_do_cp_stop(dev_priv);396397RADEON_WRITE(R600_CP_RB_CNTL,398#ifdef __BIG_ENDIAN399R600_BUF_SWAP_32BIT |400#endif401R600_RB_NO_UPDATE |402R600_RB_BLKSZ(15) |403R600_RB_BUFSZ(3));404405RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);406RADEON_READ(R600_GRBM_SOFT_RESET);407DRM_UDELAY(15000);408RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);409410fw_data = (const __be32 *)dev_priv->me_fw->data;411RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);412for (i = 0; i < PM4_UCODE_SIZE * 3; i++)413RADEON_WRITE(R600_CP_ME_RAM_DATA,414be32_to_cpup(fw_data++));415416fw_data = (const __be32 *)dev_priv->pfp_fw->data;417RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);418for (i = 0; i < PFP_UCODE_SIZE; i++)419RADEON_WRITE(R600_CP_PFP_UCODE_DATA,420be32_to_cpup(fw_data++));421422RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);423RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);424RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);425426}427428static void r700_vm_init(struct drm_device *dev)429{430drm_radeon_private_t *dev_priv = dev->dev_private;431/* initialise the VM to use the page table we constructed up there */432u32 vm_c0, i;433u32 mc_vm_md_l1;434u32 vm_l2_cntl, vm_l2_cntl3;435/* okay set up the PCIE aperture type thingo */436RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);437RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);438RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);439440mc_vm_md_l1 = R700_ENABLE_L1_TLB |441R700_ENABLE_L1_FRAGMENT_PROCESSING |442R700_SYSTEM_ACCESS_MODE_IN_SYS |443R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |444R700_EFFECTIVE_L1_TLB_SIZE(5) |445R700_EFFECTIVE_L1_QUEUE_SIZE(5);446447RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1);448RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1);449RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1);450RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1);451RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1);452RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1);453RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1);454455vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;456vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7);457RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);458459RADEON_WRITE(R600_VM_L2_CNTL2, 0);460vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2);461RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);462463vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;464465RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);466467vm_c0 &= ~R600_VM_ENABLE_CONTEXT;468469/* disable all other contexts */470for (i = 1; i < 8; i++)471RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);472473RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);474RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);475RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);476477r600_vm_flush_gart_range(dev);478}479480static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)481{482const __be32 *fw_data;483int i;484485if (!dev_priv->me_fw || !dev_priv->pfp_fw)486return;487488r600_do_cp_stop(dev_priv);489490RADEON_WRITE(R600_CP_RB_CNTL,491#ifdef __BIG_ENDIAN492R600_BUF_SWAP_32BIT |493#endif494R600_RB_NO_UPDATE |495R600_RB_BLKSZ(15) |496R600_RB_BUFSZ(3));497498RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);499RADEON_READ(R600_GRBM_SOFT_RESET);500DRM_UDELAY(15000);501RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);502503fw_data = (const __be32 *)dev_priv->pfp_fw->data;504RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);505for (i = 0; i < R700_PFP_UCODE_SIZE; i++)506RADEON_WRITE(R600_CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));507RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);508509fw_data = (const __be32 *)dev_priv->me_fw->data;510RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);511for (i = 0; i < R700_PM4_UCODE_SIZE; i++)512RADEON_WRITE(R600_CP_ME_RAM_DATA, be32_to_cpup(fw_data++));513RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);514515RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);516RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);517RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);518519}520521static void r600_test_writeback(drm_radeon_private_t *dev_priv)522{523u32 tmp;524525/* Start with assuming that writeback doesn't work */526dev_priv->writeback_works = 0;527528/* Writeback doesn't seem to work everywhere, test it here and possibly529* enable it if it appears to work530*/531radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);532533RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef);534535for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) {536u32 val;537538val = radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1));539if (val == 0xdeadbeef)540break;541DRM_UDELAY(1);542}543544if (tmp < dev_priv->usec_timeout) {545dev_priv->writeback_works = 1;546DRM_INFO("writeback test succeeded in %d usecs\n", tmp);547} else {548dev_priv->writeback_works = 0;549DRM_INFO("writeback test failed\n");550}551if (radeon_no_wb == 1) {552dev_priv->writeback_works = 0;553DRM_INFO("writeback forced off\n");554}555556if (!dev_priv->writeback_works) {557/* Disable writeback to avoid unnecessary bus master transfer */558RADEON_WRITE(R600_CP_RB_CNTL,559#ifdef __BIG_ENDIAN560R600_BUF_SWAP_32BIT |561#endif562RADEON_READ(R600_CP_RB_CNTL) |563R600_RB_NO_UPDATE);564RADEON_WRITE(R600_SCRATCH_UMSK, 0);565}566}567568int r600_do_engine_reset(struct drm_device *dev)569{570drm_radeon_private_t *dev_priv = dev->dev_private;571u32 cp_ptr, cp_me_cntl, cp_rb_cntl;572573DRM_INFO("Resetting GPU\n");574575cp_ptr = RADEON_READ(R600_CP_RB_WPTR);576cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL);577RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT);578579RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff);580RADEON_READ(R600_GRBM_SOFT_RESET);581DRM_UDELAY(50);582RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);583RADEON_READ(R600_GRBM_SOFT_RESET);584585RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);586cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL);587RADEON_WRITE(R600_CP_RB_CNTL,588#ifdef __BIG_ENDIAN589R600_BUF_SWAP_32BIT |590#endif591R600_RB_RPTR_WR_ENA);592593RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr);594RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr);595RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl);596RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl);597598/* Reset the CP ring */599r600_do_cp_reset(dev_priv);600601/* The CP is no longer running after an engine reset */602dev_priv->cp_running = 0;603604/* Reset any pending vertex, indirect buffers */605radeon_freelist_reset(dev);606607return 0;608609}610611static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,612u32 num_backends,613u32 backend_disable_mask)614{615u32 backend_map = 0;616u32 enabled_backends_mask;617u32 enabled_backends_count;618u32 cur_pipe;619u32 swizzle_pipe[R6XX_MAX_PIPES];620u32 cur_backend;621u32 i;622623if (num_tile_pipes > R6XX_MAX_PIPES)624num_tile_pipes = R6XX_MAX_PIPES;625if (num_tile_pipes < 1)626num_tile_pipes = 1;627if (num_backends > R6XX_MAX_BACKENDS)628num_backends = R6XX_MAX_BACKENDS;629if (num_backends < 1)630num_backends = 1;631632enabled_backends_mask = 0;633enabled_backends_count = 0;634for (i = 0; i < R6XX_MAX_BACKENDS; ++i) {635if (((backend_disable_mask >> i) & 1) == 0) {636enabled_backends_mask |= (1 << i);637++enabled_backends_count;638}639if (enabled_backends_count == num_backends)640break;641}642643if (enabled_backends_count == 0) {644enabled_backends_mask = 1;645enabled_backends_count = 1;646}647648if (enabled_backends_count != num_backends)649num_backends = enabled_backends_count;650651memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES);652switch (num_tile_pipes) {653case 1:654swizzle_pipe[0] = 0;655break;656case 2:657swizzle_pipe[0] = 0;658swizzle_pipe[1] = 1;659break;660case 3:661swizzle_pipe[0] = 0;662swizzle_pipe[1] = 1;663swizzle_pipe[2] = 2;664break;665case 4:666swizzle_pipe[0] = 0;667swizzle_pipe[1] = 1;668swizzle_pipe[2] = 2;669swizzle_pipe[3] = 3;670break;671case 5:672swizzle_pipe[0] = 0;673swizzle_pipe[1] = 1;674swizzle_pipe[2] = 2;675swizzle_pipe[3] = 3;676swizzle_pipe[4] = 4;677break;678case 6:679swizzle_pipe[0] = 0;680swizzle_pipe[1] = 2;681swizzle_pipe[2] = 4;682swizzle_pipe[3] = 5;683swizzle_pipe[4] = 1;684swizzle_pipe[5] = 3;685break;686case 7:687swizzle_pipe[0] = 0;688swizzle_pipe[1] = 2;689swizzle_pipe[2] = 4;690swizzle_pipe[3] = 6;691swizzle_pipe[4] = 1;692swizzle_pipe[5] = 3;693swizzle_pipe[6] = 5;694break;695case 8:696swizzle_pipe[0] = 0;697swizzle_pipe[1] = 2;698swizzle_pipe[2] = 4;699swizzle_pipe[3] = 6;700swizzle_pipe[4] = 1;701swizzle_pipe[5] = 3;702swizzle_pipe[6] = 5;703swizzle_pipe[7] = 7;704break;705}706707cur_backend = 0;708for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {709while (((1 << cur_backend) & enabled_backends_mask) == 0)710cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;711712backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));713714cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;715}716717return backend_map;718}719720static int r600_count_pipe_bits(uint32_t val)721{722int i, ret = 0;723for (i = 0; i < 32; i++) {724ret += val & 1;725val >>= 1;726}727return ret;728}729730static void r600_gfx_init(struct drm_device *dev,731drm_radeon_private_t *dev_priv)732{733int i, j, num_qd_pipes;734u32 sx_debug_1;735u32 tc_cntl;736u32 arb_pop;737u32 num_gs_verts_per_thread;738u32 vgt_gs_per_es;739u32 gs_prim_buffer_depth = 0;740u32 sq_ms_fifo_sizes;741u32 sq_config;742u32 sq_gpr_resource_mgmt_1 = 0;743u32 sq_gpr_resource_mgmt_2 = 0;744u32 sq_thread_resource_mgmt = 0;745u32 sq_stack_resource_mgmt_1 = 0;746u32 sq_stack_resource_mgmt_2 = 0;747u32 hdp_host_path_cntl;748u32 backend_map;749u32 gb_tiling_config = 0;750u32 cc_rb_backend_disable;751u32 cc_gc_shader_pipe_config;752u32 ramcfg;753754/* setup chip specs */755switch (dev_priv->flags & RADEON_FAMILY_MASK) {756case CHIP_R600:757dev_priv->r600_max_pipes = 4;758dev_priv->r600_max_tile_pipes = 8;759dev_priv->r600_max_simds = 4;760dev_priv->r600_max_backends = 4;761dev_priv->r600_max_gprs = 256;762dev_priv->r600_max_threads = 192;763dev_priv->r600_max_stack_entries = 256;764dev_priv->r600_max_hw_contexts = 8;765dev_priv->r600_max_gs_threads = 16;766dev_priv->r600_sx_max_export_size = 128;767dev_priv->r600_sx_max_export_pos_size = 16;768dev_priv->r600_sx_max_export_smx_size = 128;769dev_priv->r600_sq_num_cf_insts = 2;770break;771case CHIP_RV630:772case CHIP_RV635:773dev_priv->r600_max_pipes = 2;774dev_priv->r600_max_tile_pipes = 2;775dev_priv->r600_max_simds = 3;776dev_priv->r600_max_backends = 1;777dev_priv->r600_max_gprs = 128;778dev_priv->r600_max_threads = 192;779dev_priv->r600_max_stack_entries = 128;780dev_priv->r600_max_hw_contexts = 8;781dev_priv->r600_max_gs_threads = 4;782dev_priv->r600_sx_max_export_size = 128;783dev_priv->r600_sx_max_export_pos_size = 16;784dev_priv->r600_sx_max_export_smx_size = 128;785dev_priv->r600_sq_num_cf_insts = 2;786break;787case CHIP_RV610:788case CHIP_RS780:789case CHIP_RS880:790case CHIP_RV620:791dev_priv->r600_max_pipes = 1;792dev_priv->r600_max_tile_pipes = 1;793dev_priv->r600_max_simds = 2;794dev_priv->r600_max_backends = 1;795dev_priv->r600_max_gprs = 128;796dev_priv->r600_max_threads = 192;797dev_priv->r600_max_stack_entries = 128;798dev_priv->r600_max_hw_contexts = 4;799dev_priv->r600_max_gs_threads = 4;800dev_priv->r600_sx_max_export_size = 128;801dev_priv->r600_sx_max_export_pos_size = 16;802dev_priv->r600_sx_max_export_smx_size = 128;803dev_priv->r600_sq_num_cf_insts = 1;804break;805case CHIP_RV670:806dev_priv->r600_max_pipes = 4;807dev_priv->r600_max_tile_pipes = 4;808dev_priv->r600_max_simds = 4;809dev_priv->r600_max_backends = 4;810dev_priv->r600_max_gprs = 192;811dev_priv->r600_max_threads = 192;812dev_priv->r600_max_stack_entries = 256;813dev_priv->r600_max_hw_contexts = 8;814dev_priv->r600_max_gs_threads = 16;815dev_priv->r600_sx_max_export_size = 128;816dev_priv->r600_sx_max_export_pos_size = 16;817dev_priv->r600_sx_max_export_smx_size = 128;818dev_priv->r600_sq_num_cf_insts = 2;819break;820default:821break;822}823824/* Initialize HDP */825j = 0;826for (i = 0; i < 32; i++) {827RADEON_WRITE((0x2c14 + j), 0x00000000);828RADEON_WRITE((0x2c18 + j), 0x00000000);829RADEON_WRITE((0x2c1c + j), 0x00000000);830RADEON_WRITE((0x2c20 + j), 0x00000000);831RADEON_WRITE((0x2c24 + j), 0x00000000);832j += 0x18;833}834835RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));836837/* setup tiling, simd, pipe config */838ramcfg = RADEON_READ(R600_RAMCFG);839840switch (dev_priv->r600_max_tile_pipes) {841case 1:842gb_tiling_config |= R600_PIPE_TILING(0);843break;844case 2:845gb_tiling_config |= R600_PIPE_TILING(1);846break;847case 4:848gb_tiling_config |= R600_PIPE_TILING(2);849break;850case 8:851gb_tiling_config |= R600_PIPE_TILING(3);852break;853default:854break;855}856857gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK);858859gb_tiling_config |= R600_GROUP_SIZE(0);860861if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) {862gb_tiling_config |= R600_ROW_TILING(3);863gb_tiling_config |= R600_SAMPLE_SPLIT(3);864} else {865gb_tiling_config |=866R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));867gb_tiling_config |=868R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));869}870871gb_tiling_config |= R600_BANK_SWAPS(1);872873cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;874cc_rb_backend_disable |=875R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);876877cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;878cc_gc_shader_pipe_config |=879R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK);880cc_gc_shader_pipe_config |=881R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK);882883backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,884(R6XX_MAX_BACKENDS -885r600_count_pipe_bits((cc_rb_backend_disable &886R6XX_MAX_BACKENDS_MASK) >> 16)),887(cc_rb_backend_disable >> 16));888gb_tiling_config |= R600_BACKEND_MAP(backend_map);889890RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config);891RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));892RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));893if (gb_tiling_config & 0xc0) {894dev_priv->r600_group_size = 512;895} else {896dev_priv->r600_group_size = 256;897}898dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);899if (gb_tiling_config & 0x30) {900dev_priv->r600_nbanks = 8;901} else {902dev_priv->r600_nbanks = 4;903}904905RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);906RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);907RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);908909num_qd_pipes =910R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);911RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);912RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);913914/* set HW defaults for 3D engine */915RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |916R600_ROQ_IB2_START(0x2b)));917918RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) |919R600_ROQ_END(0x40)));920921RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |922R600_SYNC_GRADIENT |923R600_SYNC_WALKER |924R600_SYNC_ALIGNER));925926if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670)927RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021);928929sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1);930sx_debug_1 |= R600_SMX_EVENT_RELEASE;931if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600))932sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS;933RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1);934935if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||936((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||937((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||938((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||939((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||940((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))941RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE);942else943RADEON_WRITE(R600_DB_DEBUG, 0);944945RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) |946R600_DEPTH_FLUSH(16) |947R600_DEPTH_PENDING_FREE(4) |948R600_DEPTH_CACHELINE_FREE(16)));949RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);950RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0);951952RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));953RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0));954955sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES);956if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||957((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||958((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||959((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {960sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) |961R600_FETCH_FIFO_HIWATER(0xa) |962R600_DONE_FIFO_HIWATER(0xe0) |963R600_ALU_UPDATE_FIFO_HIWATER(0x8));964} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||965((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) {966sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff);967sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4);968}969RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);970971/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT972* should be adjusted as needed by the 2D/3D drivers. This just sets default values973*/974sq_config = RADEON_READ(R600_SQ_CONFIG);975sq_config &= ~(R600_PS_PRIO(3) |976R600_VS_PRIO(3) |977R600_GS_PRIO(3) |978R600_ES_PRIO(3));979sq_config |= (R600_DX9_CONSTS |980R600_VC_ENABLE |981R600_PS_PRIO(0) |982R600_VS_PRIO(1) |983R600_GS_PRIO(2) |984R600_ES_PRIO(3));985986if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) {987sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) |988R600_NUM_VS_GPRS(124) |989R600_NUM_CLAUSE_TEMP_GPRS(4));990sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) |991R600_NUM_ES_GPRS(0));992sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) |993R600_NUM_VS_THREADS(48) |994R600_NUM_GS_THREADS(4) |995R600_NUM_ES_THREADS(4));996sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) |997R600_NUM_VS_STACK_ENTRIES(128));998sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) |999R600_NUM_ES_STACK_ENTRIES(0));1000} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||1001((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||1002((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||1003((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {1004/* no vertex cache */1005sq_config &= ~R600_VC_ENABLE;10061007sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |1008R600_NUM_VS_GPRS(44) |1009R600_NUM_CLAUSE_TEMP_GPRS(2));1010sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |1011R600_NUM_ES_GPRS(17));1012sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |1013R600_NUM_VS_THREADS(78) |1014R600_NUM_GS_THREADS(4) |1015R600_NUM_ES_THREADS(31));1016sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |1017R600_NUM_VS_STACK_ENTRIES(40));1018sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |1019R600_NUM_ES_STACK_ENTRIES(16));1020} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||1021((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) {1022sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |1023R600_NUM_VS_GPRS(44) |1024R600_NUM_CLAUSE_TEMP_GPRS(2));1025sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) |1026R600_NUM_ES_GPRS(18));1027sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |1028R600_NUM_VS_THREADS(78) |1029R600_NUM_GS_THREADS(4) |1030R600_NUM_ES_THREADS(31));1031sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |1032R600_NUM_VS_STACK_ENTRIES(40));1033sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |1034R600_NUM_ES_STACK_ENTRIES(16));1035} else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) {1036sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |1037R600_NUM_VS_GPRS(44) |1038R600_NUM_CLAUSE_TEMP_GPRS(2));1039sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |1040R600_NUM_ES_GPRS(17));1041sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |1042R600_NUM_VS_THREADS(78) |1043R600_NUM_GS_THREADS(4) |1044R600_NUM_ES_THREADS(31));1045sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) |1046R600_NUM_VS_STACK_ENTRIES(64));1047sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) |1048R600_NUM_ES_STACK_ENTRIES(64));1049}10501051RADEON_WRITE(R600_SQ_CONFIG, sq_config);1052RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, sq_gpr_resource_mgmt_1);1053RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, sq_gpr_resource_mgmt_2);1054RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);1055RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);1056RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);10571058if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||1059((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||1060((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||1061((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))1062RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY));1063else1064RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC));10651066RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) |1067R600_S0_Y(0x4) |1068R600_S1_X(0x4) |1069R600_S1_Y(0xc)));1070RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) |1071R600_S0_Y(0xe) |1072R600_S1_X(0x2) |1073R600_S1_Y(0x2) |1074R600_S2_X(0xa) |1075R600_S2_Y(0x6) |1076R600_S3_X(0x6) |1077R600_S3_Y(0xa)));1078RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) |1079R600_S0_Y(0xb) |1080R600_S1_X(0x4) |1081R600_S1_Y(0xc) |1082R600_S2_X(0x1) |1083R600_S2_Y(0x6) |1084R600_S3_X(0xa) |1085R600_S3_Y(0xe)));1086RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) |1087R600_S4_Y(0x1) |1088R600_S5_X(0x0) |1089R600_S5_Y(0x0) |1090R600_S6_X(0xb) |1091R600_S6_Y(0x4) |1092R600_S7_X(0x7) |1093R600_S7_Y(0x8)));109410951096switch (dev_priv->flags & RADEON_FAMILY_MASK) {1097case CHIP_R600:1098case CHIP_RV630:1099case CHIP_RV635:1100gs_prim_buffer_depth = 0;1101break;1102case CHIP_RV610:1103case CHIP_RS780:1104case CHIP_RS880:1105case CHIP_RV620:1106gs_prim_buffer_depth = 32;1107break;1108case CHIP_RV670:1109gs_prim_buffer_depth = 128;1110break;1111default:1112break;1113}11141115num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;1116vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;1117/* Max value for this is 256 */1118if (vgt_gs_per_es > 256)1119vgt_gs_per_es = 256;11201121RADEON_WRITE(R600_VGT_ES_PER_GS, 128);1122RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);1123RADEON_WRITE(R600_VGT_GS_PER_VS, 2);1124RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);11251126/* more default values. 2D/3D driver should adjust as needed */1127RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);1128RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);1129RADEON_WRITE(R600_SX_MISC, 0);1130RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);1131RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);1132RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);1133RADEON_WRITE(R600_SPI_INPUT_Z, 0);1134RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));1135RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);11361137/* clear render buffer base addresses */1138RADEON_WRITE(R600_CB_COLOR0_BASE, 0);1139RADEON_WRITE(R600_CB_COLOR1_BASE, 0);1140RADEON_WRITE(R600_CB_COLOR2_BASE, 0);1141RADEON_WRITE(R600_CB_COLOR3_BASE, 0);1142RADEON_WRITE(R600_CB_COLOR4_BASE, 0);1143RADEON_WRITE(R600_CB_COLOR5_BASE, 0);1144RADEON_WRITE(R600_CB_COLOR6_BASE, 0);1145RADEON_WRITE(R600_CB_COLOR7_BASE, 0);11461147switch (dev_priv->flags & RADEON_FAMILY_MASK) {1148case CHIP_RV610:1149case CHIP_RS780:1150case CHIP_RS880:1151case CHIP_RV620:1152tc_cntl = R600_TC_L2_SIZE(8);1153break;1154case CHIP_RV630:1155case CHIP_RV635:1156tc_cntl = R600_TC_L2_SIZE(4);1157break;1158case CHIP_R600:1159tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT;1160break;1161default:1162tc_cntl = R600_TC_L2_SIZE(0);1163break;1164}11651166RADEON_WRITE(R600_TC_CNTL, tc_cntl);11671168hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);1169RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);11701171arb_pop = RADEON_READ(R600_ARB_POP);1172arb_pop |= R600_ENABLE_TC128;1173RADEON_WRITE(R600_ARB_POP, arb_pop);11741175RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);1176RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |1177R600_NUM_CLIP_SEQ(3)));1178RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095));11791180}11811182static u32 r700_get_tile_pipe_to_backend_map(drm_radeon_private_t *dev_priv,1183u32 num_tile_pipes,1184u32 num_backends,1185u32 backend_disable_mask)1186{1187u32 backend_map = 0;1188u32 enabled_backends_mask;1189u32 enabled_backends_count;1190u32 cur_pipe;1191u32 swizzle_pipe[R7XX_MAX_PIPES];1192u32 cur_backend;1193u32 i;1194bool force_no_swizzle;11951196if (num_tile_pipes > R7XX_MAX_PIPES)1197num_tile_pipes = R7XX_MAX_PIPES;1198if (num_tile_pipes < 1)1199num_tile_pipes = 1;1200if (num_backends > R7XX_MAX_BACKENDS)1201num_backends = R7XX_MAX_BACKENDS;1202if (num_backends < 1)1203num_backends = 1;12041205enabled_backends_mask = 0;1206enabled_backends_count = 0;1207for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {1208if (((backend_disable_mask >> i) & 1) == 0) {1209enabled_backends_mask |= (1 << i);1210++enabled_backends_count;1211}1212if (enabled_backends_count == num_backends)1213break;1214}12151216if (enabled_backends_count == 0) {1217enabled_backends_mask = 1;1218enabled_backends_count = 1;1219}12201221if (enabled_backends_count != num_backends)1222num_backends = enabled_backends_count;12231224switch (dev_priv->flags & RADEON_FAMILY_MASK) {1225case CHIP_RV770:1226case CHIP_RV730:1227force_no_swizzle = false;1228break;1229case CHIP_RV710:1230case CHIP_RV740:1231default:1232force_no_swizzle = true;1233break;1234}12351236memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);1237switch (num_tile_pipes) {1238case 1:1239swizzle_pipe[0] = 0;1240break;1241case 2:1242swizzle_pipe[0] = 0;1243swizzle_pipe[1] = 1;1244break;1245case 3:1246if (force_no_swizzle) {1247swizzle_pipe[0] = 0;1248swizzle_pipe[1] = 1;1249swizzle_pipe[2] = 2;1250} else {1251swizzle_pipe[0] = 0;1252swizzle_pipe[1] = 2;1253swizzle_pipe[2] = 1;1254}1255break;1256case 4:1257if (force_no_swizzle) {1258swizzle_pipe[0] = 0;1259swizzle_pipe[1] = 1;1260swizzle_pipe[2] = 2;1261swizzle_pipe[3] = 3;1262} else {1263swizzle_pipe[0] = 0;1264swizzle_pipe[1] = 2;1265swizzle_pipe[2] = 3;1266swizzle_pipe[3] = 1;1267}1268break;1269case 5:1270if (force_no_swizzle) {1271swizzle_pipe[0] = 0;1272swizzle_pipe[1] = 1;1273swizzle_pipe[2] = 2;1274swizzle_pipe[3] = 3;1275swizzle_pipe[4] = 4;1276} else {1277swizzle_pipe[0] = 0;1278swizzle_pipe[1] = 2;1279swizzle_pipe[2] = 4;1280swizzle_pipe[3] = 1;1281swizzle_pipe[4] = 3;1282}1283break;1284case 6:1285if (force_no_swizzle) {1286swizzle_pipe[0] = 0;1287swizzle_pipe[1] = 1;1288swizzle_pipe[2] = 2;1289swizzle_pipe[3] = 3;1290swizzle_pipe[4] = 4;1291swizzle_pipe[5] = 5;1292} else {1293swizzle_pipe[0] = 0;1294swizzle_pipe[1] = 2;1295swizzle_pipe[2] = 4;1296swizzle_pipe[3] = 5;1297swizzle_pipe[4] = 3;1298swizzle_pipe[5] = 1;1299}1300break;1301case 7:1302if (force_no_swizzle) {1303swizzle_pipe[0] = 0;1304swizzle_pipe[1] = 1;1305swizzle_pipe[2] = 2;1306swizzle_pipe[3] = 3;1307swizzle_pipe[4] = 4;1308swizzle_pipe[5] = 5;1309swizzle_pipe[6] = 6;1310} else {1311swizzle_pipe[0] = 0;1312swizzle_pipe[1] = 2;1313swizzle_pipe[2] = 4;1314swizzle_pipe[3] = 6;1315swizzle_pipe[4] = 3;1316swizzle_pipe[5] = 1;1317swizzle_pipe[6] = 5;1318}1319break;1320case 8:1321if (force_no_swizzle) {1322swizzle_pipe[0] = 0;1323swizzle_pipe[1] = 1;1324swizzle_pipe[2] = 2;1325swizzle_pipe[3] = 3;1326swizzle_pipe[4] = 4;1327swizzle_pipe[5] = 5;1328swizzle_pipe[6] = 6;1329swizzle_pipe[7] = 7;1330} else {1331swizzle_pipe[0] = 0;1332swizzle_pipe[1] = 2;1333swizzle_pipe[2] = 4;1334swizzle_pipe[3] = 6;1335swizzle_pipe[4] = 3;1336swizzle_pipe[5] = 1;1337swizzle_pipe[6] = 7;1338swizzle_pipe[7] = 5;1339}1340break;1341}13421343cur_backend = 0;1344for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {1345while (((1 << cur_backend) & enabled_backends_mask) == 0)1346cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;13471348backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));13491350cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;1351}13521353return backend_map;1354}13551356static void r700_gfx_init(struct drm_device *dev,1357drm_radeon_private_t *dev_priv)1358{1359int i, j, num_qd_pipes;1360u32 ta_aux_cntl;1361u32 sx_debug_1;1362u32 smx_dc_ctl0;1363u32 db_debug3;1364u32 num_gs_verts_per_thread;1365u32 vgt_gs_per_es;1366u32 gs_prim_buffer_depth = 0;1367u32 sq_ms_fifo_sizes;1368u32 sq_config;1369u32 sq_thread_resource_mgmt;1370u32 hdp_host_path_cntl;1371u32 sq_dyn_gpr_size_simd_ab_0;1372u32 backend_map;1373u32 gb_tiling_config = 0;1374u32 cc_rb_backend_disable;1375u32 cc_gc_shader_pipe_config;1376u32 mc_arb_ramcfg;1377u32 db_debug4;13781379/* setup chip specs */1380switch (dev_priv->flags & RADEON_FAMILY_MASK) {1381case CHIP_RV770:1382dev_priv->r600_max_pipes = 4;1383dev_priv->r600_max_tile_pipes = 8;1384dev_priv->r600_max_simds = 10;1385dev_priv->r600_max_backends = 4;1386dev_priv->r600_max_gprs = 256;1387dev_priv->r600_max_threads = 248;1388dev_priv->r600_max_stack_entries = 512;1389dev_priv->r600_max_hw_contexts = 8;1390dev_priv->r600_max_gs_threads = 16 * 2;1391dev_priv->r600_sx_max_export_size = 128;1392dev_priv->r600_sx_max_export_pos_size = 16;1393dev_priv->r600_sx_max_export_smx_size = 112;1394dev_priv->r600_sq_num_cf_insts = 2;13951396dev_priv->r700_sx_num_of_sets = 7;1397dev_priv->r700_sc_prim_fifo_size = 0xF9;1398dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;1399dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;1400break;1401case CHIP_RV730:1402dev_priv->r600_max_pipes = 2;1403dev_priv->r600_max_tile_pipes = 4;1404dev_priv->r600_max_simds = 8;1405dev_priv->r600_max_backends = 2;1406dev_priv->r600_max_gprs = 128;1407dev_priv->r600_max_threads = 248;1408dev_priv->r600_max_stack_entries = 256;1409dev_priv->r600_max_hw_contexts = 8;1410dev_priv->r600_max_gs_threads = 16 * 2;1411dev_priv->r600_sx_max_export_size = 256;1412dev_priv->r600_sx_max_export_pos_size = 32;1413dev_priv->r600_sx_max_export_smx_size = 224;1414dev_priv->r600_sq_num_cf_insts = 2;14151416dev_priv->r700_sx_num_of_sets = 7;1417dev_priv->r700_sc_prim_fifo_size = 0xf9;1418dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;1419dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;1420if (dev_priv->r600_sx_max_export_pos_size > 16) {1421dev_priv->r600_sx_max_export_pos_size -= 16;1422dev_priv->r600_sx_max_export_smx_size += 16;1423}1424break;1425case CHIP_RV710:1426dev_priv->r600_max_pipes = 2;1427dev_priv->r600_max_tile_pipes = 2;1428dev_priv->r600_max_simds = 2;1429dev_priv->r600_max_backends = 1;1430dev_priv->r600_max_gprs = 256;1431dev_priv->r600_max_threads = 192;1432dev_priv->r600_max_stack_entries = 256;1433dev_priv->r600_max_hw_contexts = 4;1434dev_priv->r600_max_gs_threads = 8 * 2;1435dev_priv->r600_sx_max_export_size = 128;1436dev_priv->r600_sx_max_export_pos_size = 16;1437dev_priv->r600_sx_max_export_smx_size = 112;1438dev_priv->r600_sq_num_cf_insts = 1;14391440dev_priv->r700_sx_num_of_sets = 7;1441dev_priv->r700_sc_prim_fifo_size = 0x40;1442dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;1443dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;1444break;1445case CHIP_RV740:1446dev_priv->r600_max_pipes = 4;1447dev_priv->r600_max_tile_pipes = 4;1448dev_priv->r600_max_simds = 8;1449dev_priv->r600_max_backends = 4;1450dev_priv->r600_max_gprs = 256;1451dev_priv->r600_max_threads = 248;1452dev_priv->r600_max_stack_entries = 512;1453dev_priv->r600_max_hw_contexts = 8;1454dev_priv->r600_max_gs_threads = 16 * 2;1455dev_priv->r600_sx_max_export_size = 256;1456dev_priv->r600_sx_max_export_pos_size = 32;1457dev_priv->r600_sx_max_export_smx_size = 224;1458dev_priv->r600_sq_num_cf_insts = 2;14591460dev_priv->r700_sx_num_of_sets = 7;1461dev_priv->r700_sc_prim_fifo_size = 0x100;1462dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;1463dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;14641465if (dev_priv->r600_sx_max_export_pos_size > 16) {1466dev_priv->r600_sx_max_export_pos_size -= 16;1467dev_priv->r600_sx_max_export_smx_size += 16;1468}1469break;1470default:1471break;1472}14731474/* Initialize HDP */1475j = 0;1476for (i = 0; i < 32; i++) {1477RADEON_WRITE((0x2c14 + j), 0x00000000);1478RADEON_WRITE((0x2c18 + j), 0x00000000);1479RADEON_WRITE((0x2c1c + j), 0x00000000);1480RADEON_WRITE((0x2c20 + j), 0x00000000);1481RADEON_WRITE((0x2c24 + j), 0x00000000);1482j += 0x18;1483}14841485RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));14861487/* setup tiling, simd, pipe config */1488mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG);14891490switch (dev_priv->r600_max_tile_pipes) {1491case 1:1492gb_tiling_config |= R600_PIPE_TILING(0);1493break;1494case 2:1495gb_tiling_config |= R600_PIPE_TILING(1);1496break;1497case 4:1498gb_tiling_config |= R600_PIPE_TILING(2);1499break;1500case 8:1501gb_tiling_config |= R600_PIPE_TILING(3);1502break;1503default:1504break;1505}15061507if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)1508gb_tiling_config |= R600_BANK_TILING(1);1509else1510gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK);15111512gb_tiling_config |= R600_GROUP_SIZE(0);15131514if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) {1515gb_tiling_config |= R600_ROW_TILING(3);1516gb_tiling_config |= R600_SAMPLE_SPLIT(3);1517} else {1518gb_tiling_config |=1519R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));1520gb_tiling_config |=1521R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));1522}15231524gb_tiling_config |= R600_BANK_SWAPS(1);15251526cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;1527cc_rb_backend_disable |=1528R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);15291530cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;1531cc_gc_shader_pipe_config |=1532R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);1533cc_gc_shader_pipe_config |=1534R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);15351536if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)1537backend_map = 0x28;1538else1539backend_map = r700_get_tile_pipe_to_backend_map(dev_priv,1540dev_priv->r600_max_tile_pipes,1541(R7XX_MAX_BACKENDS -1542r600_count_pipe_bits((cc_rb_backend_disable &1543R7XX_MAX_BACKENDS_MASK) >> 16)),1544(cc_rb_backend_disable >> 16));1545gb_tiling_config |= R600_BACKEND_MAP(backend_map);15461547RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config);1548RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));1549RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));1550if (gb_tiling_config & 0xc0) {1551dev_priv->r600_group_size = 512;1552} else {1553dev_priv->r600_group_size = 256;1554}1555dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);1556if (gb_tiling_config & 0x30) {1557dev_priv->r600_nbanks = 8;1558} else {1559dev_priv->r600_nbanks = 4;1560}15611562RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);1563RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);1564RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);15651566RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);1567RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);1568RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);1569RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);1570RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);15711572num_qd_pipes =1573R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);1574RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);1575RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);15761577/* set HW defaults for 3D engine */1578RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |1579R600_ROQ_IB2_START(0x2b)));15801581RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));15821583ta_aux_cntl = RADEON_READ(R600_TA_CNTL_AUX);1584RADEON_WRITE(R600_TA_CNTL_AUX, ta_aux_cntl | R600_DISABLE_CUBE_ANISO);15851586sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);1587sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;1588RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1);15891590smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0);1591smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff);1592smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);1593RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);15941595if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV740)1596RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |1597R700_GS_FLUSH_CTL(4) |1598R700_ACK_FLUSH_CTL(3) |1599R700_SYNC_FLUSH_CTL));16001601db_debug3 = RADEON_READ(R700_DB_DEBUG3);1602db_debug3 &= ~R700_DB_CLK_OFF_DELAY(0x1f);1603switch (dev_priv->flags & RADEON_FAMILY_MASK) {1604case CHIP_RV770:1605case CHIP_RV740:1606db_debug3 |= R700_DB_CLK_OFF_DELAY(0x1f);1607break;1608case CHIP_RV710:1609case CHIP_RV730:1610default:1611db_debug3 |= R700_DB_CLK_OFF_DELAY(2);1612break;1613}1614RADEON_WRITE(R700_DB_DEBUG3, db_debug3);16151616if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV770) {1617db_debug4 = RADEON_READ(RV700_DB_DEBUG4);1618db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;1619RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);1620}16211622RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) |1623R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) |1624R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1)));16251626RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) |1627R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) |1628R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize)));16291630RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);16311632RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1);16331634RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));16351636RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4));16371638RADEON_WRITE(R600_CP_PERFMON_CNTL, 0);16391640sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) |1641R600_DONE_FIFO_HIWATER(0xe0) |1642R600_ALU_UPDATE_FIFO_HIWATER(0x8));1643switch (dev_priv->flags & RADEON_FAMILY_MASK) {1644case CHIP_RV770:1645case CHIP_RV730:1646case CHIP_RV710:1647sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);1648break;1649case CHIP_RV740:1650default:1651sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);1652break;1653}1654RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);16551656/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT1657* should be adjusted as needed by the 2D/3D drivers. This just sets default values1658*/1659sq_config = RADEON_READ(R600_SQ_CONFIG);1660sq_config &= ~(R600_PS_PRIO(3) |1661R600_VS_PRIO(3) |1662R600_GS_PRIO(3) |1663R600_ES_PRIO(3));1664sq_config |= (R600_DX9_CONSTS |1665R600_VC_ENABLE |1666R600_EXPORT_SRC_C |1667R600_PS_PRIO(0) |1668R600_VS_PRIO(1) |1669R600_GS_PRIO(2) |1670R600_ES_PRIO(3));1671if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)1672/* no vertex cache */1673sq_config &= ~R600_VC_ENABLE;16741675RADEON_WRITE(R600_SQ_CONFIG, sq_config);16761677RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) |1678R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) |1679R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2)));16801681RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) |1682R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64)));16831684sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) |1685R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) |1686R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8));1687if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads)1688sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads);1689else1690sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8);1691RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);16921693RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |1694R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));16951696RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |1697R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));16981699sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) |1700R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) |1701R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) |1702R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64));17031704RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);1705RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);1706RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);1707RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);1708RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);1709RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);1710RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);1711RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);17121713RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) |1714R700_FORCE_EOV_MAX_REZ_CNT(255)));17151716if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)1717RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) |1718R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));1719else1720RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) |1721R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));17221723switch (dev_priv->flags & RADEON_FAMILY_MASK) {1724case CHIP_RV770:1725case CHIP_RV730:1726case CHIP_RV740:1727gs_prim_buffer_depth = 384;1728break;1729case CHIP_RV710:1730gs_prim_buffer_depth = 128;1731break;1732default:1733break;1734}17351736num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;1737vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;1738/* Max value for this is 256 */1739if (vgt_gs_per_es > 256)1740vgt_gs_per_es = 256;17411742RADEON_WRITE(R600_VGT_ES_PER_GS, 128);1743RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);1744RADEON_WRITE(R600_VGT_GS_PER_VS, 2);17451746/* more default values. 2D/3D driver should adjust as needed */1747RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);1748RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);1749RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);1750RADEON_WRITE(R600_SX_MISC, 0);1751RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);1752RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa);1753RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);1754RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff);1755RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);1756RADEON_WRITE(R600_SPI_INPUT_Z, 0);1757RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));1758RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);17591760/* clear render buffer base addresses */1761RADEON_WRITE(R600_CB_COLOR0_BASE, 0);1762RADEON_WRITE(R600_CB_COLOR1_BASE, 0);1763RADEON_WRITE(R600_CB_COLOR2_BASE, 0);1764RADEON_WRITE(R600_CB_COLOR3_BASE, 0);1765RADEON_WRITE(R600_CB_COLOR4_BASE, 0);1766RADEON_WRITE(R600_CB_COLOR5_BASE, 0);1767RADEON_WRITE(R600_CB_COLOR6_BASE, 0);1768RADEON_WRITE(R600_CB_COLOR7_BASE, 0);17691770RADEON_WRITE(R700_TCP_CNTL, 0);17711772hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);1773RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);17741775RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);17761777RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |1778R600_NUM_CLIP_SEQ(3)));17791780}17811782static void r600_cp_init_ring_buffer(struct drm_device *dev,1783drm_radeon_private_t *dev_priv,1784struct drm_file *file_priv)1785{1786struct drm_radeon_master_private *master_priv;1787u32 ring_start;1788u64 rptr_addr;17891790if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))1791r700_gfx_init(dev, dev_priv);1792else1793r600_gfx_init(dev, dev_priv);17941795RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);1796RADEON_READ(R600_GRBM_SOFT_RESET);1797DRM_UDELAY(15000);1798RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);179918001801/* Set ring buffer size */1802#ifdef __BIG_ENDIAN1803RADEON_WRITE(R600_CP_RB_CNTL,1804RADEON_BUF_SWAP_32BIT |1805RADEON_RB_NO_UPDATE |1806(dev_priv->ring.rptr_update_l2qw << 8) |1807dev_priv->ring.size_l2qw);1808#else1809RADEON_WRITE(R600_CP_RB_CNTL,1810RADEON_RB_NO_UPDATE |1811(dev_priv->ring.rptr_update_l2qw << 8) |1812dev_priv->ring.size_l2qw);1813#endif18141815RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4);18161817/* Set the write pointer delay */1818RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);18191820#ifdef __BIG_ENDIAN1821RADEON_WRITE(R600_CP_RB_CNTL,1822RADEON_BUF_SWAP_32BIT |1823RADEON_RB_NO_UPDATE |1824RADEON_RB_RPTR_WR_ENA |1825(dev_priv->ring.rptr_update_l2qw << 8) |1826dev_priv->ring.size_l2qw);1827#else1828RADEON_WRITE(R600_CP_RB_CNTL,1829RADEON_RB_NO_UPDATE |1830RADEON_RB_RPTR_WR_ENA |1831(dev_priv->ring.rptr_update_l2qw << 8) |1832dev_priv->ring.size_l2qw);1833#endif18341835/* Initialize the ring buffer's read and write pointers */1836RADEON_WRITE(R600_CP_RB_RPTR_WR, 0);1837RADEON_WRITE(R600_CP_RB_WPTR, 0);1838SET_RING_HEAD(dev_priv, 0);1839dev_priv->ring.tail = 0;18401841#if __OS_HAS_AGP1842if (dev_priv->flags & RADEON_IS_AGP) {1843rptr_addr = dev_priv->ring_rptr->offset1844- dev->agp->base +1845dev_priv->gart_vm_start;1846} else1847#endif1848{1849rptr_addr = dev_priv->ring_rptr->offset1850- ((unsigned long) dev->sg->virtual)1851+ dev_priv->gart_vm_start;1852}1853RADEON_WRITE(R600_CP_RB_RPTR_ADDR,1854#ifdef __BIG_ENDIAN1855(2 << 0) |1856#endif1857(rptr_addr & 0xfffffffc));1858RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI,1859upper_32_bits(rptr_addr));18601861#ifdef __BIG_ENDIAN1862RADEON_WRITE(R600_CP_RB_CNTL,1863RADEON_BUF_SWAP_32BIT |1864(dev_priv->ring.rptr_update_l2qw << 8) |1865dev_priv->ring.size_l2qw);1866#else1867RADEON_WRITE(R600_CP_RB_CNTL,1868(dev_priv->ring.rptr_update_l2qw << 8) |1869dev_priv->ring.size_l2qw);1870#endif18711872#if __OS_HAS_AGP1873if (dev_priv->flags & RADEON_IS_AGP) {1874/* XXX */1875radeon_write_agp_base(dev_priv, dev->agp->base);18761877/* XXX */1878radeon_write_agp_location(dev_priv,1879(((dev_priv->gart_vm_start - 1 +1880dev_priv->gart_size) & 0xffff0000) |1881(dev_priv->gart_vm_start >> 16)));18821883ring_start = (dev_priv->cp_ring->offset1884- dev->agp->base1885+ dev_priv->gart_vm_start);1886} else1887#endif1888ring_start = (dev_priv->cp_ring->offset1889- (unsigned long)dev->sg->virtual1890+ dev_priv->gart_vm_start);18911892RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8);18931894RADEON_WRITE(R600_CP_ME_CNTL, 0xff);18951896RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28));18971898/* Initialize the scratch register pointer. This will cause1899* the scratch register values to be written out to memory1900* whenever they are updated.1901*1902* We simply put this behind the ring read pointer, this works1903* with PCI GART as well as (whatever kind of) AGP GART1904*/1905{1906u64 scratch_addr;19071908scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR) & 0xFFFFFFFC;1909scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;1910scratch_addr += R600_SCRATCH_REG_OFFSET;1911scratch_addr >>= 8;1912scratch_addr &= 0xffffffff;19131914RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr);1915}19161917RADEON_WRITE(R600_SCRATCH_UMSK, 0x7);19181919/* Turn on bus mastering */1920radeon_enable_bm(dev_priv);19211922radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0);1923RADEON_WRITE(R600_LAST_FRAME_REG, 0);19241925radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);1926RADEON_WRITE(R600_LAST_DISPATCH_REG, 0);19271928radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0);1929RADEON_WRITE(R600_LAST_CLEAR_REG, 0);19301931/* reset sarea copies of these */1932master_priv = file_priv->master->driver_priv;1933if (master_priv->sarea_priv) {1934master_priv->sarea_priv->last_frame = 0;1935master_priv->sarea_priv->last_dispatch = 0;1936master_priv->sarea_priv->last_clear = 0;1937}19381939r600_do_wait_for_idle(dev_priv);19401941}19421943int r600_do_cleanup_cp(struct drm_device *dev)1944{1945drm_radeon_private_t *dev_priv = dev->dev_private;1946DRM_DEBUG("\n");19471948/* Make sure interrupts are disabled here because the uninstall ioctl1949* may not have been called from userspace and after dev_private1950* is freed, it's too late.1951*/1952if (dev->irq_enabled)1953drm_irq_uninstall(dev);19541955#if __OS_HAS_AGP1956if (dev_priv->flags & RADEON_IS_AGP) {1957if (dev_priv->cp_ring != NULL) {1958drm_core_ioremapfree(dev_priv->cp_ring, dev);1959dev_priv->cp_ring = NULL;1960}1961if (dev_priv->ring_rptr != NULL) {1962drm_core_ioremapfree(dev_priv->ring_rptr, dev);1963dev_priv->ring_rptr = NULL;1964}1965if (dev->agp_buffer_map != NULL) {1966drm_core_ioremapfree(dev->agp_buffer_map, dev);1967dev->agp_buffer_map = NULL;1968}1969} else1970#endif1971{19721973if (dev_priv->gart_info.bus_addr)1974r600_page_table_cleanup(dev, &dev_priv->gart_info);19751976if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) {1977drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev);1978dev_priv->gart_info.addr = NULL;1979}1980}1981/* only clear to the start of flags */1982memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags));19831984return 0;1985}19861987int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,1988struct drm_file *file_priv)1989{1990drm_radeon_private_t *dev_priv = dev->dev_private;1991struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;19921993DRM_DEBUG("\n");19941995mutex_init(&dev_priv->cs_mutex);1996r600_cs_legacy_init();1997/* if we require new memory map but we don't have it fail */1998if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {1999DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");2000r600_do_cleanup_cp(dev);2001return -EINVAL;2002}20032004if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) {2005DRM_DEBUG("Forcing AGP card to PCI mode\n");2006dev_priv->flags &= ~RADEON_IS_AGP;2007/* The writeback test succeeds, but when writeback is enabled,2008* the ring buffer read ptr update fails after first 128 bytes.2009*/2010radeon_no_wb = 1;2011} else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE))2012&& !init->is_pci) {2013DRM_DEBUG("Restoring AGP flag\n");2014dev_priv->flags |= RADEON_IS_AGP;2015}20162017dev_priv->usec_timeout = init->usec_timeout;2018if (dev_priv->usec_timeout < 1 ||2019dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) {2020DRM_DEBUG("TIMEOUT problem!\n");2021r600_do_cleanup_cp(dev);2022return -EINVAL;2023}20242025/* Enable vblank on CRTC1 for older X servers2026*/2027dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;2028dev_priv->do_boxes = 0;2029dev_priv->cp_mode = init->cp_mode;20302031/* We don't support anything other than bus-mastering ring mode,2032* but the ring can be in either AGP or PCI space for the ring2033* read pointer.2034*/2035if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) &&2036(init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) {2037DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode);2038r600_do_cleanup_cp(dev);2039return -EINVAL;2040}20412042switch (init->fb_bpp) {2043case 16:2044dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565;2045break;2046case 32:2047default:2048dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888;2049break;2050}2051dev_priv->front_offset = init->front_offset;2052dev_priv->front_pitch = init->front_pitch;2053dev_priv->back_offset = init->back_offset;2054dev_priv->back_pitch = init->back_pitch;20552056dev_priv->ring_offset = init->ring_offset;2057dev_priv->ring_rptr_offset = init->ring_rptr_offset;2058dev_priv->buffers_offset = init->buffers_offset;2059dev_priv->gart_textures_offset = init->gart_textures_offset;20602061master_priv->sarea = drm_getsarea(dev);2062if (!master_priv->sarea) {2063DRM_ERROR("could not find sarea!\n");2064r600_do_cleanup_cp(dev);2065return -EINVAL;2066}20672068dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset);2069if (!dev_priv->cp_ring) {2070DRM_ERROR("could not find cp ring region!\n");2071r600_do_cleanup_cp(dev);2072return -EINVAL;2073}2074dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset);2075if (!dev_priv->ring_rptr) {2076DRM_ERROR("could not find ring read pointer!\n");2077r600_do_cleanup_cp(dev);2078return -EINVAL;2079}2080dev->agp_buffer_token = init->buffers_offset;2081dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);2082if (!dev->agp_buffer_map) {2083DRM_ERROR("could not find dma buffer region!\n");2084r600_do_cleanup_cp(dev);2085return -EINVAL;2086}20872088if (init->gart_textures_offset) {2089dev_priv->gart_textures =2090drm_core_findmap(dev, init->gart_textures_offset);2091if (!dev_priv->gart_textures) {2092DRM_ERROR("could not find GART texture region!\n");2093r600_do_cleanup_cp(dev);2094return -EINVAL;2095}2096}20972098#if __OS_HAS_AGP2099/* XXX */2100if (dev_priv->flags & RADEON_IS_AGP) {2101drm_core_ioremap_wc(dev_priv->cp_ring, dev);2102drm_core_ioremap_wc(dev_priv->ring_rptr, dev);2103drm_core_ioremap_wc(dev->agp_buffer_map, dev);2104if (!dev_priv->cp_ring->handle ||2105!dev_priv->ring_rptr->handle ||2106!dev->agp_buffer_map->handle) {2107DRM_ERROR("could not find ioremap agp regions!\n");2108r600_do_cleanup_cp(dev);2109return -EINVAL;2110}2111} else2112#endif2113{2114dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset;2115dev_priv->ring_rptr->handle =2116(void *)(unsigned long)dev_priv->ring_rptr->offset;2117dev->agp_buffer_map->handle =2118(void *)(unsigned long)dev->agp_buffer_map->offset;21192120DRM_DEBUG("dev_priv->cp_ring->handle %p\n",2121dev_priv->cp_ring->handle);2122DRM_DEBUG("dev_priv->ring_rptr->handle %p\n",2123dev_priv->ring_rptr->handle);2124DRM_DEBUG("dev->agp_buffer_map->handle %p\n",2125dev->agp_buffer_map->handle);2126}21272128dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24;2129dev_priv->fb_size =2130(((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000)2131- dev_priv->fb_location;21322133dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |2134((dev_priv->front_offset2135+ dev_priv->fb_location) >> 10));21362137dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) |2138((dev_priv->back_offset2139+ dev_priv->fb_location) >> 10));21402141dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) |2142((dev_priv->depth_offset2143+ dev_priv->fb_location) >> 10));21442145dev_priv->gart_size = init->gart_size;21462147/* New let's set the memory map ... */2148if (dev_priv->new_memmap) {2149u32 base = 0;21502151DRM_INFO("Setting GART location based on new memory map\n");21522153/* If using AGP, try to locate the AGP aperture at the same2154* location in the card and on the bus, though we have to2155* align it down.2156*/2157#if __OS_HAS_AGP2158/* XXX */2159if (dev_priv->flags & RADEON_IS_AGP) {2160base = dev->agp->base;2161/* Check if valid */2162if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location &&2163base < (dev_priv->fb_location + dev_priv->fb_size - 1)) {2164DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",2165dev->agp->base);2166base = 0;2167}2168}2169#endif2170/* If not or if AGP is at 0 (Macs), try to put it elsewhere */2171if (base == 0) {2172base = dev_priv->fb_location + dev_priv->fb_size;2173if (base < dev_priv->fb_location ||2174((base + dev_priv->gart_size) & 0xfffffffful) < base)2175base = dev_priv->fb_location2176- dev_priv->gart_size;2177}2178dev_priv->gart_vm_start = base & 0xffc00000u;2179if (dev_priv->gart_vm_start != base)2180DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",2181base, dev_priv->gart_vm_start);2182}21832184#if __OS_HAS_AGP2185/* XXX */2186if (dev_priv->flags & RADEON_IS_AGP)2187dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset2188- dev->agp->base2189+ dev_priv->gart_vm_start);2190else2191#endif2192dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset2193- (unsigned long)dev->sg->virtual2194+ dev_priv->gart_vm_start);21952196DRM_DEBUG("fb 0x%08x size %d\n",2197(unsigned int) dev_priv->fb_location,2198(unsigned int) dev_priv->fb_size);2199DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size);2200DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n",2201(unsigned int) dev_priv->gart_vm_start);2202DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n",2203dev_priv->gart_buffers_offset);22042205dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle;2206dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle2207+ init->ring_size / sizeof(u32));2208dev_priv->ring.size = init->ring_size;2209dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8);22102211dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;2212dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8);22132214dev_priv->ring.fetch_size = /* init->fetch_size */ 32;2215dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16);22162217dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;22182219dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;22202221#if __OS_HAS_AGP2222if (dev_priv->flags & RADEON_IS_AGP) {2223/* XXX turn off pcie gart */2224} else2225#endif2226{2227dev_priv->gart_info.table_mask = DMA_BIT_MASK(32);2228/* if we have an offset set from userspace */2229if (!dev_priv->pcigart_offset_set) {2230DRM_ERROR("Need gart offset from userspace\n");2231r600_do_cleanup_cp(dev);2232return -EINVAL;2233}22342235DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset);22362237dev_priv->gart_info.bus_addr =2238dev_priv->pcigart_offset + dev_priv->fb_location;2239dev_priv->gart_info.mapping.offset =2240dev_priv->pcigart_offset + dev_priv->fb_aper_offset;2241dev_priv->gart_info.mapping.size =2242dev_priv->gart_info.table_size;22432244drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);2245if (!dev_priv->gart_info.mapping.handle) {2246DRM_ERROR("ioremap failed.\n");2247r600_do_cleanup_cp(dev);2248return -EINVAL;2249}22502251dev_priv->gart_info.addr =2252dev_priv->gart_info.mapping.handle;22532254DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n",2255dev_priv->gart_info.addr,2256dev_priv->pcigart_offset);22572258if (!r600_page_table_init(dev)) {2259DRM_ERROR("Failed to init GART table\n");2260r600_do_cleanup_cp(dev);2261return -EINVAL;2262}22632264if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))2265r700_vm_init(dev);2266else2267r600_vm_init(dev);2268}22692270if (!dev_priv->me_fw || !dev_priv->pfp_fw) {2271int err = r600_cp_init_microcode(dev_priv);2272if (err) {2273DRM_ERROR("Failed to load firmware!\n");2274r600_do_cleanup_cp(dev);2275return err;2276}2277}2278if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))2279r700_cp_load_microcode(dev_priv);2280else2281r600_cp_load_microcode(dev_priv);22822283r600_cp_init_ring_buffer(dev, dev_priv, file_priv);22842285dev_priv->last_buf = 0;22862287r600_do_engine_reset(dev);2288r600_test_writeback(dev_priv);22892290return 0;2291}22922293int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv)2294{2295drm_radeon_private_t *dev_priv = dev->dev_private;22962297DRM_DEBUG("\n");2298if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) {2299r700_vm_init(dev);2300r700_cp_load_microcode(dev_priv);2301} else {2302r600_vm_init(dev);2303r600_cp_load_microcode(dev_priv);2304}2305r600_cp_init_ring_buffer(dev, dev_priv, file_priv);2306r600_do_engine_reset(dev);23072308return 0;2309}23102311/* Wait for the CP to go idle.2312*/2313int r600_do_cp_idle(drm_radeon_private_t *dev_priv)2314{2315RING_LOCALS;2316DRM_DEBUG("\n");23172318BEGIN_RING(5);2319OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));2320OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);2321/* wait for 3D idle clean */2322OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));2323OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);2324OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);23252326ADVANCE_RING();2327COMMIT_RING();23282329return r600_do_wait_for_idle(dev_priv);2330}23312332/* Start the Command Processor.2333*/2334void r600_do_cp_start(drm_radeon_private_t *dev_priv)2335{2336u32 cp_me;2337RING_LOCALS;2338DRM_DEBUG("\n");23392340BEGIN_RING(7);2341OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5));2342OUT_RING(0x00000001);2343if (((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770))2344OUT_RING(0x00000003);2345else2346OUT_RING(0x00000000);2347OUT_RING((dev_priv->r600_max_hw_contexts - 1));2348OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1));2349OUT_RING(0x00000000);2350OUT_RING(0x00000000);2351ADVANCE_RING();2352COMMIT_RING();23532354/* set the mux and reset the halt bit */2355cp_me = 0xff;2356RADEON_WRITE(R600_CP_ME_CNTL, cp_me);23572358dev_priv->cp_running = 1;23592360}23612362void r600_do_cp_reset(drm_radeon_private_t *dev_priv)2363{2364u32 cur_read_ptr;2365DRM_DEBUG("\n");23662367cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR);2368RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr);2369SET_RING_HEAD(dev_priv, cur_read_ptr);2370dev_priv->ring.tail = cur_read_ptr;2371}23722373void r600_do_cp_stop(drm_radeon_private_t *dev_priv)2374{2375uint32_t cp_me;23762377DRM_DEBUG("\n");23782379cp_me = 0xff | R600_CP_ME_HALT;23802381RADEON_WRITE(R600_CP_ME_CNTL, cp_me);23822383dev_priv->cp_running = 0;2384}23852386int r600_cp_dispatch_indirect(struct drm_device *dev,2387struct drm_buf *buf, int start, int end)2388{2389drm_radeon_private_t *dev_priv = dev->dev_private;2390RING_LOCALS;23912392if (start != end) {2393unsigned long offset = (dev_priv->gart_buffers_offset2394+ buf->offset + start);2395int dwords = (end - start + 3) / sizeof(u32);23962397DRM_DEBUG("dwords:%d\n", dwords);2398DRM_DEBUG("offset 0x%lx\n", offset);239924002401/* Indirect buffer data must be a multiple of 16 dwords.2402* pad the data with a Type-2 CP packet.2403*/2404while (dwords & 0xf) {2405u32 *data = (u32 *)2406((char *)dev->agp_buffer_map->handle2407+ buf->offset + start);2408data[dwords++] = RADEON_CP_PACKET2;2409}24102411/* Fire off the indirect buffer */2412BEGIN_RING(4);2413OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2));2414OUT_RING((offset & 0xfffffffc));2415OUT_RING((upper_32_bits(offset) & 0xff));2416OUT_RING(dwords);2417ADVANCE_RING();2418}24192420return 0;2421}24222423void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv)2424{2425drm_radeon_private_t *dev_priv = dev->dev_private;2426struct drm_master *master = file_priv->master;2427struct drm_radeon_master_private *master_priv = master->driver_priv;2428drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;2429int nbox = sarea_priv->nbox;2430struct drm_clip_rect *pbox = sarea_priv->boxes;2431int i, cpp, src_pitch, dst_pitch;2432uint64_t src, dst;2433RING_LOCALS;2434DRM_DEBUG("\n");24352436if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888)2437cpp = 4;2438else2439cpp = 2;24402441if (sarea_priv->pfCurrentPage == 0) {2442src_pitch = dev_priv->back_pitch;2443dst_pitch = dev_priv->front_pitch;2444src = dev_priv->back_offset + dev_priv->fb_location;2445dst = dev_priv->front_offset + dev_priv->fb_location;2446} else {2447src_pitch = dev_priv->front_pitch;2448dst_pitch = dev_priv->back_pitch;2449src = dev_priv->front_offset + dev_priv->fb_location;2450dst = dev_priv->back_offset + dev_priv->fb_location;2451}24522453if (r600_prepare_blit_copy(dev, file_priv)) {2454DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");2455return;2456}2457for (i = 0; i < nbox; i++) {2458int x = pbox[i].x1;2459int y = pbox[i].y1;2460int w = pbox[i].x2 - x;2461int h = pbox[i].y2 - y;24622463DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);24642465r600_blit_swap(dev,2466src, dst,2467x, y, x, y, w, h,2468src_pitch, dst_pitch, cpp);2469}2470r600_done_blit_copy(dev);24712472/* Increment the frame counter. The client-side 3D driver must2473* throttle the framerate by waiting for this value before2474* performing the swapbuffer ioctl.2475*/2476sarea_priv->last_frame++;24772478BEGIN_RING(3);2479R600_FRAME_AGE(sarea_priv->last_frame);2480ADVANCE_RING();2481}24822483int r600_cp_dispatch_texture(struct drm_device *dev,2484struct drm_file *file_priv,2485drm_radeon_texture_t *tex,2486drm_radeon_tex_image_t *image)2487{2488drm_radeon_private_t *dev_priv = dev->dev_private;2489struct drm_buf *buf;2490u32 *buffer;2491const u8 __user *data;2492int size, pass_size;2493u64 src_offset, dst_offset;24942495if (!radeon_check_offset(dev_priv, tex->offset)) {2496DRM_ERROR("Invalid destination offset\n");2497return -EINVAL;2498}24992500/* this might fail for zero-sized uploads - are those illegal? */2501if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) {2502DRM_ERROR("Invalid final destination offset\n");2503return -EINVAL;2504}25052506size = tex->height * tex->pitch;25072508if (size == 0)2509return 0;25102511dst_offset = tex->offset;25122513if (r600_prepare_blit_copy(dev, file_priv)) {2514DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");2515return -EAGAIN;2516}2517do {2518data = (const u8 __user *)image->data;2519pass_size = size;25202521buf = radeon_freelist_get(dev);2522if (!buf) {2523DRM_DEBUG("EAGAIN\n");2524if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))2525return -EFAULT;2526return -EAGAIN;2527}25282529if (pass_size > buf->total)2530pass_size = buf->total;25312532/* Dispatch the indirect buffer.2533*/2534buffer =2535(u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);25362537if (DRM_COPY_FROM_USER(buffer, data, pass_size)) {2538DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size);2539return -EFAULT;2540}25412542buf->file_priv = file_priv;2543buf->used = pass_size;2544src_offset = dev_priv->gart_buffers_offset + buf->offset;25452546r600_blit_copy(dev, src_offset, dst_offset, pass_size);25472548radeon_cp_discard_buffer(dev, file_priv->master, buf);25492550/* Update the input parameters for next time */2551image->data = (const u8 __user *)image->data + pass_size;2552dst_offset += pass_size;2553size -= pass_size;2554} while (size > 0);2555r600_done_blit_copy(dev);25562557return 0;2558}25592560/*2561* Legacy cs ioctl2562*/2563static u32 radeon_cs_id_get(struct drm_radeon_private *radeon)2564{2565/* FIXME: check if wrap affect last reported wrap & sequence */2566radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF;2567if (!radeon->cs_id_scnt) {2568/* increment wrap counter */2569radeon->cs_id_wcnt += 0x01000000;2570/* valid sequence counter start at 1 */2571radeon->cs_id_scnt = 1;2572}2573return (radeon->cs_id_scnt | radeon->cs_id_wcnt);2574}25752576static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id)2577{2578RING_LOCALS;25792580*id = radeon_cs_id_get(dev_priv);25812582/* SCRATCH 2 */2583BEGIN_RING(3);2584R600_CLEAR_AGE(*id);2585ADVANCE_RING();2586COMMIT_RING();2587}25882589static int r600_ib_get(struct drm_device *dev,2590struct drm_file *fpriv,2591struct drm_buf **buffer)2592{2593struct drm_buf *buf;25942595*buffer = NULL;2596buf = radeon_freelist_get(dev);2597if (!buf) {2598return -EBUSY;2599}2600buf->file_priv = fpriv;2601*buffer = buf;2602return 0;2603}26042605static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf,2606struct drm_file *fpriv, int l, int r)2607{2608drm_radeon_private_t *dev_priv = dev->dev_private;26092610if (buf) {2611if (!r)2612r600_cp_dispatch_indirect(dev, buf, 0, l * 4);2613radeon_cp_discard_buffer(dev, fpriv->master, buf);2614COMMIT_RING();2615}2616}26172618int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)2619{2620struct drm_radeon_private *dev_priv = dev->dev_private;2621struct drm_radeon_cs *cs = data;2622struct drm_buf *buf;2623unsigned family;2624int l, r = 0;2625u32 *ib, cs_id = 0;26262627if (dev_priv == NULL) {2628DRM_ERROR("called with no initialization\n");2629return -EINVAL;2630}2631family = dev_priv->flags & RADEON_FAMILY_MASK;2632if (family < CHIP_R600) {2633DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n");2634return -EINVAL;2635}2636mutex_lock(&dev_priv->cs_mutex);2637/* get ib */2638r = r600_ib_get(dev, fpriv, &buf);2639if (r) {2640DRM_ERROR("ib_get failed\n");2641goto out;2642}2643ib = dev->agp_buffer_map->handle + buf->offset;2644/* now parse command stream */2645r = r600_cs_legacy(dev, data, fpriv, family, ib, &l);2646if (r) {2647goto out;2648}26492650out:2651r600_ib_free(dev, buf, fpriv, l, r);2652/* emit cs id sequence */2653r600_cs_id_emit(dev_priv, &cs_id);2654cs->cs_id = cs_id;2655mutex_unlock(&dev_priv->cs_mutex);2656return r;2657}26582659void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size)2660{2661struct drm_radeon_private *dev_priv = dev->dev_private;26622663*npipes = dev_priv->r600_npipes;2664*nbanks = dev_priv->r600_nbanks;2665*group_size = dev_priv->r600_group_size;2666}266726682669