Path: blob/master/drivers/gpu/drm/radeon/evergreen_blit_kms.c
15113 views
/*1* Copyright 2010 Advanced Micro Devices, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR18* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,19* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22* Authors:23* Alex Deucher <[email protected]>24*/2526#include "drmP.h"27#include "drm.h"28#include "radeon_drm.h"29#include "radeon.h"3031#include "evergreend.h"32#include "evergreen_blit_shaders.h"33#include "cayman_blit_shaders.h"3435#define DI_PT_RECTLIST 0x1136#define DI_INDEX_SIZE_16_BIT 0x037#define DI_SRC_SEL_AUTO_INDEX 0x23839#define FMT_8 0x140#define FMT_5_6_5 0x841#define FMT_8_8_8_8 0x1a42#define COLOR_8 0x143#define COLOR_5_6_5 0x844#define COLOR_8_8_8_8 0x1a4546/* emits 17 */47static void48set_render_target(struct radeon_device *rdev, int format,49int w, int h, u64 gpu_addr)50{51u32 cb_color_info;52int pitch, slice;5354h = ALIGN(h, 8);55if (h < 8)56h = 8;5758cb_color_info = ((format << 2) | (1 << 24) | (1 << 8));59pitch = (w / 8) - 1;60slice = ((w * h) / 64) - 1;6162radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 15));63radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2);64radeon_ring_write(rdev, gpu_addr >> 8);65radeon_ring_write(rdev, pitch);66radeon_ring_write(rdev, slice);67radeon_ring_write(rdev, 0);68radeon_ring_write(rdev, cb_color_info);69radeon_ring_write(rdev, (1 << 4));70radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));71radeon_ring_write(rdev, 0);72radeon_ring_write(rdev, 0);73radeon_ring_write(rdev, 0);74radeon_ring_write(rdev, 0);75radeon_ring_write(rdev, 0);76radeon_ring_write(rdev, 0);77radeon_ring_write(rdev, 0);78radeon_ring_write(rdev, 0);79}8081/* emits 5dw */82static void83cp_set_surface_sync(struct radeon_device *rdev,84u32 sync_type, u32 size,85u64 mc_addr)86{87u32 cp_coher_size;8889if (size == 0xffffffff)90cp_coher_size = 0xffffffff;91else92cp_coher_size = ((size + 255) >> 8);9394radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));95radeon_ring_write(rdev, sync_type);96radeon_ring_write(rdev, cp_coher_size);97radeon_ring_write(rdev, mc_addr >> 8);98radeon_ring_write(rdev, 10); /* poll interval */99}100101/* emits 11dw + 1 surface sync = 16dw */102static void103set_shaders(struct radeon_device *rdev)104{105u64 gpu_addr;106107/* VS */108gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;109radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 3));110radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2);111radeon_ring_write(rdev, gpu_addr >> 8);112radeon_ring_write(rdev, 2);113radeon_ring_write(rdev, 0);114115/* PS */116gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;117radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 4));118radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2);119radeon_ring_write(rdev, gpu_addr >> 8);120radeon_ring_write(rdev, 1);121radeon_ring_write(rdev, 0);122radeon_ring_write(rdev, 2);123124gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;125cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);126}127128/* emits 10 + 1 sync (5) = 15 */129static void130set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)131{132u32 sq_vtx_constant_word2, sq_vtx_constant_word3;133134/* high addr, stride */135sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));136#ifdef __BIG_ENDIAN137sq_vtx_constant_word2 |= (2 << 30);138#endif139/* xyzw swizzles */140sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12);141142radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));143radeon_ring_write(rdev, 0x580);144radeon_ring_write(rdev, gpu_addr & 0xffffffff);145radeon_ring_write(rdev, 48 - 1); /* size */146radeon_ring_write(rdev, sq_vtx_constant_word2);147radeon_ring_write(rdev, sq_vtx_constant_word3);148radeon_ring_write(rdev, 0);149radeon_ring_write(rdev, 0);150radeon_ring_write(rdev, 0);151radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);152153if ((rdev->family == CHIP_CEDAR) ||154(rdev->family == CHIP_PALM) ||155(rdev->family == CHIP_SUMO) ||156(rdev->family == CHIP_SUMO2) ||157(rdev->family == CHIP_CAICOS))158cp_set_surface_sync(rdev,159PACKET3_TC_ACTION_ENA, 48, gpu_addr);160else161cp_set_surface_sync(rdev,162PACKET3_VC_ACTION_ENA, 48, gpu_addr);163164}165166/* emits 10 */167static void168set_tex_resource(struct radeon_device *rdev,169int format, int w, int h, int pitch,170u64 gpu_addr)171{172u32 sq_tex_resource_word0, sq_tex_resource_word1;173u32 sq_tex_resource_word4, sq_tex_resource_word7;174175if (h < 1)176h = 1;177178sq_tex_resource_word0 = (1 << 0); /* 2D */179sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |180((w - 1) << 18));181sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28);182/* xyzw swizzles */183sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);184185sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30);186187radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));188radeon_ring_write(rdev, 0);189radeon_ring_write(rdev, sq_tex_resource_word0);190radeon_ring_write(rdev, sq_tex_resource_word1);191radeon_ring_write(rdev, gpu_addr >> 8);192radeon_ring_write(rdev, gpu_addr >> 8);193radeon_ring_write(rdev, sq_tex_resource_word4);194radeon_ring_write(rdev, 0);195radeon_ring_write(rdev, 0);196radeon_ring_write(rdev, sq_tex_resource_word7);197}198199/* emits 12 */200static void201set_scissors(struct radeon_device *rdev, int x1, int y1,202int x2, int y2)203{204/* workaround some hw bugs */205if (x2 == 0)206x1 = 1;207if (y2 == 0)208y1 = 1;209if (rdev->family == CHIP_CAYMAN) {210if ((x2 == 1) && (y2 == 1))211x2 = 2;212}213214radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));215radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);216radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));217radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));218219radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));220radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);221radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));222radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));223224radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));225radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);226radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));227radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));228}229230/* emits 10 */231static void232draw_auto(struct radeon_device *rdev)233{234radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));235radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2);236radeon_ring_write(rdev, DI_PT_RECTLIST);237238radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));239radeon_ring_write(rdev,240#ifdef __BIG_ENDIAN241(2 << 2) |242#endif243DI_INDEX_SIZE_16_BIT);244245radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));246radeon_ring_write(rdev, 1);247248radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));249radeon_ring_write(rdev, 3);250radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);251252}253254/* emits 39 */255static void256set_default_state(struct radeon_device *rdev)257{258u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;259u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;260u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;261int num_ps_gprs, num_vs_gprs, num_temp_gprs;262int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs;263int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;264int num_hs_threads, num_ls_threads;265int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;266int num_hs_stack_entries, num_ls_stack_entries;267u64 gpu_addr;268int dwords;269270/* set clear context state */271radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));272radeon_ring_write(rdev, 0);273274if (rdev->family < CHIP_CAYMAN) {275switch (rdev->family) {276case CHIP_CEDAR:277default:278num_ps_gprs = 93;279num_vs_gprs = 46;280num_temp_gprs = 4;281num_gs_gprs = 31;282num_es_gprs = 31;283num_hs_gprs = 23;284num_ls_gprs = 23;285num_ps_threads = 96;286num_vs_threads = 16;287num_gs_threads = 16;288num_es_threads = 16;289num_hs_threads = 16;290num_ls_threads = 16;291num_ps_stack_entries = 42;292num_vs_stack_entries = 42;293num_gs_stack_entries = 42;294num_es_stack_entries = 42;295num_hs_stack_entries = 42;296num_ls_stack_entries = 42;297break;298case CHIP_REDWOOD:299num_ps_gprs = 93;300num_vs_gprs = 46;301num_temp_gprs = 4;302num_gs_gprs = 31;303num_es_gprs = 31;304num_hs_gprs = 23;305num_ls_gprs = 23;306num_ps_threads = 128;307num_vs_threads = 20;308num_gs_threads = 20;309num_es_threads = 20;310num_hs_threads = 20;311num_ls_threads = 20;312num_ps_stack_entries = 42;313num_vs_stack_entries = 42;314num_gs_stack_entries = 42;315num_es_stack_entries = 42;316num_hs_stack_entries = 42;317num_ls_stack_entries = 42;318break;319case CHIP_JUNIPER:320num_ps_gprs = 93;321num_vs_gprs = 46;322num_temp_gprs = 4;323num_gs_gprs = 31;324num_es_gprs = 31;325num_hs_gprs = 23;326num_ls_gprs = 23;327num_ps_threads = 128;328num_vs_threads = 20;329num_gs_threads = 20;330num_es_threads = 20;331num_hs_threads = 20;332num_ls_threads = 20;333num_ps_stack_entries = 85;334num_vs_stack_entries = 85;335num_gs_stack_entries = 85;336num_es_stack_entries = 85;337num_hs_stack_entries = 85;338num_ls_stack_entries = 85;339break;340case CHIP_CYPRESS:341case CHIP_HEMLOCK:342num_ps_gprs = 93;343num_vs_gprs = 46;344num_temp_gprs = 4;345num_gs_gprs = 31;346num_es_gprs = 31;347num_hs_gprs = 23;348num_ls_gprs = 23;349num_ps_threads = 128;350num_vs_threads = 20;351num_gs_threads = 20;352num_es_threads = 20;353num_hs_threads = 20;354num_ls_threads = 20;355num_ps_stack_entries = 85;356num_vs_stack_entries = 85;357num_gs_stack_entries = 85;358num_es_stack_entries = 85;359num_hs_stack_entries = 85;360num_ls_stack_entries = 85;361break;362case CHIP_PALM:363num_ps_gprs = 93;364num_vs_gprs = 46;365num_temp_gprs = 4;366num_gs_gprs = 31;367num_es_gprs = 31;368num_hs_gprs = 23;369num_ls_gprs = 23;370num_ps_threads = 96;371num_vs_threads = 16;372num_gs_threads = 16;373num_es_threads = 16;374num_hs_threads = 16;375num_ls_threads = 16;376num_ps_stack_entries = 42;377num_vs_stack_entries = 42;378num_gs_stack_entries = 42;379num_es_stack_entries = 42;380num_hs_stack_entries = 42;381num_ls_stack_entries = 42;382break;383case CHIP_SUMO:384num_ps_gprs = 93;385num_vs_gprs = 46;386num_temp_gprs = 4;387num_gs_gprs = 31;388num_es_gprs = 31;389num_hs_gprs = 23;390num_ls_gprs = 23;391num_ps_threads = 96;392num_vs_threads = 25;393num_gs_threads = 25;394num_es_threads = 25;395num_hs_threads = 25;396num_ls_threads = 25;397num_ps_stack_entries = 42;398num_vs_stack_entries = 42;399num_gs_stack_entries = 42;400num_es_stack_entries = 42;401num_hs_stack_entries = 42;402num_ls_stack_entries = 42;403break;404case CHIP_SUMO2:405num_ps_gprs = 93;406num_vs_gprs = 46;407num_temp_gprs = 4;408num_gs_gprs = 31;409num_es_gprs = 31;410num_hs_gprs = 23;411num_ls_gprs = 23;412num_ps_threads = 96;413num_vs_threads = 25;414num_gs_threads = 25;415num_es_threads = 25;416num_hs_threads = 25;417num_ls_threads = 25;418num_ps_stack_entries = 85;419num_vs_stack_entries = 85;420num_gs_stack_entries = 85;421num_es_stack_entries = 85;422num_hs_stack_entries = 85;423num_ls_stack_entries = 85;424break;425case CHIP_BARTS:426num_ps_gprs = 93;427num_vs_gprs = 46;428num_temp_gprs = 4;429num_gs_gprs = 31;430num_es_gprs = 31;431num_hs_gprs = 23;432num_ls_gprs = 23;433num_ps_threads = 128;434num_vs_threads = 20;435num_gs_threads = 20;436num_es_threads = 20;437num_hs_threads = 20;438num_ls_threads = 20;439num_ps_stack_entries = 85;440num_vs_stack_entries = 85;441num_gs_stack_entries = 85;442num_es_stack_entries = 85;443num_hs_stack_entries = 85;444num_ls_stack_entries = 85;445break;446case CHIP_TURKS:447num_ps_gprs = 93;448num_vs_gprs = 46;449num_temp_gprs = 4;450num_gs_gprs = 31;451num_es_gprs = 31;452num_hs_gprs = 23;453num_ls_gprs = 23;454num_ps_threads = 128;455num_vs_threads = 20;456num_gs_threads = 20;457num_es_threads = 20;458num_hs_threads = 20;459num_ls_threads = 20;460num_ps_stack_entries = 42;461num_vs_stack_entries = 42;462num_gs_stack_entries = 42;463num_es_stack_entries = 42;464num_hs_stack_entries = 42;465num_ls_stack_entries = 42;466break;467case CHIP_CAICOS:468num_ps_gprs = 93;469num_vs_gprs = 46;470num_temp_gprs = 4;471num_gs_gprs = 31;472num_es_gprs = 31;473num_hs_gprs = 23;474num_ls_gprs = 23;475num_ps_threads = 128;476num_vs_threads = 10;477num_gs_threads = 10;478num_es_threads = 10;479num_hs_threads = 10;480num_ls_threads = 10;481num_ps_stack_entries = 42;482num_vs_stack_entries = 42;483num_gs_stack_entries = 42;484num_es_stack_entries = 42;485num_hs_stack_entries = 42;486num_ls_stack_entries = 42;487break;488}489490if ((rdev->family == CHIP_CEDAR) ||491(rdev->family == CHIP_PALM) ||492(rdev->family == CHIP_SUMO) ||493(rdev->family == CHIP_SUMO2) ||494(rdev->family == CHIP_CAICOS))495sq_config = 0;496else497sq_config = VC_ENABLE;498499sq_config |= (EXPORT_SRC_C |500CS_PRIO(0) |501LS_PRIO(0) |502HS_PRIO(0) |503PS_PRIO(0) |504VS_PRIO(1) |505GS_PRIO(2) |506ES_PRIO(3));507508sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |509NUM_VS_GPRS(num_vs_gprs) |510NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));511sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |512NUM_ES_GPRS(num_es_gprs));513sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |514NUM_LS_GPRS(num_ls_gprs));515sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |516NUM_VS_THREADS(num_vs_threads) |517NUM_GS_THREADS(num_gs_threads) |518NUM_ES_THREADS(num_es_threads));519sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |520NUM_LS_THREADS(num_ls_threads));521sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |522NUM_VS_STACK_ENTRIES(num_vs_stack_entries));523sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |524NUM_ES_STACK_ENTRIES(num_es_stack_entries));525sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |526NUM_LS_STACK_ENTRIES(num_ls_stack_entries));527528/* disable dyn gprs */529radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));530radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);531radeon_ring_write(rdev, 0);532533/* setup LDS */534radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));535radeon_ring_write(rdev, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2);536radeon_ring_write(rdev, 0x10001000);537538/* SQ config */539radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));540radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);541radeon_ring_write(rdev, sq_config);542radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);543radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);544radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);545radeon_ring_write(rdev, 0);546radeon_ring_write(rdev, 0);547radeon_ring_write(rdev, sq_thread_resource_mgmt);548radeon_ring_write(rdev, sq_thread_resource_mgmt_2);549radeon_ring_write(rdev, sq_stack_resource_mgmt_1);550radeon_ring_write(rdev, sq_stack_resource_mgmt_2);551radeon_ring_write(rdev, sq_stack_resource_mgmt_3);552}553554/* CONTEXT_CONTROL */555radeon_ring_write(rdev, 0xc0012800);556radeon_ring_write(rdev, 0x80000000);557radeon_ring_write(rdev, 0x80000000);558559/* SQ_VTX_BASE_VTX_LOC */560radeon_ring_write(rdev, 0xc0026f00);561radeon_ring_write(rdev, 0x00000000);562radeon_ring_write(rdev, 0x00000000);563radeon_ring_write(rdev, 0x00000000);564565/* SET_SAMPLER */566radeon_ring_write(rdev, 0xc0036e00);567radeon_ring_write(rdev, 0x00000000);568radeon_ring_write(rdev, 0x00000012);569radeon_ring_write(rdev, 0x00000000);570radeon_ring_write(rdev, 0x00000000);571572/* set to DX10/11 mode */573radeon_ring_write(rdev, PACKET3(PACKET3_MODE_CONTROL, 0));574radeon_ring_write(rdev, 1);575576/* emit an IB pointing at default state */577dwords = ALIGN(rdev->r600_blit.state_len, 0x10);578gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;579radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));580radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);581radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);582radeon_ring_write(rdev, dwords);583584}585586static inline uint32_t i2f(uint32_t input)587{588u32 result, i, exponent, fraction;589590if ((input & 0x3fff) == 0)591result = 0; /* 0 is a special case */592else {593exponent = 140; /* exponent biased by 127; */594fraction = (input & 0x3fff) << 10; /* cheat and only595handle numbers below 2^^15 */596for (i = 0; i < 14; i++) {597if (fraction & 0x800000)598break;599else {600fraction = fraction << 1; /* keep601shifting left until top bit = 1 */602exponent = exponent - 1;603}604}605result = exponent << 23 | (fraction & 0x7fffff); /* mask606off top bit; assumed 1 */607}608return result;609}610611int evergreen_blit_init(struct radeon_device *rdev)612{613u32 obj_size;614int i, r, dwords;615void *ptr;616u32 packet2s[16];617int num_packet2s = 0;618619/* pin copy shader into vram if already initialized */620if (rdev->r600_blit.shader_obj)621goto done;622623mutex_init(&rdev->r600_blit.mutex);624rdev->r600_blit.state_offset = 0;625626if (rdev->family < CHIP_CAYMAN)627rdev->r600_blit.state_len = evergreen_default_size;628else629rdev->r600_blit.state_len = cayman_default_size;630631dwords = rdev->r600_blit.state_len;632while (dwords & 0xf) {633packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));634dwords++;635}636637obj_size = dwords * 4;638obj_size = ALIGN(obj_size, 256);639640rdev->r600_blit.vs_offset = obj_size;641if (rdev->family < CHIP_CAYMAN)642obj_size += evergreen_vs_size * 4;643else644obj_size += cayman_vs_size * 4;645obj_size = ALIGN(obj_size, 256);646647rdev->r600_blit.ps_offset = obj_size;648if (rdev->family < CHIP_CAYMAN)649obj_size += evergreen_ps_size * 4;650else651obj_size += cayman_ps_size * 4;652obj_size = ALIGN(obj_size, 256);653654r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,655&rdev->r600_blit.shader_obj);656if (r) {657DRM_ERROR("evergreen failed to allocate shader\n");658return r;659}660661DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n",662obj_size,663rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);664665r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);666if (unlikely(r != 0))667return r;668r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);669if (r) {670DRM_ERROR("failed to map blit object %d\n", r);671return r;672}673674if (rdev->family < CHIP_CAYMAN) {675memcpy_toio(ptr + rdev->r600_blit.state_offset,676evergreen_default_state, rdev->r600_blit.state_len * 4);677678if (num_packet2s)679memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),680packet2s, num_packet2s * 4);681for (i = 0; i < evergreen_vs_size; i++)682*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);683for (i = 0; i < evergreen_ps_size; i++)684*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);685} else {686memcpy_toio(ptr + rdev->r600_blit.state_offset,687cayman_default_state, rdev->r600_blit.state_len * 4);688689if (num_packet2s)690memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),691packet2s, num_packet2s * 4);692for (i = 0; i < cayman_vs_size; i++)693*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);694for (i = 0; i < cayman_ps_size; i++)695*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);696}697radeon_bo_kunmap(rdev->r600_blit.shader_obj);698radeon_bo_unreserve(rdev->r600_blit.shader_obj);699700done:701r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);702if (unlikely(r != 0))703return r;704r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,705&rdev->r600_blit.shader_gpu_addr);706radeon_bo_unreserve(rdev->r600_blit.shader_obj);707if (r) {708dev_err(rdev->dev, "(%d) pin blit object failed\n", r);709return r;710}711radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);712return 0;713}714715void evergreen_blit_fini(struct radeon_device *rdev)716{717int r;718719radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);720if (rdev->r600_blit.shader_obj == NULL)721return;722/* If we can't reserve the bo, unref should be enough to destroy723* it when it becomes idle.724*/725r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);726if (!r) {727radeon_bo_unpin(rdev->r600_blit.shader_obj);728radeon_bo_unreserve(rdev->r600_blit.shader_obj);729}730radeon_bo_unref(&rdev->r600_blit.shader_obj);731}732733static int evergreen_vb_ib_get(struct radeon_device *rdev)734{735int r;736r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);737if (r) {738DRM_ERROR("failed to get IB for vertex buffer\n");739return r;740}741742rdev->r600_blit.vb_total = 64*1024;743rdev->r600_blit.vb_used = 0;744return 0;745}746747static void evergreen_vb_ib_put(struct radeon_device *rdev)748{749radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);750radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);751}752753int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)754{755int r;756int ring_size, line_size;757int max_size;758/* loops of emits + fence emit possible */759int dwords_per_loop = 74, num_loops;760761r = evergreen_vb_ib_get(rdev);762if (r)763return r;764765/* 8 bpp vs 32 bpp for xfer unit */766if (size_bytes & 3)767line_size = 8192;768else769line_size = 8192 * 4;770771max_size = 8192 * line_size;772773/* major loops cover the max size transfer */774num_loops = ((size_bytes + max_size) / max_size);775/* minor loops cover the extra non aligned bits */776num_loops += ((size_bytes % line_size) ? 1 : 0);777/* calculate number of loops correctly */778ring_size = num_loops * dwords_per_loop;779/* set default + shaders */780ring_size += 55; /* shaders + def state */781ring_size += 10; /* fence emit for VB IB */782ring_size += 5; /* done copy */783ring_size += 10; /* fence emit for done copy */784r = radeon_ring_lock(rdev, ring_size);785if (r)786return r;787788set_default_state(rdev); /* 36 */789set_shaders(rdev); /* 16 */790return 0;791}792793void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)794{795int r;796797if (rdev->r600_blit.vb_ib)798evergreen_vb_ib_put(rdev);799800if (fence)801r = radeon_fence_emit(rdev, fence);802803radeon_ring_unlock_commit(rdev);804}805806void evergreen_kms_blit_copy(struct radeon_device *rdev,807u64 src_gpu_addr, u64 dst_gpu_addr,808int size_bytes)809{810int max_bytes;811u64 vb_gpu_addr;812u32 *vb;813814DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,815size_bytes, rdev->r600_blit.vb_used);816vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);817if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {818max_bytes = 8192;819820while (size_bytes) {821int cur_size = size_bytes;822int src_x = src_gpu_addr & 255;823int dst_x = dst_gpu_addr & 255;824int h = 1;825src_gpu_addr = src_gpu_addr & ~255ULL;826dst_gpu_addr = dst_gpu_addr & ~255ULL;827828if (!src_x && !dst_x) {829h = (cur_size / max_bytes);830if (h > 8192)831h = 8192;832if (h == 0)833h = 1;834else835cur_size = max_bytes;836} else {837if (cur_size > max_bytes)838cur_size = max_bytes;839if (cur_size > (max_bytes - dst_x))840cur_size = (max_bytes - dst_x);841if (cur_size > (max_bytes - src_x))842cur_size = (max_bytes - src_x);843}844845if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {846WARN_ON(1);847}848849vb[0] = i2f(dst_x);850vb[1] = 0;851vb[2] = i2f(src_x);852vb[3] = 0;853854vb[4] = i2f(dst_x);855vb[5] = i2f(h);856vb[6] = i2f(src_x);857vb[7] = i2f(h);858859vb[8] = i2f(dst_x + cur_size);860vb[9] = i2f(h);861vb[10] = i2f(src_x + cur_size);862vb[11] = i2f(h);863864/* src 10 */865set_tex_resource(rdev, FMT_8,866src_x + cur_size, h, src_x + cur_size,867src_gpu_addr);868869/* 5 */870cp_set_surface_sync(rdev,871PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);872873874/* dst 17 */875set_render_target(rdev, COLOR_8,876dst_x + cur_size, h,877dst_gpu_addr);878879/* scissors 12 */880set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);881882/* 15 */883vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;884set_vtx_resource(rdev, vb_gpu_addr);885886/* draw 10 */887draw_auto(rdev);888889/* 5 */890cp_set_surface_sync(rdev,891PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,892cur_size * h, dst_gpu_addr);893894vb += 12;895rdev->r600_blit.vb_used += 12 * 4;896897src_gpu_addr += cur_size * h;898dst_gpu_addr += cur_size * h;899size_bytes -= cur_size * h;900}901} else {902max_bytes = 8192 * 4;903904while (size_bytes) {905int cur_size = size_bytes;906int src_x = (src_gpu_addr & 255);907int dst_x = (dst_gpu_addr & 255);908int h = 1;909src_gpu_addr = src_gpu_addr & ~255ULL;910dst_gpu_addr = dst_gpu_addr & ~255ULL;911912if (!src_x && !dst_x) {913h = (cur_size / max_bytes);914if (h > 8192)915h = 8192;916if (h == 0)917h = 1;918else919cur_size = max_bytes;920} else {921if (cur_size > max_bytes)922cur_size = max_bytes;923if (cur_size > (max_bytes - dst_x))924cur_size = (max_bytes - dst_x);925if (cur_size > (max_bytes - src_x))926cur_size = (max_bytes - src_x);927}928929if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {930WARN_ON(1);931}932933vb[0] = i2f(dst_x / 4);934vb[1] = 0;935vb[2] = i2f(src_x / 4);936vb[3] = 0;937938vb[4] = i2f(dst_x / 4);939vb[5] = i2f(h);940vb[6] = i2f(src_x / 4);941vb[7] = i2f(h);942943vb[8] = i2f((dst_x + cur_size) / 4);944vb[9] = i2f(h);945vb[10] = i2f((src_x + cur_size) / 4);946vb[11] = i2f(h);947948/* src 10 */949set_tex_resource(rdev, FMT_8_8_8_8,950(src_x + cur_size) / 4,951h, (src_x + cur_size) / 4,952src_gpu_addr);953/* 5 */954cp_set_surface_sync(rdev,955PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);956957/* dst 17 */958set_render_target(rdev, COLOR_8_8_8_8,959(dst_x + cur_size) / 4, h,960dst_gpu_addr);961962/* scissors 12 */963set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);964965/* Vertex buffer setup 15 */966vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;967set_vtx_resource(rdev, vb_gpu_addr);968969/* draw 10 */970draw_auto(rdev);971972/* 5 */973cp_set_surface_sync(rdev,974PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,975cur_size * h, dst_gpu_addr);976977/* 74 ring dwords per loop */978vb += 12;979rdev->r600_blit.vb_used += 12 * 4;980981src_gpu_addr += cur_size * h;982dst_gpu_addr += cur_size * h;983size_bytes -= cur_size * h;984}985}986}987988989990