/* Path: blob/master/drivers/gpu/drm/radeon/r600_blit.c */
/* 15113 views */
/*1* Copyright 2009 Advanced Micro Devices, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR18* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,19* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22* Authors:23* Alex Deucher <[email protected]>24*/25#include "drmP.h"26#include "drm.h"27#include "radeon_drm.h"28#include "radeon_drv.h"2930#include "r600_blit_shaders.h"3132#define DI_PT_RECTLIST 0x1133#define DI_INDEX_SIZE_16_BIT 0x034#define DI_SRC_SEL_AUTO_INDEX 0x23536#define FMT_8 0x137#define FMT_5_6_5 0x838#define FMT_8_8_8_8 0x1a39#define COLOR_8 0x140#define COLOR_5_6_5 0x841#define COLOR_8_8_8_8 0x1a4243static inline void44set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)45{46u32 cb_color_info;47int pitch, slice;48RING_LOCALS;49DRM_DEBUG("\n");5051h = ALIGN(h, 8);52if (h < 8)53h = 8;5455cb_color_info = ((format << 2) | (1 << 27));56pitch = (w / 8) - 1;57slice = ((w * h) / 64) - 1;5859if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&60((dev_priv->flags & 
RADEON_FAMILY_MASK) < CHIP_RV770)) {61BEGIN_RING(21 + 2);62OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));63OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);64OUT_RING(gpu_addr >> 8);65OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));66OUT_RING(2 << 0);67} else {68BEGIN_RING(21);69OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));70OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);71OUT_RING(gpu_addr >> 8);72}7374OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));75OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);76OUT_RING((pitch << 0) | (slice << 10));7778OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));79OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);80OUT_RING(0);8182OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));83OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);84OUT_RING(cb_color_info);8586OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));87OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);88OUT_RING(0);8990OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));91OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);92OUT_RING(0);9394OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));95OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);96OUT_RING(0);9798ADVANCE_RING();99}100101static inline void102cp_set_surface_sync(drm_radeon_private_t *dev_priv,103u32 sync_type, u32 size, u64 mc_addr)104{105u32 cp_coher_size;106RING_LOCALS;107DRM_DEBUG("\n");108109if (size == 0xffffffff)110cp_coher_size = 0xffffffff;111else112cp_coher_size = ((size + 255) >> 8);113114BEGIN_RING(5);115OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));116OUT_RING(sync_type);117OUT_RING(cp_coher_size);118OUT_RING((mc_addr >> 8));119OUT_RING(10); /* poll interval */120ADVANCE_RING();121}122123static inline void124set_shaders(struct drm_device *dev)125{126drm_radeon_private_t *dev_priv = dev->dev_private;127u64 gpu_addr;128int i;129u32 *vs, *ps;130uint32_t 
sq_pgm_resources;131RING_LOCALS;132DRM_DEBUG("\n");133134/* load shaders */135vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);136ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);137138for (i = 0; i < r6xx_vs_size; i++)139vs[i] = cpu_to_le32(r6xx_vs[i]);140for (i = 0; i < r6xx_ps_size; i++)141ps[i] = cpu_to_le32(r6xx_ps[i]);142143dev_priv->blit_vb->used = 512;144145gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;146147/* setup shader regs */148sq_pgm_resources = (1 << 0);149150BEGIN_RING(9 + 12);151/* VS */152OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));153OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);154OUT_RING(gpu_addr >> 8);155156OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));157OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);158OUT_RING(sq_pgm_resources);159160OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));161OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);162OUT_RING(0);163164/* PS */165OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));166OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);167OUT_RING((gpu_addr + 256) >> 8);168169OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));170OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);171OUT_RING(sq_pgm_resources | (1 << 28));172173OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));174OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);175OUT_RING(2);176177OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));178OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);179OUT_RING(0);180ADVANCE_RING();181182cp_set_surface_sync(dev_priv,183R600_SH_ACTION_ENA, 512, gpu_addr);184}185186static inline void187set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)188{189uint32_t sq_vtx_constant_word2;190RING_LOCALS;191DRM_DEBUG("\n");192193sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) 
| (16 << 8));194#ifdef __BIG_ENDIAN195sq_vtx_constant_word2 |= (2 << 30);196#endif197198BEGIN_RING(9);199OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));200OUT_RING(0x460);201OUT_RING(gpu_addr & 0xffffffff);202OUT_RING(48 - 1);203OUT_RING(sq_vtx_constant_word2);204OUT_RING(1 << 0);205OUT_RING(0);206OUT_RING(0);207OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);208ADVANCE_RING();209210if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||211((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||212((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||213((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||214((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))215cp_set_surface_sync(dev_priv,216R600_TC_ACTION_ENA, 48, gpu_addr);217else218cp_set_surface_sync(dev_priv,219R600_VC_ACTION_ENA, 48, gpu_addr);220}221222static inline void223set_tex_resource(drm_radeon_private_t *dev_priv,224int format, int w, int h, int pitch, u64 gpu_addr)225{226uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;227RING_LOCALS;228DRM_DEBUG("\n");229230if (h < 1)231h = 1;232233sq_tex_resource_word0 = (1 << 0);234sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |235((w - 1) << 19));236237sq_tex_resource_word1 = (format << 26);238sq_tex_resource_word1 |= ((h - 1) << 0);239240sq_tex_resource_word4 = ((1 << 14) |241(0 << 16) |242(1 << 19) |243(2 << 22) |244(3 << 25));245246BEGIN_RING(9);247OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));248OUT_RING(0);249OUT_RING(sq_tex_resource_word0);250OUT_RING(sq_tex_resource_word1);251OUT_RING(gpu_addr >> 8);252OUT_RING(gpu_addr >> 8);253OUT_RING(sq_tex_resource_word4);254OUT_RING(0);255OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);256ADVANCE_RING();257258}259260static inline void261set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)262{263RING_LOCALS;264DRM_DEBUG("\n");265266BEGIN_RING(12);267OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));268OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - 
R600_SET_CONTEXT_REG_OFFSET) >> 2);269OUT_RING((x1 << 0) | (y1 << 16));270OUT_RING((x2 << 0) | (y2 << 16));271272OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));273OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);274OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));275OUT_RING((x2 << 0) | (y2 << 16));276277OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));278OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);279OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));280OUT_RING((x2 << 0) | (y2 << 16));281ADVANCE_RING();282}283284static inline void285draw_auto(drm_radeon_private_t *dev_priv)286{287RING_LOCALS;288DRM_DEBUG("\n");289290BEGIN_RING(10);291OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));292OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);293OUT_RING(DI_PT_RECTLIST);294295OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));296#ifdef __BIG_ENDIAN297OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);298#else299OUT_RING(DI_INDEX_SIZE_16_BIT);300#endif301302OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));303OUT_RING(1);304305OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));306OUT_RING(3);307OUT_RING(DI_SRC_SEL_AUTO_INDEX);308309ADVANCE_RING();310COMMIT_RING();311}312313static inline void314set_default_state(drm_radeon_private_t *dev_priv)315{316int i;317u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;318u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;319int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;320int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;321int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;322RING_LOCALS;323324switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {325case CHIP_R600:326num_ps_gprs = 192;327num_vs_gprs = 56;328num_temp_gprs = 4;329num_gs_gprs = 0;330num_es_gprs = 0;331num_ps_threads = 136;332num_vs_threads = 48;333num_gs_threads = 4;334num_es_threads = 
4;335num_ps_stack_entries = 128;336num_vs_stack_entries = 128;337num_gs_stack_entries = 0;338num_es_stack_entries = 0;339break;340case CHIP_RV630:341case CHIP_RV635:342num_ps_gprs = 84;343num_vs_gprs = 36;344num_temp_gprs = 4;345num_gs_gprs = 0;346num_es_gprs = 0;347num_ps_threads = 144;348num_vs_threads = 40;349num_gs_threads = 4;350num_es_threads = 4;351num_ps_stack_entries = 40;352num_vs_stack_entries = 40;353num_gs_stack_entries = 32;354num_es_stack_entries = 16;355break;356case CHIP_RV610:357case CHIP_RV620:358case CHIP_RS780:359case CHIP_RS880:360default:361num_ps_gprs = 84;362num_vs_gprs = 36;363num_temp_gprs = 4;364num_gs_gprs = 0;365num_es_gprs = 0;366num_ps_threads = 136;367num_vs_threads = 48;368num_gs_threads = 4;369num_es_threads = 4;370num_ps_stack_entries = 40;371num_vs_stack_entries = 40;372num_gs_stack_entries = 32;373num_es_stack_entries = 16;374break;375case CHIP_RV670:376num_ps_gprs = 144;377num_vs_gprs = 40;378num_temp_gprs = 4;379num_gs_gprs = 0;380num_es_gprs = 0;381num_ps_threads = 136;382num_vs_threads = 48;383num_gs_threads = 4;384num_es_threads = 4;385num_ps_stack_entries = 40;386num_vs_stack_entries = 40;387num_gs_stack_entries = 32;388num_es_stack_entries = 16;389break;390case CHIP_RV770:391num_ps_gprs = 192;392num_vs_gprs = 56;393num_temp_gprs = 4;394num_gs_gprs = 0;395num_es_gprs = 0;396num_ps_threads = 188;397num_vs_threads = 60;398num_gs_threads = 0;399num_es_threads = 0;400num_ps_stack_entries = 256;401num_vs_stack_entries = 256;402num_gs_stack_entries = 0;403num_es_stack_entries = 0;404break;405case CHIP_RV730:406case CHIP_RV740:407num_ps_gprs = 84;408num_vs_gprs = 36;409num_temp_gprs = 4;410num_gs_gprs = 0;411num_es_gprs = 0;412num_ps_threads = 188;413num_vs_threads = 60;414num_gs_threads = 0;415num_es_threads = 0;416num_ps_stack_entries = 128;417num_vs_stack_entries = 128;418num_gs_stack_entries = 0;419num_es_stack_entries = 0;420break;421case CHIP_RV710:422num_ps_gprs = 192;423num_vs_gprs = 56;424num_temp_gprs = 
4;425num_gs_gprs = 0;426num_es_gprs = 0;427num_ps_threads = 144;428num_vs_threads = 48;429num_gs_threads = 0;430num_es_threads = 0;431num_ps_stack_entries = 128;432num_vs_stack_entries = 128;433num_gs_stack_entries = 0;434num_es_stack_entries = 0;435break;436}437438if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||439((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||440((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||441((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||442((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))443sq_config = 0;444else445sq_config = R600_VC_ENABLE;446447sq_config |= (R600_DX9_CONSTS |448R600_ALU_INST_PREFER_VECTOR |449R600_PS_PRIO(0) |450R600_VS_PRIO(1) |451R600_GS_PRIO(2) |452R600_ES_PRIO(3));453454sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |455R600_NUM_VS_GPRS(num_vs_gprs) |456R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));457sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |458R600_NUM_ES_GPRS(num_es_gprs));459sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |460R600_NUM_VS_THREADS(num_vs_threads) |461R600_NUM_GS_THREADS(num_gs_threads) |462R600_NUM_ES_THREADS(num_es_threads));463sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |464R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));465sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |466R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));467468if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {469BEGIN_RING(r7xx_default_size + 10);470for (i = 0; i < r7xx_default_size; i++)471OUT_RING(r7xx_default_state[i]);472} else {473BEGIN_RING(r6xx_default_size + 10);474for (i = 0; i < r6xx_default_size; i++)475OUT_RING(r6xx_default_state[i]);476}477OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));478OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);479/* SQ config */480OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));481OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 
2);482OUT_RING(sq_config);483OUT_RING(sq_gpr_resource_mgmt_1);484OUT_RING(sq_gpr_resource_mgmt_2);485OUT_RING(sq_thread_resource_mgmt);486OUT_RING(sq_stack_resource_mgmt_1);487OUT_RING(sq_stack_resource_mgmt_2);488ADVANCE_RING();489}490491static inline uint32_t i2f(uint32_t input)492{493u32 result, i, exponent, fraction;494495if ((input & 0x3fff) == 0)496result = 0; /* 0 is a special case */497else {498exponent = 140; /* exponent biased by 127; */499fraction = (input & 0x3fff) << 10; /* cheat and only500handle numbers below 2^^15 */501for (i = 0; i < 14; i++) {502if (fraction & 0x800000)503break;504else {505fraction = fraction << 1; /* keep506shifting left until top bit = 1 */507exponent = exponent - 1;508}509}510result = exponent << 23 | (fraction & 0x7fffff); /* mask511off top bit; assumed 1 */512}513return result;514}515516517static inline int r600_nomm_get_vb(struct drm_device *dev)518{519drm_radeon_private_t *dev_priv = dev->dev_private;520dev_priv->blit_vb = radeon_freelist_get(dev);521if (!dev_priv->blit_vb) {522DRM_ERROR("Unable to allocate vertex buffer for blit\n");523return -EAGAIN;524}525return 0;526}527528static inline void r600_nomm_put_vb(struct drm_device *dev)529{530drm_radeon_private_t *dev_priv = dev->dev_private;531532dev_priv->blit_vb->used = 0;533radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);534}535536static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)537{538drm_radeon_private_t *dev_priv = dev->dev_private;539return (((char *)dev->agp_buffer_map->handle +540dev_priv->blit_vb->offset + dev_priv->blit_vb->used));541}542543int544r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)545{546drm_radeon_private_t *dev_priv = dev->dev_private;547int ret;548DRM_DEBUG("\n");549550ret = r600_nomm_get_vb(dev);551if (ret)552return ret;553554dev_priv->blit_vb->file_priv = file_priv;555556set_default_state(dev_priv);557set_shaders(dev);558559return 
0;560}561562563void564r600_done_blit_copy(struct drm_device *dev)565{566drm_radeon_private_t *dev_priv = dev->dev_private;567RING_LOCALS;568DRM_DEBUG("\n");569570BEGIN_RING(5);571OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));572OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);573/* wait for 3D idle clean */574OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));575OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);576OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);577578ADVANCE_RING();579COMMIT_RING();580581r600_nomm_put_vb(dev);582}583584void585r600_blit_copy(struct drm_device *dev,586uint64_t src_gpu_addr, uint64_t dst_gpu_addr,587int size_bytes)588{589drm_radeon_private_t *dev_priv = dev->dev_private;590int max_bytes;591u64 vb_addr;592u32 *vb;593594vb = r600_nomm_get_vb_ptr(dev);595596if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {597max_bytes = 8192;598599while (size_bytes) {600int cur_size = size_bytes;601int src_x = src_gpu_addr & 255;602int dst_x = dst_gpu_addr & 255;603int h = 1;604src_gpu_addr = src_gpu_addr & ~255;605dst_gpu_addr = dst_gpu_addr & ~255;606607if (!src_x && !dst_x) {608h = (cur_size / max_bytes);609if (h > 8192)610h = 8192;611if (h == 0)612h = 1;613else614cur_size = max_bytes;615} else {616if (cur_size > max_bytes)617cur_size = max_bytes;618if (cur_size > (max_bytes - dst_x))619cur_size = (max_bytes - dst_x);620if (cur_size > (max_bytes - src_x))621cur_size = (max_bytes - src_x);622}623624if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {625626r600_nomm_put_vb(dev);627r600_nomm_get_vb(dev);628if (!dev_priv->blit_vb)629return;630set_shaders(dev);631vb = r600_nomm_get_vb_ptr(dev);632}633634vb[0] = i2f(dst_x);635vb[1] = 0;636vb[2] = i2f(src_x);637vb[3] = 0;638639vb[4] = i2f(dst_x);640vb[5] = i2f(h);641vb[6] = i2f(src_x);642vb[7] = i2f(h);643644vb[8] = i2f(dst_x + cur_size);645vb[9] = i2f(h);646vb[10] = i2f(src_x + cur_size);647vb[11] = i2f(h);648649/* src */650set_tex_resource(dev_priv, FMT_8,651src_x + 
cur_size, h, src_x + cur_size,652src_gpu_addr);653654cp_set_surface_sync(dev_priv,655R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);656657/* dst */658set_render_target(dev_priv, COLOR_8,659dst_x + cur_size, h,660dst_gpu_addr);661662/* scissors */663set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);664665/* Vertex buffer setup */666vb_addr = dev_priv->gart_buffers_offset +667dev_priv->blit_vb->offset +668dev_priv->blit_vb->used;669set_vtx_resource(dev_priv, vb_addr);670671/* draw */672draw_auto(dev_priv);673674cp_set_surface_sync(dev_priv,675R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,676cur_size * h, dst_gpu_addr);677678vb += 12;679dev_priv->blit_vb->used += 12 * 4;680681src_gpu_addr += cur_size * h;682dst_gpu_addr += cur_size * h;683size_bytes -= cur_size * h;684}685} else {686max_bytes = 8192 * 4;687688while (size_bytes) {689int cur_size = size_bytes;690int src_x = (src_gpu_addr & 255);691int dst_x = (dst_gpu_addr & 255);692int h = 1;693src_gpu_addr = src_gpu_addr & ~255;694dst_gpu_addr = dst_gpu_addr & ~255;695696if (!src_x && !dst_x) {697h = (cur_size / max_bytes);698if (h > 8192)699h = 8192;700if (h == 0)701h = 1;702else703cur_size = max_bytes;704} else {705if (cur_size > max_bytes)706cur_size = max_bytes;707if (cur_size > (max_bytes - dst_x))708cur_size = (max_bytes - dst_x);709if (cur_size > (max_bytes - src_x))710cur_size = (max_bytes - src_x);711}712713if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {714r600_nomm_put_vb(dev);715r600_nomm_get_vb(dev);716if (!dev_priv->blit_vb)717return;718719set_shaders(dev);720vb = r600_nomm_get_vb_ptr(dev);721}722723vb[0] = i2f(dst_x / 4);724vb[1] = 0;725vb[2] = i2f(src_x / 4);726vb[3] = 0;727728vb[4] = i2f(dst_x / 4);729vb[5] = i2f(h);730vb[6] = i2f(src_x / 4);731vb[7] = i2f(h);732733vb[8] = i2f((dst_x + cur_size) / 4);734vb[9] = i2f(h);735vb[10] = i2f((src_x + cur_size) / 4);736vb[11] = i2f(h);737738/* src */739set_tex_resource(dev_priv, FMT_8_8_8_8,740(src_x + cur_size) / 4,741h, (src_x + 
cur_size) / 4,742src_gpu_addr);743744cp_set_surface_sync(dev_priv,745R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);746747/* dst */748set_render_target(dev_priv, COLOR_8_8_8_8,749(dst_x + cur_size) / 4, h,750dst_gpu_addr);751752/* scissors */753set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);754755/* Vertex buffer setup */756vb_addr = dev_priv->gart_buffers_offset +757dev_priv->blit_vb->offset +758dev_priv->blit_vb->used;759set_vtx_resource(dev_priv, vb_addr);760761/* draw */762draw_auto(dev_priv);763764cp_set_surface_sync(dev_priv,765R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,766cur_size * h, dst_gpu_addr);767768vb += 12;769dev_priv->blit_vb->used += 12 * 4;770771src_gpu_addr += cur_size * h;772dst_gpu_addr += cur_size * h;773size_bytes -= cur_size * h;774}775}776}777778void779r600_blit_swap(struct drm_device *dev,780uint64_t src_gpu_addr, uint64_t dst_gpu_addr,781int sx, int sy, int dx, int dy,782int w, int h, int src_pitch, int dst_pitch, int cpp)783{784drm_radeon_private_t *dev_priv = dev->dev_private;785int cb_format, tex_format;786int sx2, sy2, dx2, dy2;787u64 vb_addr;788u32 *vb;789790if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {791792r600_nomm_put_vb(dev);793r600_nomm_get_vb(dev);794if (!dev_priv->blit_vb)795return;796797set_shaders(dev);798}799vb = r600_nomm_get_vb_ptr(dev);800801sx2 = sx + w;802sy2 = sy + h;803dx2 = dx + w;804dy2 = dy + h;805806vb[0] = i2f(dx);807vb[1] = i2f(dy);808vb[2] = i2f(sx);809vb[3] = i2f(sy);810811vb[4] = i2f(dx);812vb[5] = i2f(dy2);813vb[6] = i2f(sx);814vb[7] = i2f(sy2);815816vb[8] = i2f(dx2);817vb[9] = i2f(dy2);818vb[10] = i2f(sx2);819vb[11] = i2f(sy2);820821switch(cpp) {822case 4:823cb_format = COLOR_8_8_8_8;824tex_format = FMT_8_8_8_8;825break;826case 2:827cb_format = COLOR_5_6_5;828tex_format = FMT_5_6_5;829break;830default:831cb_format = COLOR_8;832tex_format = FMT_8;833break;834}835836/* src */837set_tex_resource(dev_priv, tex_format,838src_pitch / cpp,839sy2, src_pitch / 
cpp,840src_gpu_addr);841842cp_set_surface_sync(dev_priv,843R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);844845/* dst */846set_render_target(dev_priv, cb_format,847dst_pitch / cpp, dy2,848dst_gpu_addr);849850/* scissors */851set_scissors(dev_priv, dx, dy, dx2, dy2);852853/* Vertex buffer setup */854vb_addr = dev_priv->gart_buffers_offset +855dev_priv->blit_vb->offset +856dev_priv->blit_vb->used;857set_vtx_resource(dev_priv, vb_addr);858859/* draw */860draw_auto(dev_priv);861862cp_set_surface_sync(dev_priv,863R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,864dst_pitch * dy2, dst_gpu_addr);865866dev_priv->blit_vb->used += 12 * 4;867}868869870