Path: blob/21.2-virgl/src/gallium/drivers/r600/r600_hw_context.c
/*
 * Copyright 2010 Jerome Glisse <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Jerome Glisse
 */
#include "r600_pipe.h"
#include "r600d.h"
#include "util/u_memory.h"
#include <errno.h>
#include <unistd.h>

void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
			boolean count_draw_in, unsigned num_atomics)
{
	/* Flush the DMA IB if it's not empty. */
	if (radeon_emitted(&ctx->b.dma.cs, 0))
		ctx->b.dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL);

	if (!radeon_cs_memory_below_limit(ctx->b.screen, &ctx->b.gfx.cs,
					  ctx->b.vram, ctx->b.gtt)) {
		ctx->b.gtt = 0;
		ctx->b.vram = 0;
		ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
		return;
	}
	/* All of it will be accounted for once the relocations are emitted. */
	ctx->b.gtt = 0;
	ctx->b.vram = 0;

	/* Check available space in CS. */
	if (count_draw_in) {
		uint64_t mask;

		/* The number of dwords all the dirty states would take. */
		mask = ctx->dirty_atoms;
		while (mask != 0)
			num_dw += ctx->atoms[u_bit_scan64(&mask)]->num_dw;

		/* The upper bound of how much space a draw command would take. */
		num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
	}

	/* Add atomic counters: 8 dwords pre + 8 post per counter, plus 16 post if any counters. */
	num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0);
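	/* For example, with num_atomics == 2 the line above reserves
	 * 2 * 16 + 16 = 48 dwords: 8 dwords before and 8 dwords after each
	 * of the two counters, plus a 16-dword tail that is only needed when
	 * at least one counter is bound.
	 */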

	/* Count in r600_suspend_queries. */
	num_dw += ctx->b.num_cs_dw_queries_suspend;

	/* Count in streamout_end at the end of CS. */
	if (ctx->b.streamout.begin_emitted) {
		num_dw += ctx->b.streamout.num_dw_for_end;
	}

	/* SX_MISC */
	if (ctx->b.chip_class == R600) {
		num_dw += 3;
	}

	/* Count in framebuffer cache flushes at the end of CS. */
	num_dw += R600_MAX_FLUSH_CS_DWORDS;

	/* The fence at the end of CS. */
	num_dw += 10;

	/* Flush if there's not enough space. */
	if (!ctx->b.ws->cs_check_space(&ctx->b.gfx.cs, num_dw, false)) {
		ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
	}
}

void r600_flush_emit(struct r600_context *rctx)
{
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	unsigned cp_coher_cntl = 0;
	unsigned wait_until = 0;

	if (!rctx->b.flags) {
		return;
	}

	/* Ensure coherency between streamout and shaders. */
	if (rctx->b.flags & R600_CONTEXT_STREAMOUT_FLUSH)
		rctx->b.flags |= r600_get_flush_flags(R600_COHERENCY_SHADER);

	if (rctx->b.flags & R600_CONTEXT_WAIT_3D_IDLE) {
		wait_until |= S_008040_WAIT_3D_IDLE(1);
	}
	if (rctx->b.flags & R600_CONTEXT_WAIT_CP_DMA_IDLE) {
		wait_until |= S_008040_WAIT_CP_DMA_IDLE(1);
	}

	if (wait_until) {
		/* Use of WAIT_UNTIL is deprecated on Cayman+ */
		if (rctx->b.family >= CHIP_CAYMAN) {
			/* emit a PS partial flush on Cayman/TN instead */
			rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
		}
	}

	/* Wait packets must be executed first, because SURFACE_SYNC doesn't
	 * wait for shaders if it's not flushing CB or DB.
	 */
	if (rctx->b.flags & R600_CONTEXT_PS_PARTIAL_FLUSH) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
	}

	if (rctx->b.flags & R600_CONTEXT_CS_PARTIAL_FLUSH) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
	}

	if (wait_until) {
		/* Use of WAIT_UNTIL is deprecated on Cayman+ */
		if (rctx->b.family < CHIP_CAYMAN) {
			/* wait for things to settle */
			radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
		}
	}

	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_CB_META)) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
	}

	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_DB_META)) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));

		/* Set FULL_CACHE_ENA for DB META flushes on r7xx and later.
		 *
		 * This hack predates the use of FLUSH_AND_INV_DB_META, so it's
		 * unclear whether it's still needed or even whether it has
		 * any effect.
		 */
		cp_coher_cntl |= S_0085F0_FULL_CACHE_ENA(1);
	}

	if (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV ||
	    (rctx->b.chip_class == R600 && rctx->b.flags & R600_CONTEXT_STREAMOUT_FLUSH)) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0));
	}

	if (rctx->b.flags & R600_CONTEXT_INV_CONST_CACHE) {
		/* Direct constant addressing uses the shader cache.
		 * Indirect constant addressing uses the vertex cache. */
		cp_coher_cntl |= S_0085F0_SH_ACTION_ENA(1) |
				 (rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1)
							 : S_0085F0_TC_ACTION_ENA(1));
	}
	if (rctx->b.flags & R600_CONTEXT_INV_VERTEX_CACHE) {
		cp_coher_cntl |= rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1)
							: S_0085F0_TC_ACTION_ENA(1);
	}
	if (rctx->b.flags & R600_CONTEXT_INV_TEX_CACHE) {
		/* Textures use the texture cache.
		 * Texture buffer objects use the vertex cache. */
		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1) |
				 (rctx->has_vertex_cache ? S_0085F0_VC_ACTION_ENA(1) : 0);
	}
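
	/* To summarize the cache-invalidation mapping above: on parts with a
	 * vertex cache, direct constants go through the shader cache (SH),
	 * indirect constants and texture buffer objects through the vertex
	 * cache (VC), and textures through the texture cache (TC); parts
	 * without a vertex cache route the VC cases through TC instead.
	 */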

	/* Don't use the DB CP COHER logic on r6xx.
	 * There are hw bugs.
	 */
	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_DB)) {
		cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
				 S_0085F0_DB_DEST_BASE_ENA(1) |
				 S_0085F0_SMX_ACTION_ENA(1);
	}

	/* Don't use the CB CP COHER logic on r6xx.
	 * There are hw bugs.
	 */
	if (rctx->b.chip_class >= R700 &&
	    (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV_CB)) {
		cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
				 S_0085F0_CB0_DEST_BASE_ENA(1) |
				 S_0085F0_CB1_DEST_BASE_ENA(1) |
				 S_0085F0_CB2_DEST_BASE_ENA(1) |
				 S_0085F0_CB3_DEST_BASE_ENA(1) |
				 S_0085F0_CB4_DEST_BASE_ENA(1) |
				 S_0085F0_CB5_DEST_BASE_ENA(1) |
				 S_0085F0_CB6_DEST_BASE_ENA(1) |
				 S_0085F0_CB7_DEST_BASE_ENA(1) |
				 S_0085F0_SMX_ACTION_ENA(1);
		if (rctx->b.chip_class >= EVERGREEN)
			cp_coher_cntl |= S_0085F0_CB8_DEST_BASE_ENA(1) |
					 S_0085F0_CB9_DEST_BASE_ENA(1) |
					 S_0085F0_CB10_DEST_BASE_ENA(1) |
					 S_0085F0_CB11_DEST_BASE_ENA(1);
	}

	if (rctx->b.chip_class >= R700 &&
	    rctx->b.flags & R600_CONTEXT_STREAMOUT_FLUSH) {
		cp_coher_cntl |= S_0085F0_SO0_DEST_BASE_ENA(1) |
				 S_0085F0_SO1_DEST_BASE_ENA(1) |
				 S_0085F0_SO2_DEST_BASE_ENA(1) |
				 S_0085F0_SO3_DEST_BASE_ENA(1) |
				 S_0085F0_SMX_ACTION_ENA(1);
	}

	/* Workaround for buggy flushing on some R6xx chipsets. */
	if ((rctx->b.flags & (R600_CONTEXT_FLUSH_AND_INV |
			      R600_CONTEXT_STREAMOUT_FLUSH)) &&
	    (rctx->b.family == CHIP_RV670 ||
	     rctx->b.family == CHIP_RS780 ||
	     rctx->b.family == CHIP_RS880)) {
		cp_coher_cntl |= S_0085F0_CB1_DEST_BASE_ENA(1) |
				 S_0085F0_DEST_BASE_0_ENA(1);
	}

	if (cp_coher_cntl) {
		radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
		radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
		radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */
		radeon_emit(cs, 0);             /* CP_COHER_BASE */
		radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
	}
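
	/* A note on the SURFACE_SYNC packet above: CP_COHER_BASE = 0 together
	 * with CP_COHER_SIZE = 0xffffffff requests the sync over the full
	 * address range rather than a single surface, so every cache action
	 * selected in cp_coher_cntl applies to all memory.
	 */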

	if (rctx->b.flags & R600_CONTEXT_START_PIPELINE_STATS) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) |
				EVENT_INDEX(0));
	} else if (rctx->b.flags & R600_CONTEXT_STOP_PIPELINE_STATS) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_STOP) |
				EVENT_INDEX(0));
	}

	/* everything is properly flushed */
	rctx->b.flags = 0;
}

void r600_context_gfx_flush(void *context, unsigned flags,
			    struct pipe_fence_handle **fence)
{
	struct r600_context *ctx = context;
	struct radeon_cmdbuf *cs = &ctx->b.gfx.cs;
	struct radeon_winsys *ws = ctx->b.ws;

	if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
		return;

	if (r600_check_device_reset(&ctx->b))
		return;

	r600_preflush_suspend_features(&ctx->b);

	/* flush the framebuffer cache */
	ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV |
			R600_CONTEXT_FLUSH_AND_INV_CB |
			R600_CONTEXT_FLUSH_AND_INV_DB |
			R600_CONTEXT_FLUSH_AND_INV_CB_META |
			R600_CONTEXT_FLUSH_AND_INV_DB_META |
			R600_CONTEXT_WAIT_3D_IDLE |
			R600_CONTEXT_WAIT_CP_DMA_IDLE;

	r600_flush_emit(ctx);

	if (ctx->trace_buf)
		eg_trace_emit(ctx);
	/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
	if (ctx->b.chip_class == R600) {
		radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
	}

	if (ctx->is_debug) {
		/* Save the IB for debug contexts. */
		radeon_clear_saved_cs(&ctx->last_gfx);
		radeon_save_cs(ws, cs, &ctx->last_gfx, true);
		r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
		r600_resource_reference(&ctx->trace_buf, NULL);
	}
	/* Flush the CS. */
	ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence);
	if (fence)
		ws->fence_reference(fence, ctx->b.last_gfx_fence);
	ctx->b.num_gfx_cs_flushes++;
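
	/* If the fence wait below fails on a debug context (e.g. a GPU hang),
	 * the driver dumps its debug state to the file named by the
	 * R600_TRACE environment variable and exits; if R600_TRACE is unset,
	 * it exits without dumping. (R600_TRACE=/tmp/r600.dump is merely an
	 * illustrative value; any writable path works.)
	 */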
	if (ctx->is_debug) {
		if (!ws->fence_wait(ws, ctx->b.last_gfx_fence, 10000000)) {
			const char *fname = getenv("R600_TRACE");
			if (!fname)
				exit(-1);
			FILE *fl = fopen(fname, "w+");
			if (fl) {
				eg_dump_debug_state(&ctx->b.b, fl, 0);
				fclose(fl);
			} else
				perror(fname);
			exit(-1);
		}
	}
	r600_begin_new_cs(ctx);
}

void r600_begin_new_cs(struct r600_context *ctx)
{
	unsigned shader;

	if (ctx->is_debug) {
		uint32_t zero = 0;

		/* Create a buffer used for writing trace IDs and initialize it to 0. */
		assert(!ctx->trace_buf);
		ctx->trace_buf = (struct r600_resource*)
			pipe_buffer_create(ctx->b.b.screen, 0,
					   PIPE_USAGE_STAGING, 4);
		if (ctx->trace_buf)
			pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
						    0, sizeof(zero), &zero);
		ctx->trace_id = 0;
	}

	if (ctx->trace_buf)
		eg_trace_emit(ctx);

	ctx->b.flags = 0;
	ctx->b.gtt = 0;
	ctx->b.vram = 0;

	/* Begin a new CS. */
	r600_emit_command_buffer(&ctx->b.gfx.cs, &ctx->start_cs_cmd);

	/* Re-emit states. */
	r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->blend_color.atom);
	r600_mark_atom_dirty(ctx, &ctx->cb_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->clip_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->clip_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->db_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
	if (ctx->b.chip_class >= EVERGREEN) {
		r600_mark_atom_dirty(ctx, &ctx->fragment_images.atom);
		r600_mark_atom_dirty(ctx, &ctx->fragment_buffers.atom);
		r600_mark_atom_dirty(ctx, &ctx->compute_images.atom);
		r600_mark_atom_dirty(ctx, &ctx->compute_buffers.atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_PS].atom);
	r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
	ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	r600_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
	ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	r600_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
	if (ctx->b.chip_class <= EVERGREEN) {
		r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
	r600_mark_atom_dirty(ctx, &ctx->vertex_fetch_shader.atom);
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_ES].atom);
	r600_mark_atom_dirty(ctx, &ctx->shader_stages.atom);
	if (ctx->gs_shader) {
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_GS].atom);
		r600_mark_atom_dirty(ctx, &ctx->gs_rings.atom);
	}
	if (ctx->tes_shader) {
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[EG_HW_STAGE_HS].atom);
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[EG_HW_STAGE_LS].atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_VS].atom);
	r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
	r600_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);

	if (ctx->blend_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
	if (ctx->dsa_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->dsa_state.atom);
	if (ctx->rasterizer_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->rasterizer_state.atom);

	if (ctx->b.chip_class <= R700) {
		r600_mark_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
	}

	ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
	r600_vertex_buffers_dirty(ctx);

	/* Re-emit shader resources. */
	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
		struct r600_constbuf_state *constbuf = &ctx->constbuf_state[shader];
		struct r600_textures_info *samplers = &ctx->samplers[shader];

		constbuf->dirty_mask = constbuf->enabled_mask;
		samplers->views.dirty_mask = samplers->views.enabled_mask;
		samplers->states.dirty_mask = samplers->states.enabled_mask;

		r600_constant_buffers_dirty(ctx, constbuf);
		r600_sampler_views_dirty(ctx, &samplers->views);
		r600_sampler_states_dirty(ctx, &samplers->states);
	}

	for (shader = 0; shader < ARRAY_SIZE(ctx->scratch_buffers); shader++) {
		ctx->scratch_buffers[shader].dirty = true;
	}

	r600_postflush_resume_features(&ctx->b);

	/* Re-emit the draw state. */
	ctx->last_primitive_type = -1;
	ctx->last_start_instance = -1;
	ctx->last_rast_prim = -1;
	ctx->current_rast_prim = -1;

	assert(!ctx->b.gfx.cs.prev_dw);
	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs.current.cdw;
}

void r600_emit_pfp_sync_me(struct r600_context *rctx)
{
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;

	if (rctx->b.chip_class >= EVERGREEN &&
	    rctx->b.screen->info.drm_minor >= 46) {
		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
		radeon_emit(cs, 0);
	} else {
		/* Emulate PFP_SYNC_ME by writing a value to memory in ME and
		 * waiting for it in PFP.
		 */
		struct r600_resource *buf = NULL;
		unsigned offset, reloc;
		uint64_t va;

		/* 16-byte address alignment is required by WAIT_REG_MEM. */
		u_suballocator_alloc(&rctx->b.allocator_zeroed_memory, 4, 16,
				     &offset, (struct pipe_resource**)&buf);
		if (!buf) {
			/* This is too heavyweight, but will work. */
			rctx->b.gfx.flush(rctx, PIPE_FLUSH_ASYNC, NULL);
			return;
		}

		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, buf,
						  RADEON_USAGE_READWRITE,
						  RADEON_PRIO_FENCE);

		va = buf->gpu_address + offset;
		assert(va % 16 == 0);

		/* Write 1 to memory in ME. */
		radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
		radeon_emit(cs, va);
		radeon_emit(cs, ((va >> 32) & 0xff) | MEM_WRITE_32_BITS);
		radeon_emit(cs, 1);
		radeon_emit(cs, 0);

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, reloc);

		/* Wait in PFP (PFP can only do GEQUAL against memory). */
		radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
		radeon_emit(cs, WAIT_REG_MEM_GEQUAL |
				WAIT_REG_MEM_MEMORY |
				WAIT_REG_MEM_PFP);
		radeon_emit(cs, va);
		radeon_emit(cs, va >> 32);
		radeon_emit(cs, 1);          /* reference value */
		radeon_emit(cs, 0xffffffff); /* mask */
		radeon_emit(cs, 4);          /* poll interval */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, reloc);

		r600_resource_reference(&buf, NULL);
	}
}

/* The max number of bytes to copy per packet. */
#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
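/* (1 << 21) - 8 = 2097144 bytes (0x1FFFF8), just under the 0x1FFFFF maximum
 * of the 21-bit BYTE_COUNT field [20:0] written below; the 8-byte pull-back
 * presumably keeps each per-packet count 8-byte aligned.
 */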

void r600_cp_dma_copy_buffer(struct r600_context *rctx,
			     struct pipe_resource *dst, uint64_t dst_offset,
			     struct pipe_resource *src, uint64_t src_offset,
			     unsigned size)
{
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;

	assert(size);
	assert(rctx->screen->b.has_cp_dma);

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(dst, &r600_resource(dst)->valid_buffer_range, dst_offset,
		       dst_offset + size);

	dst_offset += r600_resource(dst)->gpu_address;
	src_offset += r600_resource(src)->gpu_address;

	/* Flush the caches where the resources are bound. */
	rctx->b.flags |= r600_get_flush_flags(R600_COHERENCY_SHADER) |
			 R600_CONTEXT_WAIT_3D_IDLE;

	/* There are differences between R700 and EG in CP DMA,
	 * but we only use the common bits here. */
	while (size) {
		unsigned sync = 0;
		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
		unsigned src_reloc, dst_reloc;

		r600_need_cs_space(rctx,
				   10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
				   3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);

		/* Flush the caches for the first copy only. */
		if (rctx->b.flags) {
			r600_flush_emit(rctx);
		}

		/* Do the synchronization after the last copy, so that all data is written to memory. */
		if (size == byte_count) {
			sync = PKT3_CP_DMA_CP_SYNC;
		}

		/* This must be done after r600_need_cs_space. */
		src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)src,
						      RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
		dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)dst,
						      RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);

		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
		radeon_emit(cs, src_offset);                         /* SRC_ADDR_LO [31:0] */
		radeon_emit(cs, sync | ((src_offset >> 32) & 0xff)); /* CP_SYNC [31] | SRC_ADDR_HI [7:0] */
		radeon_emit(cs, dst_offset);                         /* DST_ADDR_LO [31:0] */
		radeon_emit(cs, (dst_offset >> 32) & 0xff);          /* DST_ADDR_HI [7:0] */
		radeon_emit(cs, byte_count);                         /* COMMAND [29:22] | BYTE_COUNT [20:0] */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, src_reloc);
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, dst_reloc);

		size -= byte_count;
		src_offset += byte_count;
		dst_offset += byte_count;
	}
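
	/* For example, a 5,000,000-byte copy is split by the loop above into
	 * ceil(5000000 / 2097144) = 3 CP DMA packets, with CP_SYNC set only
	 * on the last one so the GPU synchronizes once, after all chunks have
	 * landed in memory.
	 */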

	/* CP_DMA_CP_SYNC doesn't wait for idle on R6xx, but this does. */
	if (rctx->b.chip_class == R600)
		radeon_set_config_reg(cs, R_008040_WAIT_UNTIL,
				      S_008040_WAIT_CP_DMA_IDLE(1));

	/* CP DMA is executed in ME, but index buffers are read by PFP.
	 * This ensures that ME (CP DMA) is idle before PFP starts fetching
	 * indices. If we wanted to execute CP DMA in PFP, this packet
	 * should precede it.
	 */
	r600_emit_pfp_sync_me(rctx);
}

void r600_dma_copy_buffer(struct r600_context *rctx,
			  struct pipe_resource *dst,
			  struct pipe_resource *src,
			  uint64_t dst_offset,
			  uint64_t src_offset,
			  uint64_t size)
{
	struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
	unsigned i, ncopy, csize;
	struct r600_resource *rdst = (struct r600_resource*)dst;
	struct r600_resource *rsrc = (struct r600_resource*)src;

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&rdst->b.b, &rdst->valid_buffer_range, dst_offset,
		       dst_offset + size);

	size >>= 2; /* convert to dwords */
	ncopy = (size / R600_DMA_COPY_MAX_SIZE_DW) + !!(size % R600_DMA_COPY_MAX_SIZE_DW);

	r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc);
	for (i = 0; i < ncopy; i++) {
		csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
		/* Emit relocs before writing the CS so that the CS is always in a consistent state. */
		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ, 0);
		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE, 0);
		radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize));
		radeon_emit(cs, dst_offset & 0xfffffffc);
		radeon_emit(cs, src_offset & 0xfffffffc);
		radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
		radeon_emit(cs, (src_offset >> 32UL) & 0xff);
		dst_offset += csize << 2;
		src_offset += csize << 2;
		size -= csize;
	}
}
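
/* Sizing note for r600_dma_copy_buffer: each DMA_PACKET_COPY iteration emits
 * exactly 5 dwords (one packet header plus four address dwords), which is why
 * r600_need_dma_space reserves ncopy * 5 dwords up front. The function also
 * assumes dword-aligned sizes and offsets: size is converted to dwords with
 * size >>= 2, and the low two address bits are masked off with 0xfffffffc.
 */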