Path: blob/21.2-virgl/src/gallium/drivers/r300/r300_emit.c
4570 views
/*1* Copyright 2008 Corbin Simpson <[email protected]>2* Copyright 2009 Marek Olšák <[email protected]>3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE. */2223/* r300_emit: Functions for emitting state. */2425#include "util/format/u_format.h"26#include "util/u_math.h"2728#include "r300_context.h"29#include "r300_cb.h"30#include "r300_cs.h"31#include "r300_emit.h"32#include "r300_fs.h"33#include "r300_screen.h"34#include "r300_screen_buffer.h"35#include "r300_vs.h"3637void r300_emit_blend_state(struct r300_context* r300,38unsigned size, void* state)39{40struct r300_blend_state* blend = (struct r300_blend_state*)state;41struct pipe_framebuffer_state* fb =42(struct pipe_framebuffer_state*)r300->fb_state.state;43struct pipe_surface *cb;44CS_LOCALS(r300);4546cb = fb->nr_cbufs ? r300_get_nonnull_cb(fb, 0) : NULL;4748if (cb) {49if (cb->format == PIPE_FORMAT_R16G16B16A16_FLOAT) {50WRITE_CS_TABLE(blend->cb_noclamp, size);51} else if (cb->format == PIPE_FORMAT_R16G16B16X16_FLOAT) {52WRITE_CS_TABLE(blend->cb_noclamp_noalpha, size);53} else {54unsigned swz = r300_surface(cb)->colormask_swizzle;55WRITE_CS_TABLE(blend->cb_clamp[swz], size);56}57} else {58WRITE_CS_TABLE(blend->cb_no_readwrite, size);59}60}6162void r300_emit_blend_color_state(struct r300_context* r300,63unsigned size, void* state)64{65struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state;66CS_LOCALS(r300);6768WRITE_CS_TABLE(bc->cb, size);69}7071void r300_emit_clip_state(struct r300_context* r300,72unsigned size, void* state)73{74struct r300_clip_state* clip = (struct r300_clip_state*)state;75CS_LOCALS(r300);7677WRITE_CS_TABLE(clip->cb, size);78}7980void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state)81{82struct r300_dsa_state* dsa = (struct r300_dsa_state*)state;83struct pipe_framebuffer_state* fb =84(struct pipe_framebuffer_state*)r300->fb_state.state;85boolean is_r500 = r300->screen->caps.is_r500;86CS_LOCALS(r300);87uint32_t alpha_func = dsa->alpha_function;8889/* Choose the alpha ref value between 8-bit (FG_ALPHA_FUNC.AM_VAL) and90* 16-bit (FG_ALPHA_VALUE). */91if (is_r500 && (alpha_func & R300_FG_ALPHA_FUNC_ENABLE)) {92struct pipe_surface *cb = fb->nr_cbufs ? r300_get_nonnull_cb(fb, 0) : NULL;9394if (cb &&95(cb->format == PIPE_FORMAT_R16G16B16A16_FLOAT ||96cb->format == PIPE_FORMAT_R16G16B16X16_FLOAT)) {97alpha_func |= R500_FG_ALPHA_FUNC_FP16_ENABLE;98} else {99alpha_func |= R500_FG_ALPHA_FUNC_8BIT;100}101}102103/* Setup alpha-to-coverage. */104if (r300->alpha_to_coverage && r300->msaa_enable) {105/* Always set 3/6, it improves precision even for 2x and 4x MSAA. */106alpha_func |= R300_FG_ALPHA_FUNC_MASK_ENABLE |107R300_FG_ALPHA_FUNC_CFG_3_OF_6;108}109110BEGIN_CS(size);111OUT_CS_REG(R300_FG_ALPHA_FUNC, alpha_func);112OUT_CS_TABLE(fb->zsbuf ? &dsa->cb_begin : dsa->cb_zb_no_readwrite, size-2);113END_CS;114}115116static void get_rc_constant_state(117float vec[4],118struct r300_context * r300,119struct rc_constant * constant)120{121struct r300_textures_state* texstate = r300->textures_state.state;122struct r300_resource *tex;123124assert(constant->Type == RC_CONSTANT_STATE);125126/* vec should either be (0, 0, 0, 1), which should be a relatively safe127* RGBA or STRQ value, or it could be one of the RC_CONSTANT_STATE128* state factors. */129130switch (constant->u.State[0]) {131/* Factor for converting rectangle coords to132* normalized coords. Should only show up on non-r500. */133case RC_STATE_R300_TEXRECT_FACTOR:134tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture);135vec[0] = 1.0 / tex->tex.width0;136vec[1] = 1.0 / tex->tex.height0;137vec[2] = 0;138vec[3] = 1;139break;140141case RC_STATE_R300_TEXSCALE_FACTOR:142tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture);143/* Add a small number to the texture size to work around rounding errors in hw. */144vec[0] = tex->b.width0 / (tex->tex.width0 + 0.001f);145vec[1] = tex->b.height0 / (tex->tex.height0 + 0.001f);146vec[2] = tex->b.depth0 / (tex->tex.depth0 + 0.001f);147vec[3] = 1;148break;149150case RC_STATE_R300_VIEWPORT_SCALE:151vec[0] = r300->viewport.scale[0];152vec[1] = r300->viewport.scale[1];153vec[2] = r300->viewport.scale[2];154vec[3] = 1;155break;156157case RC_STATE_R300_VIEWPORT_OFFSET:158vec[0] = r300->viewport.translate[0];159vec[1] = r300->viewport.translate[1];160vec[2] = r300->viewport.translate[2];161vec[3] = 1;162break;163164default:165fprintf(stderr, "r300: Implementation error: "166"Unknown RC_CONSTANT type %d\n", constant->u.State[0]);167vec[0] = 0;168vec[1] = 0;169vec[2] = 0;170vec[3] = 1;171}172}173174/* Convert a normal single-precision float into the 7.16 format175* used by the R300 fragment shader.176*/177uint32_t pack_float24(float f)178{179union {180float fl;181uint32_t u;182} u;183float mantissa;184int exponent;185uint32_t float24 = 0;186187if (f == 0.0)188return 0;189190u.fl = f;191192mantissa = frexpf(f, &exponent);193194/* Handle -ve */195if (mantissa < 0) {196float24 |= (1 << 23);197mantissa = mantissa * -1.0;198}199/* Handle exponent, bias of 63 */200exponent += 62;201float24 |= (exponent << 16);202/* Kill 7 LSB of mantissa */203float24 |= (u.u & 0x7FFFFF) >> 7;204205return float24;206}207208void r300_emit_fs(struct r300_context* r300, unsigned size, void *state)209{210struct r300_fragment_shader *fs = r300_fs(r300);211CS_LOCALS(r300);212213WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size);214}215216void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state)217{218struct r300_fragment_shader *fs = r300_fs(r300);219struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;220unsigned count = fs->shader->externals_count;221unsigned i, j;222CS_LOCALS(r300);223224if (count == 0)225return;226227BEGIN_CS(size);228OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4);229if (buf->remap_table){230for (i = 0; i < count; i++) {231float *data = (float*)&buf->ptr[buf->remap_table[i]*4];232for (j = 0; j < 4; j++)233OUT_CS(pack_float24(data[j]));234}235} else {236for (i = 0; i < count; i++)237for (j = 0; j < 4; j++)238OUT_CS(pack_float24(*(float*)&buf->ptr[i*4+j]));239}240241END_CS;242}243244void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state)245{246struct r300_fragment_shader *fs = r300_fs(r300);247struct rc_constant_list *constants = &fs->shader->code.constants;248unsigned i;249unsigned count = fs->shader->rc_state_count;250unsigned first = fs->shader->externals_count;251unsigned end = constants->Count;252unsigned j;253CS_LOCALS(r300);254255if (count == 0)256return;257258BEGIN_CS(size);259for(i = first; i < end; ++i) {260if (constants->Constants[i].Type == RC_CONSTANT_STATE) {261float data[4];262263get_rc_constant_state(data, r300, &constants->Constants[i]);264265OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4);266for (j = 0; j < 4; j++)267OUT_CS(pack_float24(data[j]));268}269}270END_CS;271}272273void r500_emit_fs(struct r300_context* r300, unsigned size, void *state)274{275struct r300_fragment_shader *fs = r300_fs(r300);276CS_LOCALS(r300);277278WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size);279}280281void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state)282{283struct r300_fragment_shader *fs = r300_fs(r300);284struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;285unsigned count = fs->shader->externals_count;286CS_LOCALS(r300);287288if (count == 0)289return;290291BEGIN_CS(size);292OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);293OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4);294if (buf->remap_table){295for (unsigned i = 0; i < count; i++) {296uint32_t *data = &buf->ptr[buf->remap_table[i]*4];297OUT_CS_TABLE(data, 4);298}299} else {300OUT_CS_TABLE(buf->ptr, count * 4);301}302END_CS;303}304305void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state)306{307struct r300_fragment_shader *fs = r300_fs(r300);308struct rc_constant_list *constants = &fs->shader->code.constants;309unsigned i;310unsigned count = fs->shader->rc_state_count;311unsigned first = fs->shader->externals_count;312unsigned end = constants->Count;313CS_LOCALS(r300);314315if (count == 0)316return;317318BEGIN_CS(size);319for(i = first; i < end; ++i) {320if (constants->Constants[i].Type == RC_CONSTANT_STATE) {321float data[4];322323get_rc_constant_state(data, r300, &constants->Constants[i]);324325OUT_CS_REG(R500_GA_US_VECTOR_INDEX,326R500_GA_US_VECTOR_INDEX_TYPE_CONST |327(i & R500_GA_US_VECTOR_INDEX_MASK));328OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4);329OUT_CS_TABLE(data, 4);330}331}332END_CS;333}334335void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)336{337struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;338struct pipe_framebuffer_state* fb =339(struct pipe_framebuffer_state*)r300->fb_state.state;340uint32_t height = fb->height;341uint32_t width = fb->width;342CS_LOCALS(r300);343344if (r300->cbzb_clear) {345struct r300_surface *surf = r300_surface(fb->cbufs[0]);346347height = surf->cbzb_height;348width = surf->cbzb_width;349}350351DBG(r300, DBG_SCISSOR,352"r300: Scissor width: %i, height: %i, CBZB clear: %s\n",353width, height, r300->cbzb_clear ? "YES" : "NO");354355BEGIN_CS(size);356357/* Set up scissors.358* By writing to the SC registers, SC & US assert idle. */359OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);360if (r300->screen->caps.is_r500) {361OUT_CS(0);362OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) |363((height - 1) << R300_SCISSORS_Y_SHIFT));364} else {365OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |366(1440 << R300_SCISSORS_Y_SHIFT));367OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) |368((height + 1440-1) << R300_SCISSORS_Y_SHIFT));369}370371/* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */372OUT_CS_TABLE(gpuflush->cb_flush_clean, 6);373END_CS;374}375376void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state)377{378struct r300_aa_state *aa = (struct r300_aa_state*)state;379CS_LOCALS(r300);380381BEGIN_CS(size);382OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config);383384if (aa->dest) {385OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 3);386OUT_CS(aa->dest->offset);387OUT_CS(aa->dest->pitch & R300_RB3D_AARESOLVE_PITCH_MASK);388OUT_CS(R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |389R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE);390OUT_CS_RELOC(aa->dest);391} else {392OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0);393}394395END_CS;396}397398void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)399{400struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state;401struct r300_surface* surf;402unsigned i;403uint32_t rb3d_cctl = 0;404405CS_LOCALS(r300);406407BEGIN_CS(size);408409if (r300->screen->caps.is_r500) {410rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE;411}412/* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers. */413if (fb->nr_cbufs && r300->fb_multiwrite) {414rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs);415}416if (r300->cmask_in_use) {417rb3d_cctl |= R300_RB3D_CCTL_AA_COMPRESSION_ENABLE |418R300_RB3D_CCTL_CMASK_ENABLE;419}420421OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl);422423/* Set up colorbuffers. */424for (i = 0; i < fb->nr_cbufs; i++) {425surf = r300_surface(r300_get_nonnull_cb(fb, i));426427OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset);428OUT_CS_RELOC(surf);429430OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch);431OUT_CS_RELOC(surf);432433if (r300->cmask_in_use && i == 0) {434OUT_CS_REG(R300_RB3D_CMASK_OFFSET0, 0);435OUT_CS_REG(R300_RB3D_CMASK_PITCH0, surf->pitch_cmask);436OUT_CS_REG(R300_RB3D_COLOR_CLEAR_VALUE, r300->color_clear_value);437if (r300->screen->caps.is_r500 && r300->screen->info.drm_minor >= 29) {438OUT_CS_REG_SEQ(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);439OUT_CS(r300->color_clear_value_ar);440OUT_CS(r300->color_clear_value_gb);441}442}443}444445/* Set up the ZB part of the CBZB clear. */446if (r300->cbzb_clear) {447surf = r300_surface(fb->cbufs[0]);448449OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);450451OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset);452OUT_CS_RELOC(surf);453454OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch);455OUT_CS_RELOC(surf);456457DBG(r300, DBG_CBZB,458"CBZB clearing cbuf %08x %08x\n", surf->cbzb_format,459surf->cbzb_pitch);460}461/* Set up a zbuffer. */462else if (fb->zsbuf) {463surf = r300_surface(fb->zsbuf);464465OUT_CS_REG(R300_ZB_FORMAT, surf->format);466467OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset);468OUT_CS_RELOC(surf);469470OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch);471OUT_CS_RELOC(surf);472473if (r300->hyperz_enabled) {474/* HiZ RAM. */475OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);476OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz);477/* Z Mask RAM. (compressed zbuffer) */478OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);479OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask);480}481}482483END_CS;484}485486void r300_emit_hyperz_state(struct r300_context *r300,487unsigned size, void *state)488{489struct r300_hyperz_state *z = state;490CS_LOCALS(r300);491492if (z->flush)493WRITE_CS_TABLE(&z->cb_flush_begin, size);494else495WRITE_CS_TABLE(&z->cb_begin, size - 2);496}497498void r300_emit_hyperz_end(struct r300_context *r300)499{500struct r300_hyperz_state z =501*(struct r300_hyperz_state*)r300->hyperz_state.state;502503z.flush = 1;504z.zb_bw_cntl = 0;505z.zb_depthclearvalue = 0;506z.sc_hyperz = R300_SC_HYPERZ_ADJ_2;507z.gb_z_peq_config = 0;508509r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z);510}511512#define R300_NIBBLES(x0, y0, x1, y1, x2, y2, d0y, d0x) \513(((x0) & 0xf) | (((y0) & 0xf) << 4) | \514(((x1) & 0xf) << 8) | (((y1) & 0xf) << 12) | \515(((x2) & 0xf) << 16) | (((y2) & 0xf) << 20) | \516(((d0y) & 0xf) << 24) | (((d0x) & 0xf) << 28))517518static unsigned r300_get_mspos(int index, unsigned *p)519{520unsigned reg, i, distx, disty, dist;521522if (index == 0) {523/* MSPOS0 contains positions for samples 0,1,2 as (X,Y) pairs of nibbles,524* followed by a (Y,X) pair containing the minimum distance from the pixel525* edge:526* X0, Y0, X1, Y1, X2, Y2, D0_Y, D0_X527*528* There is a quirk when setting D0_X. The value represents the distance529* from the left edge of the pixel quad to the first sample in subpixels.530* All values less than eight should use the actual value, but „7‟ should531* be used for the distance „8‟. The hardware will convert 7 into 8 internally.532*/533distx = 11;534for (i = 0; i < 12; i += 2) {535if (p[i] < distx)536distx = p[i];537}538539disty = 11;540for (i = 1; i < 12; i += 2) {541if (p[i] < disty)542disty = p[i];543}544545if (distx == 8)546distx = 7;547548reg = R300_NIBBLES(p[0], p[1], p[2], p[3], p[4], p[5], disty, distx);549} else {550/* MSPOS1 contains positions for samples 3,4,5 as (X,Y) pairs of nibbles,551* followed by the minimum distance from the pixel edge (not sure if X or Y):552* X3, Y3, X4, Y4, X5, Y5, D1553*/554dist = 11;555for (i = 0; i < 12; i++) {556if (p[i] < dist)557dist = p[i];558}559560reg = R300_NIBBLES(p[6], p[7], p[8], p[9], p[10], p[11], dist, 0);561}562return reg;563}564565void r300_emit_fb_state_pipelined(struct r300_context *r300,566unsigned size, void *state)567{568/* The sample coordinates are in the range [0,11], because569* GB_TILE_CONFIG.SUBPIXEL is set to the 1/12 subpixel precision.570*571* Some sample coordinates reach to neighboring pixels and should not be used.572* (e.g. Y=11)573*574* The unused samples must be set to the positions of other valid samples. */575static unsigned sample_locs_1x[12] = {5766,6, 6,6, 6,6, 6,6, 6,6, 6,6577};578static unsigned sample_locs_2x[12] = {5793,9, 9,3, 9,3, 9,3, 9,3, 9,3580};581static unsigned sample_locs_4x[12] = {5824,4, 8,8, 2,10, 10,2, 10,2, 10,2583};584static unsigned sample_locs_6x[12] = {5853,1, 7,3, 11,5, 1,7, 5,9, 9,10586};587588struct pipe_framebuffer_state* fb =589(struct pipe_framebuffer_state*)r300->fb_state.state;590unsigned i, num_cbufs = fb->nr_cbufs;591unsigned mspos0, mspos1;592CS_LOCALS(r300);593594/* If we use the multiwrite feature, the colorbuffers 2,3,4 must be595* marked as UNUSED in the US block. */596if (r300->fb_multiwrite) {597num_cbufs = MIN2(num_cbufs, 1);598}599600BEGIN_CS(size);601602/* Colorbuffer format in the US block.603* (must be written after unpipelined regs) */604OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);605for (i = 0; i < num_cbufs; i++) {606OUT_CS(r300_surface(r300_get_nonnull_cb(fb, i))->format);607}608for (; i < 1; i++) {609OUT_CS(R300_US_OUT_FMT_C4_8 |610R300_C0_SEL_B | R300_C1_SEL_G |611R300_C2_SEL_R | R300_C3_SEL_A);612}613for (; i < 4; i++) {614OUT_CS(R300_US_OUT_FMT_UNUSED);615}616617/* Set sample positions. It depends on the framebuffer sample count.618* These are pipelined regs and as such cannot be moved to the AA state.619*/620switch (r300->num_samples) {621default:622mspos0 = r300_get_mspos(0, sample_locs_1x);623mspos1 = r300_get_mspos(1, sample_locs_1x);624break;625case 2:626mspos0 = r300_get_mspos(0, sample_locs_2x);627mspos1 = r300_get_mspos(1, sample_locs_2x);628break;629case 4:630mspos0 = r300_get_mspos(0, sample_locs_4x);631mspos1 = r300_get_mspos(1, sample_locs_4x);632break;633case 6:634mspos0 = r300_get_mspos(0, sample_locs_6x);635mspos1 = r300_get_mspos(1, sample_locs_6x);636break;637}638639OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);640OUT_CS(mspos0);641OUT_CS(mspos1);642END_CS;643}644645void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state)646{647struct r300_query *query = r300->query_current;648CS_LOCALS(r300);649650if (!query)651return;652653BEGIN_CS(size);654if (r300->screen->caps.family == CHIP_RV530) {655OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);656} else {657OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);658}659OUT_CS_REG(R300_ZB_ZPASS_DATA, 0);660END_CS;661query->begin_emitted = TRUE;662}663664static void r300_emit_query_end_frag_pipes(struct r300_context *r300,665struct r300_query *query)666{667struct r300_capabilities* caps = &r300->screen->caps;668uint32_t gb_pipes = r300->screen->info.r300_num_gb_pipes;669CS_LOCALS(r300);670671assert(gb_pipes);672673BEGIN_CS(6 * gb_pipes + 2);674/* I'm not so sure I like this switch, but it's hard to be elegant675* when there's so many special cases...676*677* So here's the basic idea. For each pipe, enable writes to it only,678* then put out the relocation for ZPASS_ADDR, taking into account a679* 4-byte offset for each pipe. RV380 and older are special; they have680* only two pipes, and the second pipe's enable is on bit 3, not bit 1,681* so there's a chipset cap for that. */682switch (gb_pipes) {683case 4:684/* pipe 3 only */685OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);686OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 3) * 4);687OUT_CS_RELOC(r300->query_current);688FALLTHROUGH;689case 3:690/* pipe 2 only */691OUT_CS_REG(R300_SU_REG_DEST, 1 << 2);692OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 2) * 4);693OUT_CS_RELOC(r300->query_current);694FALLTHROUGH;695case 2:696/* pipe 1 only */697/* As mentioned above, accommodate RV380 and older. */698OUT_CS_REG(R300_SU_REG_DEST,6991 << (caps->high_second_pipe ? 3 : 1));700OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4);701OUT_CS_RELOC(r300->query_current);702FALLTHROUGH;703case 1:704/* pipe 0 only */705OUT_CS_REG(R300_SU_REG_DEST, 1 << 0);706OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4);707OUT_CS_RELOC(r300->query_current);708break;709default:710fprintf(stderr, "r300: Implementation error: Chipset reports %d"711" pixel pipes!\n", gb_pipes);712abort();713}714715/* And, finally, reset it to normal... */716OUT_CS_REG(R300_SU_REG_DEST, 0xF);717END_CS;718}719720static void rv530_emit_query_end_single_z(struct r300_context *r300,721struct r300_query *query)722{723CS_LOCALS(r300);724725BEGIN_CS(8);726OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);727OUT_CS_REG(R300_ZB_ZPASS_ADDR, query->num_results * 4);728OUT_CS_RELOC(r300->query_current);729OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);730END_CS;731}732733static void rv530_emit_query_end_double_z(struct r300_context *r300,734struct r300_query *query)735{736CS_LOCALS(r300);737738BEGIN_CS(14);739OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);740OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4);741OUT_CS_RELOC(r300->query_current);742OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);743OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4);744OUT_CS_RELOC(r300->query_current);745OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);746END_CS;747}748749void r300_emit_query_end(struct r300_context* r300)750{751struct r300_capabilities *caps = &r300->screen->caps;752struct r300_query *query = r300->query_current;753754if (!query)755return;756757if (query->begin_emitted == FALSE)758return;759760if (caps->family == CHIP_RV530) {761if (r300->screen->info.r300_num_z_pipes == 2)762rv530_emit_query_end_double_z(r300, query);763else764rv530_emit_query_end_single_z(r300, query);765} else766r300_emit_query_end_frag_pipes(r300, query);767768query->begin_emitted = FALSE;769query->num_results += query->num_pipes;770771/* XXX grab all the results and reset the counter. */772if (query->num_results >= query->buf->size / 4 - 4) {773query->num_results = (query->buf->size / 4) / 2;774fprintf(stderr, "r300: Rewinding OQBO...\n");775}776}777778void r300_emit_invariant_state(struct r300_context *r300,779unsigned size, void *state)780{781CS_LOCALS(r300);782WRITE_CS_TABLE(state, size);783}784785void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state)786{787struct r300_rs_state* rs = state;788CS_LOCALS(r300);789790BEGIN_CS(size);791OUT_CS_TABLE(rs->cb_main, RS_STATE_MAIN_SIZE);792if (rs->polygon_offset_enable) {793if (r300->zbuffer_bpp == 16) {794OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5);795} else {796OUT_CS_TABLE(rs->cb_poly_offset_zb24, 5);797}798}799END_CS;800}801802void r300_emit_rs_block_state(struct r300_context* r300,803unsigned size, void* state)804{805struct r300_rs_block* rs = (struct r300_rs_block*)state;806unsigned i;807/* It's the same for both INST and IP tables */808unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1;809CS_LOCALS(r300);810811if (DBG_ON(r300, DBG_RS_BLOCK)) {812r500_dump_rs_block(rs);813814fprintf(stderr, "r300: RS emit:\n");815816for (i = 0; i < count; i++)817fprintf(stderr, " : ip %d: 0x%08x\n", i, rs->ip[i]);818819for (i = 0; i < count; i++)820fprintf(stderr, " : inst %d: 0x%08x\n", i, rs->inst[i]);821822fprintf(stderr, " : count: 0x%08x inst_count: 0x%08x\n",823rs->count, rs->inst_count);824}825826BEGIN_CS(size);827OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);828OUT_CS(rs->vap_vtx_state_cntl);829OUT_CS(rs->vap_vsm_vtx_assm);830OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);831OUT_CS(rs->vap_out_vtx_fmt[0]);832OUT_CS(rs->vap_out_vtx_fmt[1]);833OUT_CS_REG_SEQ(R300_GB_ENABLE, 1);834OUT_CS(rs->gb_enable);835836if (r300->screen->caps.is_r500) {837OUT_CS_REG_SEQ(R500_RS_IP_0, count);838} else {839OUT_CS_REG_SEQ(R300_RS_IP_0, count);840}841OUT_CS_TABLE(rs->ip, count);842843OUT_CS_REG_SEQ(R300_RS_COUNT, 2);844OUT_CS(rs->count);845OUT_CS(rs->inst_count);846847if (r300->screen->caps.is_r500) {848OUT_CS_REG_SEQ(R500_RS_INST_0, count);849} else {850OUT_CS_REG_SEQ(R300_RS_INST_0, count);851}852OUT_CS_TABLE(rs->inst, count);853END_CS;854}855856void r300_emit_sample_mask(struct r300_context *r300,857unsigned size, void *state)858{859unsigned mask = (*(unsigned*)state) & ((1 << 6)-1);860CS_LOCALS(r300);861862BEGIN_CS(size);863OUT_CS_REG(R300_SC_SCREENDOOR,864mask | (mask << 6) | (mask << 12) | (mask << 18));865END_CS;866}867868void r300_emit_scissor_state(struct r300_context* r300,869unsigned size, void* state)870{871struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state;872CS_LOCALS(r300);873874BEGIN_CS(size);875OUT_CS_REG_SEQ(R300_SC_CLIPRECT_TL_0, 2);876if (r300->screen->caps.is_r500) {877OUT_CS((scissor->minx << R300_CLIPRECT_X_SHIFT) |878(scissor->miny << R300_CLIPRECT_Y_SHIFT));879OUT_CS(((scissor->maxx - 1) << R300_CLIPRECT_X_SHIFT) |880((scissor->maxy - 1) << R300_CLIPRECT_Y_SHIFT));881} else {882OUT_CS(((scissor->minx + 1440) << R300_CLIPRECT_X_SHIFT) |883((scissor->miny + 1440) << R300_CLIPRECT_Y_SHIFT));884OUT_CS(((scissor->maxx + 1440-1) << R300_CLIPRECT_X_SHIFT) |885((scissor->maxy + 1440-1) << R300_CLIPRECT_Y_SHIFT));886}887END_CS;888}889890void r300_emit_textures_state(struct r300_context *r300,891unsigned size, void *state)892{893struct r300_textures_state *allstate = (struct r300_textures_state*)state;894struct r300_texture_sampler_state *texstate;895struct r300_resource *tex;896unsigned i;897boolean has_us_format = r300->screen->caps.has_us_format;898CS_LOCALS(r300);899900BEGIN_CS(size);901OUT_CS_REG(R300_TX_ENABLE, allstate->tx_enable);902903for (i = 0; i < allstate->count; i++) {904if ((1 << i) & allstate->tx_enable) {905texstate = &allstate->regs[i];906tex = r300_resource(allstate->sampler_views[i]->base.texture);907908OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0);909OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1);910OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (i * 4),911texstate->border_color);912913OUT_CS_REG(R300_TX_FORMAT0_0 + (i * 4), texstate->format.format0);914OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format.format1);915OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2);916917OUT_CS_REG(R300_TX_OFFSET_0 + (i * 4), texstate->format.tile_config);918OUT_CS_RELOC(tex);919920if (has_us_format) {921OUT_CS_REG(R500_US_FORMAT0_0 + (i * 4),922texstate->format.us_format0);923}924}925}926END_CS;927}928929void r300_emit_vertex_arrays(struct r300_context* r300, int offset,930boolean indexed, int instance_id)931{932struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;933struct pipe_vertex_element *velem = r300->velems->velem;934struct r300_resource *buf;935int i;936unsigned vertex_array_count = r300->velems->count;937unsigned packet_size = (vertex_array_count * 3 + 1) / 2;938struct pipe_vertex_buffer *vb1, *vb2;939unsigned *hw_format_size = r300->velems->format_size;940unsigned size1, size2, offset1, offset2, stride1, stride2;941CS_LOCALS(r300);942943BEGIN_CS(2 + packet_size + vertex_array_count * 2);944OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);945OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));946947if (instance_id == -1) {948/* Non-instanced arrays. This ignores instance_divisor and instance_id. */949for (i = 0; i < vertex_array_count - 1; i += 2) {950vb1 = &vbuf[velem[i].vertex_buffer_index];951vb2 = &vbuf[velem[i+1].vertex_buffer_index];952size1 = hw_format_size[i];953size2 = hw_format_size[i+1];954955OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |956R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));957OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);958OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);959}960961if (vertex_array_count & 1) {962vb1 = &vbuf[velem[i].vertex_buffer_index];963size1 = hw_format_size[i];964965OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));966OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);967}968969for (i = 0; i < vertex_array_count; i++) {970buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer.resource);971OUT_CS_RELOC(buf);972}973} else {974/* Instanced arrays. */975for (i = 0; i < vertex_array_count - 1; i += 2) {976vb1 = &vbuf[velem[i].vertex_buffer_index];977vb2 = &vbuf[velem[i+1].vertex_buffer_index];978size1 = hw_format_size[i];979size2 = hw_format_size[i+1];980981if (velem[i].instance_divisor) {982stride1 = 0;983offset1 = vb1->buffer_offset + velem[i].src_offset +984(instance_id / velem[i].instance_divisor) * vb1->stride;985} else {986stride1 = vb1->stride;987offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;988}989if (velem[i+1].instance_divisor) {990stride2 = 0;991offset2 = vb2->buffer_offset + velem[i+1].src_offset +992(instance_id / velem[i+1].instance_divisor) * vb2->stride;993} else {994stride2 = vb2->stride;995offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride;996}997998OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) |999R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2));1000OUT_CS(offset1);1001OUT_CS(offset2);1002}10031004if (vertex_array_count & 1) {1005vb1 = &vbuf[velem[i].vertex_buffer_index];1006size1 = hw_format_size[i];10071008if (velem[i].instance_divisor) {1009stride1 = 0;1010offset1 = vb1->buffer_offset + velem[i].src_offset +1011(instance_id / velem[i].instance_divisor) * vb1->stride;1012} else {1013stride1 = vb1->stride;1014offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;1015}10161017OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));1018OUT_CS(offset1);1019}10201021for (i = 0; i < vertex_array_count; i++) {1022buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer.resource);1023OUT_CS_RELOC(buf);1024}1025}1026END_CS;1027}10281029void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)1030{1031CS_LOCALS(r300);10321033DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, "1034"vertex size %d\n", r300->vbo,1035r300->vertex_info.size);1036/* Set the pointer to our vertex buffer. The emitted values are this:1037* PACKET3 [3D_LOAD_VBPNTR]1038* COUNT [1]1039* FORMAT [size | stride << 8]1040* OFFSET [offset into BO]1041* VBPNTR [relocated BO]1042*/1043BEGIN_CS(7);1044OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);1045OUT_CS(1 | (!indexed ? R300_VC_FORCE_PREFETCH : 0));1046OUT_CS(r300->vertex_info.size |1047(r300->vertex_info.size << 8));1048OUT_CS(r300->draw_vbo_offset);1049OUT_CS(0);10501051assert(r300->vbo);1052OUT_CS(0xc0001000); /* PKT3_NOP */1053OUT_CS(r300->rws->cs_lookup_buffer(&r300->cs, r300->vbo) * 4);1054END_CS;1055}10561057void r300_emit_vertex_stream_state(struct r300_context* r300,1058unsigned size, void* state)1059{1060struct r300_vertex_stream_state *streams =1061(struct r300_vertex_stream_state*)state;1062unsigned i;1063CS_LOCALS(r300);10641065if (DBG_ON(r300, DBG_PSC)) {1066fprintf(stderr, "r300: PSC emit:\n");10671068for (i = 0; i < streams->count; i++) {1069fprintf(stderr, " : prog_stream_cntl%d: 0x%08x\n", i,1070streams->vap_prog_stream_cntl[i]);1071}10721073for (i = 0; i < streams->count; i++) {1074fprintf(stderr, " : prog_stream_cntl_ext%d: 0x%08x\n", i,1075streams->vap_prog_stream_cntl_ext[i]);1076}1077}10781079BEGIN_CS(size);1080OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count);1081OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count);1082OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count);1083OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count);1084END_CS;1085}10861087void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state)1088{1089CS_LOCALS(r300);10901091BEGIN_CS(size);1092OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);1093END_CS;1094}10951096void r300_emit_vap_invariant_state(struct r300_context *r300,1097unsigned size, void *state)1098{1099CS_LOCALS(r300);1100WRITE_CS_TABLE(state, size);1101}11021103void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)1104{1105struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state;1106struct r300_vertex_program_code* code = &vs->code;1107struct r300_screen* r300screen = r300->screen;1108unsigned instruction_count = code->length / 4;11091110unsigned vtx_mem_size = r300screen->caps.is_r500 ? 128 : 72;1111unsigned input_count = MAX2(util_bitcount(code->InputsRead), 1);1112unsigned output_count = MAX2(util_bitcount(code->OutputsWritten), 1);1113unsigned temp_count = MAX2(code->num_temporaries, 1);11141115unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count,1116vtx_mem_size / output_count, 10);1117unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 5);11181119CS_LOCALS(r300);11201121BEGIN_CS(size);11221123/* R300_VAP_PVS_CODE_CNTL_01124* R300_VAP_PVS_CONST_CNTL1125* R300_VAP_PVS_CODE_CNTL_11126* See the r5xx docs for instructions on how to use these. */1127OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, R300_PVS_FIRST_INST(0) |1128R300_PVS_XYZW_VALID_INST(instruction_count - 1) |1129R300_PVS_LAST_INST(instruction_count - 1));1130OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, instruction_count - 1);11311132OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);1133OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length);1134OUT_CS_TABLE(code->body.d, code->length);11351136OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) |1137R300_PVS_NUM_CNTLRS(pvs_num_controllers) |1138R300_PVS_NUM_FPUS(r300screen->caps.num_vert_fpus) |1139R300_PVS_VF_MAX_VTX_NUM(12) |1140(r300->clip_halfz ? R300_DX_CLIP_SPACE_DEF : 0) |1141(r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));11421143/* Emit flow control instructions. Even if there are no fc instructions,1144* we still need to write the registers to make sure they are cleared. */1145OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops);1146if (r300screen->caps.is_r500) {1147OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, R300_VS_MAX_FC_OPS * 2);1148OUT_CS_TABLE(code->fc_op_addrs.r500, R300_VS_MAX_FC_OPS * 2);1149} else {1150OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, R300_VS_MAX_FC_OPS);1151OUT_CS_TABLE(code->fc_op_addrs.r300, R300_VS_MAX_FC_OPS);1152}1153OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, R300_VS_MAX_FC_OPS);1154OUT_CS_TABLE(code->fc_loop_index, R300_VS_MAX_FC_OPS);11551156END_CS;1157}11581159void r300_emit_vs_constants(struct r300_context* r300,1160unsigned size, void *state)1161{1162unsigned count =1163((struct r300_vertex_shader*)r300->vs_state.state)->externals_count;1164struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;1165struct r300_vertex_shader *vs = (struct r300_vertex_shader*)r300->vs_state.state;1166unsigned i;1167int imm_first = vs->externals_count;1168int imm_end = vs->code.constants.Count;1169int imm_count = vs->immediates_count;1170CS_LOCALS(r300);11711172BEGIN_CS(size);1173OUT_CS_REG(R300_VAP_PVS_CONST_CNTL,1174R300_PVS_CONST_BASE_OFFSET(buf->buffer_base) |1175R300_PVS_MAX_CONST_ADDR(MAX2(imm_end - 1, 0)));1176if (vs->externals_count) {1177OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,1178(r300->screen->caps.is_r500 ?1179R500_PVS_CONST_START : R300_PVS_CONST_START) + buf->buffer_base);1180OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);1181if (buf->remap_table){1182for (i = 0; i < count; i++) {1183uint32_t *data = &buf->ptr[buf->remap_table[i]*4];1184OUT_CS_TABLE(data, 4);1185}1186} else {1187OUT_CS_TABLE(buf->ptr, count * 4);1188}1189}11901191/* Emit immediates. */1192if (imm_count) {1193OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,1194(r300->screen->caps.is_r500 ?1195R500_PVS_CONST_START : R300_PVS_CONST_START) +1196buf->buffer_base + imm_first);1197OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4);1198for (i = imm_first; i < imm_end; i++) {1199const float *data = vs->code.constants.Constants[i].u.Immediate;1200OUT_CS_TABLE(data, 4);1201}1202}1203END_CS;1204}12051206void r300_emit_viewport_state(struct r300_context* r300,1207unsigned size, void* state)1208{1209struct r300_viewport_state* viewport = (struct r300_viewport_state*)state;1210CS_LOCALS(r300);12111212BEGIN_CS(size);1213OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);1214OUT_CS_TABLE(&viewport->xscale, 6);1215OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control);1216END_CS;1217}12181219void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)1220{1221struct pipe_framebuffer_state *fb =1222(struct pipe_framebuffer_state*)r300->fb_state.state;1223struct r300_resource* tex;1224CS_LOCALS(r300);12251226tex = r300_resource(fb->zsbuf->texture);12271228BEGIN_CS(size);1229OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2);1230OUT_CS(0);1231OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]);1232OUT_CS(r300->hiz_clear_value);1233END_CS;12341235/* Mark the current zbuffer's hiz ram as in use. */1236r300->hiz_in_use = TRUE;1237r300->hiz_func = HIZ_FUNC_NONE;1238r300_mark_atom_dirty(r300, &r300->hyperz_state);1239}12401241void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state)1242{1243struct pipe_framebuffer_state *fb =1244(struct pipe_framebuffer_state*)r300->fb_state.state;1245struct r300_resource *tex;1246CS_LOCALS(r300);12471248tex = r300_resource(fb->zsbuf->texture);12491250BEGIN_CS(size);1251OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);1252OUT_CS(0);1253OUT_CS(tex->tex.zmask_dwords[fb->zsbuf->u.tex.level]);1254OUT_CS(0);1255END_CS;12561257/* Mark the current zbuffer's zmask as in use. */1258r300->zmask_in_use = TRUE;1259r300_mark_atom_dirty(r300, &r300->hyperz_state);1260}12611262void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state)1263{1264struct pipe_framebuffer_state *fb =1265(struct pipe_framebuffer_state*)r300->fb_state.state;1266struct r300_resource *tex;1267CS_LOCALS(r300);12681269tex = r300_resource(fb->cbufs[0]->texture);12701271BEGIN_CS(size);1272OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_CMASK, 2);1273OUT_CS(0);1274OUT_CS(tex->tex.cmask_dwords);1275OUT_CS(0);1276END_CS;12771278/* Mark the current zbuffer's zmask as in use. */1279r300->cmask_in_use = TRUE;1280r300_mark_fb_state_dirty(r300, R300_CHANGED_CMASK_ENABLE);1281}12821283void r300_emit_ztop_state(struct r300_context* r300,1284unsigned size, void* state)1285{1286struct r300_ztop_state* ztop = (struct r300_ztop_state*)state;1287CS_LOCALS(r300);12881289BEGIN_CS(size);1290OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top);1291END_CS;1292}12931294void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state)1295{1296CS_LOCALS(r300);12971298BEGIN_CS(size);1299OUT_CS_REG(R300_TX_INVALTAGS, 0);1300END_CS;1301}13021303boolean r300_emit_buffer_validate(struct r300_context *r300,1304boolean do_validate_vertex_buffers,1305struct pipe_resource *index_buffer)1306{1307struct pipe_framebuffer_state *fb =1308(struct pipe_framebuffer_state*)r300->fb_state.state;1309struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;1310struct r300_textures_state *texstate =1311(struct r300_textures_state*)r300->textures_state.state;1312struct r300_resource *tex;1313unsigned i;1314boolean flushed = FALSE;13151316validate:1317if (r300->fb_state.dirty) {1318/* Color buffers... */1319for (i = 0; i < fb->nr_cbufs; i++) {1320if (!fb->cbufs[i])1321continue;1322tex = r300_resource(fb->cbufs[i]->texture);1323assert(tex && tex->buf && "cbuf is marked, but NULL!");1324r300->rws->cs_add_buffer(&r300->cs, tex->buf,1325RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED,1326r300_surface(fb->cbufs[i])->domain,1327tex->b.nr_samples > 1 ?1328RADEON_PRIO_COLOR_BUFFER_MSAA :1329RADEON_PRIO_COLOR_BUFFER);1330}1331/* ...depth buffer... */1332if (fb->zsbuf) {1333tex = r300_resource(fb->zsbuf->texture);1334assert(tex && tex->buf && "zsbuf is marked, but NULL!");1335r300->rws->cs_add_buffer(&r300->cs, tex->buf,1336RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED,1337r300_surface(fb->zsbuf)->domain,1338tex->b.nr_samples > 1 ?1339RADEON_PRIO_DEPTH_BUFFER_MSAA :1340RADEON_PRIO_DEPTH_BUFFER);1341}1342}1343/* The AA resolve buffer. */1344if (r300->aa_state.dirty) {1345if (aa->dest) {1346r300->rws->cs_add_buffer(&r300->cs, aa->dest->buf,1347RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED,1348aa->dest->domain,1349RADEON_PRIO_COLOR_BUFFER);1350}1351}1352if (r300->textures_state.dirty) {1353/* ...textures... */1354for (i = 0; i < texstate->count; i++) {1355if (!(texstate->tx_enable & (1 << i))) {1356continue;1357}13581359tex = r300_resource(texstate->sampler_views[i]->base.texture);1360r300->rws->cs_add_buffer(&r300->cs, tex->buf,1361RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,1362tex->domain, RADEON_PRIO_SAMPLER_TEXTURE);1363}1364}1365/* ...occlusion query buffer... */1366if (r300->query_current)1367r300->rws->cs_add_buffer(&r300->cs, r300->query_current->buf,1368RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED,1369RADEON_DOMAIN_GTT,1370RADEON_PRIO_QUERY);1371/* ...vertex buffer for SWTCL path... */1372if (r300->vbo)1373r300->rws->cs_add_buffer(&r300->cs, r300->vbo,1374RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,1375RADEON_DOMAIN_GTT,1376RADEON_PRIO_VERTEX_BUFFER);1377/* ...vertex buffers for HWTCL path... */1378if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {1379struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;1380struct pipe_vertex_buffer *last = r300->vertex_buffer +1381r300->nr_vertex_buffers;1382struct pipe_resource *buf;13831384for (; vbuf != last; vbuf++) {1385buf = vbuf->buffer.resource;1386if (!buf)1387continue;13881389r300->rws->cs_add_buffer(&r300->cs, r300_resource(buf)->buf,1390RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,1391r300_resource(buf)->domain,1392RADEON_PRIO_SAMPLER_BUFFER);1393}1394}1395/* ...and index buffer for HWTCL path. */1396if (index_buffer)1397r300->rws->cs_add_buffer(&r300->cs, r300_resource(index_buffer)->buf,1398RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,1399r300_resource(index_buffer)->domain,1400RADEON_PRIO_INDEX_BUFFER);14011402/* Now do the validation (flush is called inside cs_validate on failure). */1403if (!r300->rws->cs_validate(&r300->cs)) {1404/* Ooops, an infinite loop, give up. */1405if (flushed)1406return FALSE;14071408flushed = TRUE;1409goto validate;1410}14111412return TRUE;1413}14141415unsigned r300_get_num_dirty_dwords(struct r300_context *r300)1416{1417struct r300_atom* atom;1418unsigned dwords = 0;14191420foreach_dirty_atom(r300, atom) {1421if (atom->dirty) {1422dwords += atom->size;1423}1424}14251426/* let's reserve some more, just in case */1427dwords += 32;14281429return dwords;1430}14311432unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)1433{1434unsigned dwords = 0;14351436/* Emitted in flush. */1437dwords += 26; /* emit_query_end */1438dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */1439if (r300->screen->caps.is_r500)1440dwords += 2; /* emit_index_bias */1441dwords += 3; /* MSPOS */14421443return dwords;1444}14451446/* Emit all dirty state. */1447void r300_emit_dirty_state(struct r300_context* r300)1448{1449struct r300_atom *atom;14501451foreach_dirty_atom(r300, atom) {1452if (atom->dirty) {1453atom->emit(r300, atom->size, atom->state);1454atom->dirty = FALSE;1455}1456}14571458r300->first_dirty = NULL;1459r300->last_dirty = NULL;1460r300->dirty_hw++;1461}146214631464