Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
4574 views
/*1* Copyright 2010 Christoph Bumiller2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "pipe/p_context.h"23#include "pipe/p_state.h"24#include "util/u_draw.h"25#include "util/u_inlines.h"26#include "util/u_prim.h"27#include "util/format/u_format.h"28#include "translate/translate.h"2930#include "nv50/nv50_context.h"31#include "nv50/nv50_query_hw.h"32#include "nv50/nv50_resource.h"3334#include "nv50/nv50_3d.xml.h"3536void37nv50_vertex_state_delete(struct pipe_context *pipe,38void *hwcso)39{40struct nv50_vertex_stateobj *so = hwcso;4142if (so->translate)43so->translate->release(so->translate);44FREE(hwcso);45}4647void *48nv50_vertex_state_create(struct pipe_context *pipe,49unsigned num_elements,50const struct pipe_vertex_element *elements)51{52struct nv50_vertex_stateobj *so;53struct translate_key transkey;54unsigned i;5556so = MALLOC(sizeof(*so) +57num_elements * sizeof(struct nv50_vertex_element));58if (!so)59return NULL;60so->num_elements = num_elements;61so->instance_elts = 0;62so->instance_bufs = 0;63so->need_conversion = false;6465memset(so->vb_access_size, 0, sizeof(so->vb_access_size));6667for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)68so->min_instance_div[i] = 0xffffffff;6970transkey.nr_elements = 0;71transkey.output_stride = 0;7273for (i = 0; i < num_elements; ++i) {74const struct pipe_vertex_element *ve = &elements[i];75const unsigned vbi = ve->vertex_buffer_index;76unsigned size;77enum pipe_format fmt = ve->src_format;7879so->element[i].pipe = elements[i];80so->element[i].state = nv50_vertex_format[fmt].vtx;8182if (!so->element[i].state) {83switch (util_format_get_nr_components(fmt)) {84case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;85case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;86case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;87case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;88default:89assert(0);90FREE(so);91return NULL;92}93so->element[i].state = nv50_vertex_format[fmt].vtx;94so->need_conversion = true;95pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,96"Converting vertex element %d, no hw format %s",97i, util_format_name(ve->src_format));98}99so->element[i].state |= i;100101size = util_format_get_blocksize(fmt);102if (so->vb_access_size[vbi] < (ve->src_offset + size))103so->vb_access_size[vbi] = ve->src_offset + size;104105if (1) {106unsigned j = transkey.nr_elements++;107108transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;109transkey.element[j].input_format = ve->src_format;110transkey.element[j].input_buffer = vbi;111transkey.element[j].input_offset = ve->src_offset;112transkey.element[j].instance_divisor = ve->instance_divisor;113114transkey.element[j].output_format = fmt;115transkey.element[j].output_offset = transkey.output_stride;116transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;117118if (unlikely(ve->instance_divisor)) {119so->instance_elts |= 1 << i;120so->instance_bufs |= 1 << vbi;121if (ve->instance_divisor < so->min_instance_div[vbi])122so->min_instance_div[vbi] = ve->instance_divisor;123}124}125}126127so->translate = translate_create(&transkey);128so->vertex_size = transkey.output_stride / 4;129so->packet_vertex_limit = NV04_PFIFO_MAX_PACKET_LEN /130MAX2(so->vertex_size, 1);131132return so;133}134135#define NV50_3D_VERTEX_ATTRIB_INACTIVE \136NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT | \137NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 | \138NV50_3D_VERTEX_ARRAY_ATTRIB_CONST139140static void141nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb,142struct pipe_vertex_element *ve, unsigned attr)143{144struct nouveau_pushbuf *push = nv50->base.pushbuf;145const void *data = (const uint8_t *)vb->buffer.user + ve->src_offset;146float v[4];147const unsigned nc = util_format_get_nr_components(ve->src_format);148149assert(vb->is_user_buffer);150151util_format_unpack_rgba(ve->src_format, v, data, 1);152153switch (nc) {154case 4:155BEGIN_NV04(push, NV50_3D(VTX_ATTR_4F_X(attr)), 4);156PUSH_DATAf(push, v[0]);157PUSH_DATAf(push, v[1]);158PUSH_DATAf(push, v[2]);159PUSH_DATAf(push, v[3]);160break;161case 3:162BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(attr)), 3);163PUSH_DATAf(push, v[0]);164PUSH_DATAf(push, v[1]);165PUSH_DATAf(push, v[2]);166break;167case 2:168BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(attr)), 2);169PUSH_DATAf(push, v[0]);170PUSH_DATAf(push, v[1]);171break;172case 1:173if (attr == nv50->vertprog->vp.edgeflag) {174BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);175PUSH_DATA (push, v[0] ? 1 : 0);176}177BEGIN_NV04(push, NV50_3D(VTX_ATTR_1F(attr)), 1);178PUSH_DATAf(push, v[0]);179break;180default:181assert(0);182break;183}184}185186static inline void187nv50_user_vbuf_range(struct nv50_context *nv50, unsigned vbi,188uint32_t *base, uint32_t *size)189{190assert(vbi < PIPE_MAX_ATTRIBS);191if (unlikely(nv50->vertex->instance_bufs & (1 << vbi))) {192const uint32_t div = nv50->vertex->min_instance_div[vbi];193*base = nv50->instance_off * nv50->vtxbuf[vbi].stride;194*size = (nv50->instance_max / div) * nv50->vtxbuf[vbi].stride +195nv50->vertex->vb_access_size[vbi];196} else {197/* NOTE: if there are user buffers, we *must* have index bounds */198assert(nv50->vb_elt_limit != ~0);199*base = nv50->vb_elt_first * nv50->vtxbuf[vbi].stride;200*size = nv50->vb_elt_limit * nv50->vtxbuf[vbi].stride +201nv50->vertex->vb_access_size[vbi];202}203}204205static void206nv50_upload_user_buffers(struct nv50_context *nv50,207uint64_t addrs[], uint32_t limits[])208{209unsigned b;210211assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);212for (b = 0; b < nv50->num_vtxbufs; ++b) {213struct nouveau_bo *bo;214const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];215uint32_t base, size;216217if (!(nv50->vbo_user & (1 << b)) || !vb->stride)218continue;219nv50_user_vbuf_range(nv50, b, &base, &size);220221limits[b] = base + size - 1;222addrs[b] = nouveau_scratch_data(&nv50->base, vb->buffer.user, base, size,223&bo);224if (addrs[b])225BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, NOUVEAU_BO_GART |226NOUVEAU_BO_RD, bo);227}228nv50->base.vbo_dirty = true;229}230231static void232nv50_update_user_vbufs(struct nv50_context *nv50)233{234uint64_t address[PIPE_MAX_ATTRIBS];235struct nouveau_pushbuf *push = nv50->base.pushbuf;236unsigned i;237uint32_t written = 0;238239for (i = 0; i < nv50->vertex->num_elements; ++i) {240struct pipe_vertex_element *ve = &nv50->vertex->element[i].pipe;241const unsigned b = ve->vertex_buffer_index;242struct pipe_vertex_buffer *vb;243uint32_t base, size;244245assert(b < PIPE_MAX_ATTRIBS);246vb = &nv50->vtxbuf[b];247248if (!(nv50->vbo_user & (1 << b)))249continue;250251if (!vb->stride) {252nv50_emit_vtxattr(nv50, vb, ve, i);253continue;254}255nv50_user_vbuf_range(nv50, b, &base, &size);256257if (!(written & (1 << b))) {258struct nouveau_bo *bo;259const uint32_t bo_flags = NOUVEAU_BO_GART | NOUVEAU_BO_RD;260written |= 1 << b;261address[b] = nouveau_scratch_data(&nv50->base, vb->buffer.user,262base, size, &bo);263if (address[b])264BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, bo_flags, bo);265}266267BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);268PUSH_DATAh(push, address[b] + base + size - 1);269PUSH_DATA (push, address[b] + base + size - 1);270BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_START_HIGH(i)), 2);271PUSH_DATAh(push, address[b] + ve->src_offset);272PUSH_DATA (push, address[b] + ve->src_offset);273}274nv50->base.vbo_dirty = true;275}276277static inline void278nv50_release_user_vbufs(struct nv50_context *nv50)279{280if (nv50->vbo_user) {281nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX_TMP);282nouveau_scratch_done(&nv50->base);283}284}285286void287nv50_vertex_arrays_validate(struct nv50_context *nv50)288{289uint64_t addrs[PIPE_MAX_ATTRIBS];290uint32_t limits[PIPE_MAX_ATTRIBS];291struct nouveau_pushbuf *push = nv50->base.pushbuf;292struct nv50_vertex_stateobj *vertex = nv50->vertex;293struct pipe_vertex_buffer *vb;294struct nv50_vertex_element *ve;295uint32_t mask;296uint32_t refd = 0;297unsigned i;298const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);299300if (unlikely(vertex->need_conversion))301nv50->vbo_fifo = ~0;302else303if (nv50->vbo_user & ~nv50->vbo_constant)304nv50->vbo_fifo = nv50->vbo_push_hint ? ~0 : 0;305else306nv50->vbo_fifo = 0;307308if (!nv50->vbo_fifo) {309/* if vertex buffer was written by GPU - flush VBO cache */310assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);311for (i = 0; i < nv50->num_vtxbufs; ++i) {312struct nv04_resource *buf = nv04_resource(nv50->vtxbuf[i].buffer.resource);313if (!nv50->vtxbuf[i].is_user_buffer &&314buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {315buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;316nv50->base.vbo_dirty = true;317}318}319}320321/* update vertex format state */322BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_ATTRIB(0)), n);323if (nv50->vbo_fifo) {324nv50->state.num_vtxelts = vertex->num_elements;325for (i = 0; i < vertex->num_elements; ++i)326PUSH_DATA (push, vertex->element[i].state);327for (; i < n; ++i)328PUSH_DATA (push, NV50_3D_VERTEX_ATTRIB_INACTIVE);329for (i = 0; i < n; ++i) {330BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);331PUSH_DATA (push, 0);332}333return;334}335for (i = 0; i < vertex->num_elements; ++i) {336const unsigned b = vertex->element[i].pipe.vertex_buffer_index;337338assert(b < PIPE_MAX_ATTRIBS);339ve = &vertex->element[i];340vb = &nv50->vtxbuf[b];341342if (likely(vb->stride) || !(nv50->vbo_user & (1 << b)))343PUSH_DATA(push, ve->state);344else345PUSH_DATA(push, ve->state | NV50_3D_VERTEX_ARRAY_ATTRIB_CONST);346}347for (; i < n; ++i)348PUSH_DATA(push, NV50_3D_VERTEX_ATTRIB_INACTIVE);349350/* update per-instance enables */351mask = vertex->instance_elts ^ nv50->state.instance_elts;352while (mask) {353const int i = ffs(mask) - 1;354mask &= ~(1 << i);355BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);356PUSH_DATA (push, (vertex->instance_elts >> i) & 1);357}358nv50->state.instance_elts = vertex->instance_elts;359360if (nv50->vbo_user & ~nv50->vbo_constant)361nv50_upload_user_buffers(nv50, addrs, limits);362363/* update buffers and set constant attributes */364for (i = 0; i < vertex->num_elements; ++i) {365uint64_t address, limit;366const unsigned b = vertex->element[i].pipe.vertex_buffer_index;367368assert(b < PIPE_MAX_ATTRIBS);369ve = &vertex->element[i];370vb = &nv50->vtxbuf[b];371372if (unlikely(nv50->vbo_constant & (1 << b))) {373BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);374PUSH_DATA (push, 0);375nv50_emit_vtxattr(nv50, vb, &ve->pipe, i);376continue;377} else378if (nv50->vbo_user & (1 << b)) {379address = addrs[b] + ve->pipe.src_offset;380limit = addrs[b] + limits[b];381} else382if (!vb->buffer.resource) {383BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);384PUSH_DATA (push, 0);385continue;386} else {387struct nv04_resource *buf = nv04_resource(vb->buffer.resource);388if (!(refd & (1 << b))) {389refd |= 1 << b;390BCTX_REFN(nv50->bufctx_3d, 3D_VERTEX, buf, RD);391}392address = buf->address + vb->buffer_offset + ve->pipe.src_offset;393limit = buf->address + buf->base.width0 - 1;394}395396if (unlikely(ve->pipe.instance_divisor)) {397BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 4);398PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);399PUSH_DATAh(push, address);400PUSH_DATA (push, address);401PUSH_DATA (push, ve->pipe.instance_divisor);402} else {403BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 3);404PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);405PUSH_DATAh(push, address);406PUSH_DATA (push, address);407}408BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);409PUSH_DATAh(push, limit);410PUSH_DATA (push, limit);411}412for (; i < nv50->state.num_vtxelts; ++i) {413BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);414PUSH_DATA (push, 0);415}416nv50->state.num_vtxelts = vertex->num_elements;417}418419#define NV50_PRIM_GL_CASE(n) \420case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n421422static inline unsigned423nv50_prim_gl(unsigned prim)424{425switch (prim) {426NV50_PRIM_GL_CASE(POINTS);427NV50_PRIM_GL_CASE(LINES);428NV50_PRIM_GL_CASE(LINE_LOOP);429NV50_PRIM_GL_CASE(LINE_STRIP);430NV50_PRIM_GL_CASE(TRIANGLES);431NV50_PRIM_GL_CASE(TRIANGLE_STRIP);432NV50_PRIM_GL_CASE(TRIANGLE_FAN);433NV50_PRIM_GL_CASE(QUADS);434NV50_PRIM_GL_CASE(QUAD_STRIP);435NV50_PRIM_GL_CASE(POLYGON);436NV50_PRIM_GL_CASE(LINES_ADJACENCY);437NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);438NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY);439NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);440default:441return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;442break;443}444}445446/* For pre-nva0 transform feedback. */447static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] =448{449[PIPE_PRIM_POINTS] = 1,450[PIPE_PRIM_LINES] = 2,451[PIPE_PRIM_LINE_LOOP] = 2,452[PIPE_PRIM_LINE_STRIP] = 2,453[PIPE_PRIM_TRIANGLES] = 3,454[PIPE_PRIM_TRIANGLE_STRIP] = 3,455[PIPE_PRIM_TRIANGLE_FAN] = 3,456[PIPE_PRIM_QUADS] = 3,457[PIPE_PRIM_QUAD_STRIP] = 3,458[PIPE_PRIM_POLYGON] = 3,459[PIPE_PRIM_LINES_ADJACENCY] = 2,460[PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2,461[PIPE_PRIM_TRIANGLES_ADJACENCY] = 3,462[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3463};464465static void466nv50_draw_arrays(struct nv50_context *nv50,467unsigned mode, unsigned start, unsigned count,468unsigned instance_count)469{470struct nouveau_pushbuf *push = nv50->base.pushbuf;471unsigned prim;472473if (nv50->state.index_bias) {474BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);475PUSH_DATA (push, 0);476if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {477BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);478PUSH_DATA (push, 0);479}480nv50->state.index_bias = 0;481}482483prim = nv50_prim_gl(mode);484485while (instance_count--) {486BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);487PUSH_DATA (push, prim);488BEGIN_NV04(push, NV50_3D(VERTEX_BUFFER_FIRST), 2);489PUSH_DATA (push, start);490PUSH_DATA (push, count);491BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);492PUSH_DATA (push, 0);493494prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;495}496}497498static void499nv50_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map,500unsigned start, unsigned count)501{502map += start;503504if (count & 3) {505unsigned i;506BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U32), count & 3);507for (i = 0; i < (count & 3); ++i)508PUSH_DATA(push, *map++);509count &= ~3;510}511while (count) {512unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;513514BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U8), nr);515for (i = 0; i < nr; ++i) {516PUSH_DATA(push,517(map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);518map += 4;519}520count -= nr * 4;521}522}523524static void525nv50_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map,526unsigned start, unsigned count)527{528map += start;529530if (count & 1) {531count &= ~1;532BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U32), 1);533PUSH_DATA (push, *map++);534}535while (count) {536unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;537538BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U16), nr);539for (i = 0; i < nr; ++i) {540PUSH_DATA(push, (map[1] << 16) | map[0]);541map += 2;542}543count -= nr * 2;544}545}546547static void548nv50_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map,549unsigned start, unsigned count)550{551map += start;552553while (count) {554const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);555556BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U32), nr);557PUSH_DATAp(push, map, nr);558559map += nr;560count -= nr;561}562}563564static void565nv50_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,566const uint32_t *map,567unsigned start, unsigned count)568{569map += start;570571if (count & 1) {572count--;573BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U32), 1);574PUSH_DATA (push, *map++);575}576while (count) {577unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;578579BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U16), nr);580for (i = 0; i < nr; ++i) {581PUSH_DATA(push, (map[1] << 16) | map[0]);582map += 2;583}584count -= nr * 2;585}586}587588static void589nv50_draw_elements(struct nv50_context *nv50, bool shorten,590const struct pipe_draw_info *info,591unsigned mode, unsigned start, unsigned count,592unsigned instance_count, int32_t index_bias,593unsigned index_size)594{595struct nouveau_pushbuf *push = nv50->base.pushbuf;596unsigned prim;597598prim = nv50_prim_gl(mode);599600if (index_bias != nv50->state.index_bias) {601BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);602PUSH_DATA (push, index_bias);603if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {604BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);605PUSH_DATA (push, index_bias);606}607nv50->state.index_bias = index_bias;608}609610if (!info->has_user_indices) {611struct nv04_resource *buf = nv04_resource(info->index.resource);612unsigned pb_start;613unsigned pb_bytes;614const unsigned base = buf->offset & ~3;615616start += (buf->offset & 3) >> (index_size >> 1);617618assert(nouveau_resource_mapped_by_gpu(info->index.resource));619620/* This shouldn't have to be here. The going theory is that the buffer621* is being filled in by PGRAPH, and it's not done yet by the time it622* gets submitted to PFIFO, which in turn starts immediately prefetching623* the not-yet-written data. Ideally this wait would only happen on624* pushbuf submit, but it's probably not a big performance difference.625*/626if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))627nouveau_fence_wait(buf->fence_wr, &nv50->base.debug);628629while (instance_count--) {630BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);631PUSH_DATA (push, prim);632633nouveau_pushbuf_space(push, 16, 0, 1);634PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);635636switch (index_size) {637case 4:638BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U32), count);639nouveau_pushbuf_data(push, buf->bo, base + start * 4, count * 4);640break;641case 2:642pb_start = (start & ~1) * 2;643pb_bytes = ((start + count + 1) & ~1) * 2 - pb_start;644645BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U16_SETUP), 1);646PUSH_DATA (push, (start << 31) | count);647BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U16), pb_bytes / 4);648nouveau_pushbuf_data(push, buf->bo, base + pb_start, pb_bytes);649BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U16_SETUP), 1);650PUSH_DATA (push, 0);651break;652default:653assert(index_size == 1);654pb_start = start & ~3;655pb_bytes = ((start + count + 3) & ~3) - pb_start;656657BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U8_SETUP), 1);658PUSH_DATA (push, (start << 30) | count);659BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U8), pb_bytes / 4);660nouveau_pushbuf_data(push, buf->bo, base + pb_start, pb_bytes);661BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U8_SETUP), 1);662PUSH_DATA (push, 0);663break;664}665BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);666PUSH_DATA (push, 0);667668prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;669}670} else {671const void *data = info->index.user;672673while (instance_count--) {674BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);675PUSH_DATA (push, prim);676switch (index_size) {677case 1:678nv50_draw_elements_inline_u08(push, data, start, count);679break;680case 2:681nv50_draw_elements_inline_u16(push, data, start, count);682break;683case 4:684if (shorten)685nv50_draw_elements_inline_u32_short(push, data, start, count);686else687nv50_draw_elements_inline_u32(push, data, start, count);688break;689default:690assert(0);691return;692}693BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);694PUSH_DATA (push, 0);695696prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;697}698}699NOUVEAU_DRV_STAT(&nv50->screen->base, draw_calls_indexed, 1);700}701702static void703nva0_draw_stream_output(struct nv50_context *nv50,704const struct pipe_draw_info *info,705const struct pipe_draw_indirect_info *indirect)706{707struct nouveau_pushbuf *push = nv50->base.pushbuf;708struct nv50_so_target *so = nv50_so_target(indirect->count_from_stream_output);709struct nv04_resource *res = nv04_resource(so->pipe.buffer);710unsigned num_instances = info->instance_count;711unsigned mode = nv50_prim_gl(info->mode);712713if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) {714/* A proper implementation without waiting doesn't seem possible,715* so don't bother.716*/717NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");718return;719}720721if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {722res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;723PUSH_SPACE(push, 4);724BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);725PUSH_DATA (push, 0);726BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);727PUSH_DATA (push, 0);728}729730assert(num_instances);731do {732PUSH_SPACE(push, 8);733BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);734PUSH_DATA (push, mode);735BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);736PUSH_DATA (push, 0);737BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);738PUSH_DATA (push, so->stride);739nv50_hw_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES,740nv50_query(so->pq), 0x4);741BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);742PUSH_DATA (push, 0);743744mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;745} while (--num_instances);746}747748static void749nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)750{751struct nv50_screen *screen = chan->user_priv;752753nouveau_fence_update(&screen->base, true);754755nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, true);756}757758void759nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,760unsigned drawid_offset,761const struct pipe_draw_indirect_info *indirect,762const struct pipe_draw_start_count_bias *draws,763unsigned num_draws)764{765if (num_draws > 1) {766util_draw_multi(pipe, info, drawid_offset, indirect, draws, num_draws);767return;768}769770if (!indirect && (!draws[0].count || !info->instance_count))771return;772773/* We don't actually support indirect draws, so add a fallback for ES 3.1's774* benefit.775*/776if (indirect && indirect->buffer) {777util_draw_indirect(pipe, info, indirect);778return;779}780781struct nv50_context *nv50 = nv50_context(pipe);782struct nouveau_pushbuf *push = nv50->base.pushbuf;783bool tex_dirty = false;784int s;785786if (info->index_size && !info->has_user_indices)787BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(info->index.resource), RD);788789/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */790if (info->index_bounds_valid) {791nv50->vb_elt_first = info->min_index + (info->index_size ? draws->index_bias : 0);792nv50->vb_elt_limit = info->max_index - info->min_index;793} else {794nv50->vb_elt_first = 0;795nv50->vb_elt_limit = ~0;796}797nv50->instance_off = info->start_instance;798nv50->instance_max = info->instance_count - 1;799800/* For picking only a few vertices from a large user buffer, push is better,801* if index count is larger and we expect repeated vertices, suggest upload.802*/803nv50->vbo_push_hint = /* the 64 is heuristic */804!(info->index_size && ((nv50->vb_elt_limit + 64) < draws[0].count));805806if (nv50->vbo_user && !(nv50->dirty_3d & (NV50_NEW_3D_ARRAYS | NV50_NEW_3D_VERTEX))) {807if (!!nv50->vbo_fifo != nv50->vbo_push_hint)808nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;809else810if (!nv50->vbo_fifo)811nv50_update_user_vbufs(nv50);812}813814if (unlikely(nv50->num_so_targets && !nv50->gmtyprog))815nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode];816817nv50_state_validate_3d(nv50, ~0);818819push->kick_notify = nv50_draw_vbo_kick_notify;820821for (s = 0; s < NV50_MAX_3D_SHADER_STAGES && !nv50->cb_dirty; ++s) {822if (nv50->constbuf_coherent[s])823nv50->cb_dirty = true;824}825826/* If there are any coherent constbufs, flush the cache */827if (nv50->cb_dirty) {828BEGIN_NV04(push, NV50_3D(CODE_CB_FLUSH), 1);829PUSH_DATA (push, 0);830nv50->cb_dirty = false;831}832833for (s = 0; s < NV50_MAX_3D_SHADER_STAGES && !tex_dirty; ++s) {834if (nv50->textures_coherent[s])835tex_dirty = true;836}837838if (tex_dirty) {839BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);840PUSH_DATA (push, 0x20);841}842843if (nv50->screen->base.class_3d >= NVA0_3D_CLASS &&844nv50->seamless_cube_map != nv50->state.seamless_cube_map) {845nv50->state.seamless_cube_map = nv50->seamless_cube_map;846BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);847PUSH_DATA (push, nv50->seamless_cube_map ? NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0);848}849850if (nv50->vertprog->mul_zero_wins != nv50->state.mul_zero_wins) {851nv50->state.mul_zero_wins = nv50->vertprog->mul_zero_wins;852BEGIN_NV04(push, NV50_3D(UNK1690), 1);853PUSH_DATA (push, 0x00010000 * !!nv50->state.mul_zero_wins);854}855856/* Make starting/pausing streamout work pre-NVA0 enough for ES3.0. This857* means counting vertices in a vertex shader when it has so outputs.858*/859if (nv50->screen->base.class_3d < NVA0_3D_CLASS &&860nv50->vertprog->pipe.stream_output.num_outputs) {861for (int i = 0; i < nv50->num_so_targets; i++) {862nv50->so_used[i] += info->instance_count *863u_stream_outputs_for_vertices(info->mode, draws[0].count) *864nv50->vertprog->pipe.stream_output.stride[i] * 4;865}866}867868if (nv50->vbo_fifo) {869nv50_push_vbo(nv50, info, indirect, &draws[0]);870goto cleanup;871}872873if (nv50->state.instance_base != info->start_instance) {874nv50->state.instance_base = info->start_instance;875/* NOTE: this does not affect the shader input, should it ? */876BEGIN_NV04(push, NV50_3D(VB_INSTANCE_BASE), 1);877PUSH_DATA (push, info->start_instance);878}879880nv50->base.vbo_dirty |= !!nv50->vtxbufs_coherent;881882if (nv50->base.vbo_dirty) {883BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);884PUSH_DATA (push, 0);885nv50->base.vbo_dirty = false;886}887888if (info->index_size) {889bool shorten = info->index_bounds_valid && info->max_index <= 65535;890891if (info->primitive_restart != nv50->state.prim_restart) {892if (info->primitive_restart) {893BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 2);894PUSH_DATA (push, 1);895PUSH_DATA (push, info->restart_index);896897if (info->restart_index > 65535)898shorten = false;899} else {900BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 1);901PUSH_DATA (push, 0);902}903nv50->state.prim_restart = info->primitive_restart;904} else905if (info->primitive_restart) {906BEGIN_NV04(push, NV50_3D(PRIM_RESTART_INDEX), 1);907PUSH_DATA (push, info->restart_index);908909if (info->restart_index > 65535)910shorten = false;911}912913nv50_draw_elements(nv50, shorten, info,914info->mode, draws[0].start, draws[0].count,915info->instance_count, draws->index_bias, info->index_size);916} else917if (unlikely(indirect && indirect->count_from_stream_output)) {918nva0_draw_stream_output(nv50, info, indirect);919} else {920nv50_draw_arrays(nv50,921info->mode, draws[0].start, draws[0].count,922info->instance_count);923}924925cleanup:926push->kick_notify = nv50_default_kick_notify;927928nv50_release_user_vbufs(nv50);929930nouveau_pushbuf_bufctx(push, NULL);931932nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);933}934935936