/* Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
 * 4574 views */
/*1* Copyright 2012 Red Hat Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*21* Authors: Ben Skeggs22*23*/2425#include "util/format/u_format.h"26#include "util/u_draw.h"27#include "util/u_inlines.h"28#include "util/u_prim.h"29#include "translate/translate.h"3031#include "nouveau_fence.h"32#include "nv_object.xml.h"33#include "nv30/nv30-40_3d.xml.h"34#include "nv30/nv30_context.h"35#include "nv30/nv30_format.h"3637static void38nv30_emit_vtxattr(struct nv30_context *nv30, struct pipe_vertex_buffer *vb,39struct pipe_vertex_element *ve, unsigned attr)40{41const unsigned nc = util_format_get_nr_components(ve->src_format);42struct nouveau_pushbuf *push = nv30->base.pushbuf;43struct nv04_resource *res = nv04_resource(vb->buffer.resource);44const void *data;45float v[4];4647data = nouveau_resource_map_offset(&nv30->base, res, vb->buffer_offset +48ve->src_offset, NOUVEAU_BO_RD);4950util_format_unpack_rgba(ve->src_format, v, data, 1);5152switch (nc) {53case 
4:54BEGIN_NV04(push, NV30_3D(VTX_ATTR_4F(attr)), 4);55PUSH_DATAf(push, v[0]);56PUSH_DATAf(push, v[1]);57PUSH_DATAf(push, v[2]);58PUSH_DATAf(push, v[3]);59break;60case 3:61BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(attr)), 3);62PUSH_DATAf(push, v[0]);63PUSH_DATAf(push, v[1]);64PUSH_DATAf(push, v[2]);65break;66case 2:67BEGIN_NV04(push, NV30_3D(VTX_ATTR_2F(attr)), 2);68PUSH_DATAf(push, v[0]);69PUSH_DATAf(push, v[1]);70break;71case 1:72BEGIN_NV04(push, NV30_3D(VTX_ATTR_1F(attr)), 1);73PUSH_DATAf(push, v[0]);74break;75default:76assert(0);77break;78}79}8081static inline void82nv30_vbuf_range(struct nv30_context *nv30, int vbi,83uint32_t *base, uint32_t *size)84{85assert(nv30->vbo_max_index != ~0);86*base = nv30->vbo_min_index * nv30->vtxbuf[vbi].stride;87*size = (nv30->vbo_max_index -88nv30->vbo_min_index + 1) * nv30->vtxbuf[vbi].stride;89}9091static void92nv30_prevalidate_vbufs(struct nv30_context *nv30)93{94struct pipe_vertex_buffer *vb;95struct nv04_resource *buf;96int i;97uint32_t base, size;9899nv30->vbo_fifo = nv30->vbo_user = 0;100101for (i = 0; i < nv30->num_vtxbufs; i++) {102vb = &nv30->vtxbuf[i];103if (!vb->stride || !vb->buffer.resource) /* NOTE: user_buffer not implemented */104continue;105buf = nv04_resource(vb->buffer.resource);106107/* NOTE: user buffers with temporary storage count as mapped by GPU */108if (!nouveau_resource_mapped_by_gpu(vb->buffer.resource)) {109if (nv30->vbo_push_hint) {110nv30->vbo_fifo = ~0;111continue;112} else {113if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) {114nv30->vbo_user |= 1 << i;115assert(vb->stride > vb->buffer_offset);116nv30_vbuf_range(nv30, i, &base, &size);117nouveau_user_buffer_upload(&nv30->base, buf, base, size);118} else {119nouveau_buffer_migrate(&nv30->base, buf, NOUVEAU_BO_GART);120}121nv30->base.vbo_dirty = true;122}123}124}125}126127static void128nv30_update_user_vbufs(struct nv30_context *nv30)129{130struct nouveau_pushbuf *push = nv30->base.pushbuf;131uint32_t base, offset, size;132int i;133uint32_t 
written = 0;134135for (i = 0; i < nv30->vertex->num_elements; i++) {136struct pipe_vertex_element *ve = &nv30->vertex->pipe[i];137const int b = ve->vertex_buffer_index;138struct pipe_vertex_buffer *vb = &nv30->vtxbuf[b];139struct nv04_resource *buf = nv04_resource(vb->buffer.resource);140141if (!(nv30->vbo_user & (1 << b)))142continue;143144if (!vb->stride) {145nv30_emit_vtxattr(nv30, vb, ve, i);146continue;147}148nv30_vbuf_range(nv30, b, &base, &size);149150if (!(written & (1 << b))) {151written |= 1 << b;152nouveau_user_buffer_upload(&nv30->base, buf, base, size);153}154155offset = vb->buffer_offset + ve->src_offset;156157BEGIN_NV04(push, NV30_3D(VTXBUF(i)), 1);158PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP, buf, offset,159NOUVEAU_BO_LOW | NOUVEAU_BO_RD,1600, NV30_3D_VTXBUF_DMA1);161}162nv30->base.vbo_dirty = true;163}164165static inline void166nv30_release_user_vbufs(struct nv30_context *nv30)167{168uint32_t vbo_user = nv30->vbo_user;169170while (vbo_user) {171int i = ffs(vbo_user) - 1;172vbo_user &= ~(1 << i);173174nouveau_buffer_release_gpu_storage(nv04_resource(nv30->vtxbuf[i].buffer.resource));175}176177nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VTXTMP);178}179180void181nv30_vbo_validate(struct nv30_context *nv30)182{183struct nouveau_pushbuf *push = nv30->base.pushbuf;184struct nv30_vertex_stateobj *vertex = nv30->vertex;185struct pipe_vertex_element *ve;186struct pipe_vertex_buffer *vb;187unsigned i, redefine;188189nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VTXBUF);190if (!nv30->vertex || nv30->draw_flags)191return;192193#if UTIL_ARCH_BIG_ENDIAN194if (1) { /* Figure out where the buffers are getting messed up */195#else196if (unlikely(vertex->need_conversion)) {197#endif198nv30->vbo_fifo = ~0;199nv30->vbo_user = 0;200} else {201nv30_prevalidate_vbufs(nv30);202}203204if (!PUSH_SPACE(push, 128))205return;206207redefine = MAX2(vertex->num_elements, nv30->state.num_vtxelts);208if (redefine == 0)209return;210211BEGIN_NV04(push, NV30_3D(VTXFMT(0)), 
redefine);212213for (i = 0; i < vertex->num_elements; i++) {214ve = &vertex->pipe[i];215vb = &nv30->vtxbuf[ve->vertex_buffer_index];216217if (likely(vb->stride) || nv30->vbo_fifo)218PUSH_DATA (push, (vb->stride << 8) | vertex->element[i].state);219else220PUSH_DATA (push, NV30_3D_VTXFMT_TYPE_V32_FLOAT);221}222223for (; i < nv30->state.num_vtxelts; i++) {224PUSH_DATA (push, NV30_3D_VTXFMT_TYPE_V32_FLOAT);225}226227for (i = 0; i < vertex->num_elements; i++) {228struct nv04_resource *res;229unsigned offset;230bool user;231232ve = &vertex->pipe[i];233vb = &nv30->vtxbuf[ve->vertex_buffer_index];234user = (nv30->vbo_user & (1 << ve->vertex_buffer_index));235236res = nv04_resource(vb->buffer.resource);237238if (nv30->vbo_fifo || unlikely(vb->stride == 0)) {239if (!nv30->vbo_fifo)240nv30_emit_vtxattr(nv30, vb, ve, i);241continue;242}243244offset = ve->src_offset + vb->buffer_offset;245246BEGIN_NV04(push, NV30_3D(VTXBUF(i)), 1);247PUSH_RESRC(push, NV30_3D(VTXBUF(i)), user ? BUFCTX_VTXTMP : BUFCTX_VTXBUF,248res, offset, NOUVEAU_BO_LOW | NOUVEAU_BO_RD,2490, NV30_3D_VTXBUF_DMA1);250}251252nv30->state.num_vtxelts = vertex->num_elements;253}254255static void *256nv30_vertex_state_create(struct pipe_context *pipe, unsigned num_elements,257const struct pipe_vertex_element *elements)258{259struct nv30_vertex_stateobj *so;260struct translate_key transkey;261unsigned i;262263so = MALLOC(sizeof(*so) + sizeof(*so->element) * num_elements);264if (!so)265return NULL;266memcpy(so->pipe, elements, sizeof(*elements) * num_elements);267so->num_elements = num_elements;268so->need_conversion = false;269270transkey.nr_elements = 0;271transkey.output_stride = 0;272273for (i = 0; i < num_elements; i++) {274const struct pipe_vertex_element *ve = &elements[i];275const unsigned vbi = ve->vertex_buffer_index;276enum pipe_format fmt = ve->src_format;277278so->element[i].state = nv30_vtxfmt(pipe->screen, fmt)->hw;279if (!so->element[i].state) {280switch (util_format_get_nr_components(fmt)) {281case 1: 
fmt = PIPE_FORMAT_R32_FLOAT; break;282case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;283case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;284case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;285default:286assert(0);287FREE(so);288return NULL;289}290so->element[i].state = nv30_vtxfmt(pipe->screen, fmt)->hw;291so->need_conversion = true;292}293294if (1) {295unsigned j = transkey.nr_elements++;296297transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;298transkey.element[j].input_format = ve->src_format;299transkey.element[j].input_buffer = vbi;300transkey.element[j].input_offset = ve->src_offset;301transkey.element[j].instance_divisor = ve->instance_divisor;302303transkey.element[j].output_format = fmt;304transkey.element[j].output_offset = transkey.output_stride;305transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;306}307}308309so->translate = translate_create(&transkey);310so->vtx_size = transkey.output_stride / 4;311so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1);312return so;313}314315static void316nv30_vertex_state_delete(struct pipe_context *pipe, void *hwcso)317{318struct nv30_vertex_stateobj *so = hwcso;319320if (so->translate)321so->translate->release(so->translate);322FREE(hwcso);323}324325static void326nv30_vertex_state_bind(struct pipe_context *pipe, void *hwcso)327{328struct nv30_context *nv30 = nv30_context(pipe);329330nv30->vertex = hwcso;331nv30->dirty |= NV30_NEW_VERTEX;332}333334static void335nv30_draw_arrays(struct nv30_context *nv30,336unsigned mode, unsigned start, unsigned count,337unsigned instance_count)338{339struct nouveau_pushbuf *push = nv30->base.pushbuf;340unsigned prim;341342prim = nv30_prim_gl(mode);343344BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);345PUSH_DATA (push, prim);346while (count) {347const unsigned mpush = 2047 * 256;348unsigned npush = (count > mpush) ? 
mpush : count;349unsigned wpush = ((npush + 255) & ~255) >> 8;350351count -= npush;352353BEGIN_NI04(push, NV30_3D(VB_VERTEX_BATCH), wpush);354while (npush >= 256) {355PUSH_DATA (push, 0xff000000 | start);356start += 256;357npush -= 256;358}359360if (npush)361PUSH_DATA (push, ((npush - 1) << 24) | start);362}363BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);364PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);365}366367static void368nv30_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map,369unsigned start, unsigned count)370{371map += start;372373if (count & 1) {374BEGIN_NV04(push, NV30_3D(VB_ELEMENT_U32), 1);375PUSH_DATA (push, *map++);376}377378count >>= 1;379while (count) {380unsigned npush = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);381count -= npush;382383BEGIN_NI04(push, NV30_3D(VB_ELEMENT_U16), npush);384while (npush--) {385PUSH_DATA (push, (map[1] << 16) | map[0]);386map += 2;387}388}389390}391392static void393nv30_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map,394unsigned start, unsigned count)395{396map += start;397398if (count & 1) {399BEGIN_NV04(push, NV30_3D(VB_ELEMENT_U32), 1);400PUSH_DATA (push, *map++);401}402403count >>= 1;404while (count) {405unsigned npush = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);406count -= npush;407408BEGIN_NI04(push, NV30_3D(VB_ELEMENT_U16), npush);409while (npush--) {410PUSH_DATA (push, (map[1] << 16) | map[0]);411map += 2;412}413}414}415416static void417nv30_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map,418unsigned start, unsigned count)419{420map += start;421422while (count) {423const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);424425BEGIN_NI04(push, NV30_3D(VB_ELEMENT_U32), nr);426PUSH_DATAp(push, map, nr);427428map += nr;429count -= nr;430}431}432433static void434nv30_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,435const uint32_t *map,436unsigned start, unsigned count)437{438map += start;439440if (count & 1) 
{441BEGIN_NV04(push, NV30_3D(VB_ELEMENT_U32), 1);442PUSH_DATA (push, *map++);443}444445count >>= 1;446while (count) {447unsigned npush = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);448count -= npush;449450BEGIN_NI04(push, NV30_3D(VB_ELEMENT_U16), npush);451while (npush--) {452PUSH_DATA (push, (map[1] << 16) | map[0]);453map += 2;454}455}456}457458static void459nv30_draw_elements(struct nv30_context *nv30, bool shorten,460const struct pipe_draw_info *info,461unsigned mode, unsigned start, unsigned count,462unsigned instance_count, int32_t index_bias,463unsigned index_size)464{465struct nouveau_pushbuf *push = nv30->base.pushbuf;466struct nouveau_object *eng3d = nv30->screen->eng3d;467unsigned prim = nv30_prim_gl(mode);468469if (eng3d->oclass >= NV40_3D_CLASS && index_bias != nv30->state.index_bias) {470BEGIN_NV04(push, NV40_3D(VB_ELEMENT_BASE), 1);471PUSH_DATA (push, index_bias);472nv30->state.index_bias = index_bias;473}474475if (eng3d->oclass == NV40_3D_CLASS && index_size > 1 &&476!info->has_user_indices) {477struct nv04_resource *res = nv04_resource(info->index.resource);478unsigned offset = 0;479480assert(nouveau_resource_mapped_by_gpu(&res->base));481482BEGIN_NV04(push, NV30_3D(IDXBUF_OFFSET), 2);483PUSH_RESRC(push, NV30_3D(IDXBUF_OFFSET), BUFCTX_IDXBUF, res, offset,484NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0);485PUSH_MTHD (push, NV30_3D(IDXBUF_FORMAT), BUFCTX_IDXBUF, res->bo,486(index_size == 2) ? 0x00000010 : 0x00000000,487res->domain | NOUVEAU_BO_RD,4880, NV30_3D_IDXBUF_FORMAT_DMA1);489BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);490PUSH_DATA (push, prim);491while (count) {492const unsigned mpush = 2047 * 256;493unsigned npush = (count > mpush) ? 
mpush : count;494unsigned wpush = ((npush + 255) & ~255) >> 8;495496count -= npush;497498BEGIN_NI04(push, NV30_3D(VB_INDEX_BATCH), wpush);499while (npush >= 256) {500PUSH_DATA (push, 0xff000000 | start);501start += 256;502npush -= 256;503}504505if (npush)506PUSH_DATA (push, ((npush - 1) << 24) | start);507}508BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);509PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);510PUSH_RESET(push, BUFCTX_IDXBUF);511} else {512const void *data;513if (!info->has_user_indices)514data = nouveau_resource_map_offset(&nv30->base,515nv04_resource(info->index.resource),5160, NOUVEAU_BO_RD);517else518data = info->index.user;519if (!data)520return;521522BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);523PUSH_DATA (push, prim);524switch (index_size) {525case 1:526nv30_draw_elements_inline_u08(push, data, start, count);527break;528case 2:529nv30_draw_elements_inline_u16(push, data, start, count);530break;531case 4:532if (shorten)533nv30_draw_elements_inline_u32_short(push, data, start, count);534else535nv30_draw_elements_inline_u32(push, data, start, count);536break;537default:538assert(0);539return;540}541BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);542PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);543}544}545546static void547nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,548unsigned drawid_offset,549const struct pipe_draw_indirect_info *indirect,550const struct pipe_draw_start_count_bias *draws,551unsigned num_draws)552{553if (num_draws > 1) {554util_draw_multi(pipe, info, drawid_offset, indirect, draws, num_draws);555return;556}557558if (!indirect && (!draws[0].count || !info->instance_count))559return;560561struct nv30_context *nv30 = nv30_context(pipe);562struct nouveau_pushbuf *push = nv30->base.pushbuf;563int i;564565if (!info->primitive_restart &&566!u_trim_pipe_prim(info->mode, (unsigned*)&draws[0].count))567return;568569/* For picking only a few vertices from a large user buffer, push is better,570* if index 
count is larger and we expect repeated vertices, suggest upload.571*/572nv30->vbo_push_hint = /* the 64 is heuristic */573!(info->index_size &&574info->index_bounds_valid &&575((info->max_index - info->min_index + 64) < draws[0].count));576577if (info->index_bounds_valid) {578nv30->vbo_min_index = info->min_index;579nv30->vbo_max_index = info->max_index;580} else {581nv30->vbo_min_index = 0;582nv30->vbo_max_index = ~0;583}584585if (nv30->vbo_push_hint != !!nv30->vbo_fifo)586nv30->dirty |= NV30_NEW_ARRAYS;587588push->user_priv = &nv30->bufctx;589if (nv30->vbo_user && !(nv30->dirty & (NV30_NEW_VERTEX | NV30_NEW_ARRAYS)))590nv30_update_user_vbufs(nv30);591592nv30_state_validate(nv30, ~0, true);593if (nv30->draw_flags) {594nv30_render_vbo(pipe, info, drawid_offset, &draws[0]);595return;596} else597if (nv30->vbo_fifo) {598nv30_push_vbo(nv30, info, &draws[0]);599return;600}601602for (i = 0; i < nv30->num_vtxbufs && !nv30->base.vbo_dirty; ++i) {603if (!nv30->vtxbuf[i].buffer.resource)604continue;605if (nv30->vtxbuf[i].buffer.resource->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)606nv30->base.vbo_dirty = true;607}608609if (!nv30->base.vbo_dirty && info->index_size && !info->has_user_indices &&610info->index.resource->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)611nv30->base.vbo_dirty = true;612613if (nv30->base.vbo_dirty) {614BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1);615PUSH_DATA (push, 0);616nv30->base.vbo_dirty = false;617}618619if (!info->index_size) {620nv30_draw_arrays(nv30,621info->mode, draws[0].start, draws[0].count,622info->instance_count);623} else {624bool shorten = info->index_bounds_valid && info->max_index <= 65535;625626if (info->primitive_restart != nv30->state.prim_restart) {627if (info->primitive_restart) {628BEGIN_NV04(push, NV40_3D(PRIM_RESTART_ENABLE), 2);629PUSH_DATA (push, 1);630PUSH_DATA (push, info->restart_index);631632if (info->restart_index > 65535)633shorten = false;634} else {635BEGIN_NV04(push, NV40_3D(PRIM_RESTART_ENABLE), 
1);636PUSH_DATA (push, 0);637}638nv30->state.prim_restart = info->primitive_restart;639} else640if (info->primitive_restart) {641BEGIN_NV04(push, NV40_3D(PRIM_RESTART_INDEX), 1);642PUSH_DATA (push, info->restart_index);643644if (info->restart_index > 65535)645shorten = false;646}647648nv30_draw_elements(nv30, shorten, info,649info->mode, draws[0].start, draws[0].count,650info->instance_count, draws[0].index_bias, info->index_size);651}652653nv30_state_release(nv30);654nv30_release_user_vbufs(nv30);655}656657void658nv30_vbo_init(struct pipe_context *pipe)659{660pipe->create_vertex_elements_state = nv30_vertex_state_create;661pipe->delete_vertex_elements_state = nv30_vertex_state_delete;662pipe->bind_vertex_elements_state = nv30_vertex_state_bind;663pipe->draw_vbo = nv30_draw_vbo;664}665666667