Path: blob/21.2-virgl/src/gallium/drivers/svga/svga_pipe_streamout.c
4570 views
/**********************************************************1* Copyright 2014 VMware, Inc. All rights reserved.2*3* Permission is hereby granted, free of charge, to any person4* obtaining a copy of this software and associated documentation5* files (the "Software"), to deal in the Software without6* restriction, including without limitation the rights to use, copy,7* modify, merge, publish, distribute, sublicense, and/or sell copies8* of the Software, and to permit persons to whom the Software is9* furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice shall be12* included in all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,15* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF16* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND17* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS18* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN19* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN20* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*23**********************************************************/2425#include "util/u_memory.h"26#include "util/u_bitmask.h"2728#include "svga_cmd.h"29#include "svga_context.h"30#include "svga_resource_buffer.h"31#include "svga_shader.h"32#include "svga_debug.h"33#include "svga_streamout.h"3435struct svga_stream_output_target {36struct pipe_stream_output_target base;37};3839/** cast wrapper */40static inline struct svga_stream_output_target *41svga_stream_output_target(struct pipe_stream_output_target *s)42{43return (struct svga_stream_output_target *)s;44}454647/**48* A helper function to send different version of the DefineStreamOutput command49* depending on if device is SM5 capable or not.50*/51static enum pipe_error52svga_define_stream_output(struct svga_context *svga,53SVGA3dStreamOutputId soid,54uint32 numOutputStreamEntries,55uint32 numOutputStreamStrides,56uint32 streamStrides[SVGA3D_DX_MAX_SOTARGETS],57const SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS],58uint32 rasterizedStream,59struct svga_stream_output *streamout)60{61unsigned i;6263SVGA_DBG(DEBUG_STREAMOUT, "%s: id=%d\n", __FUNCTION__, soid);64SVGA_DBG(DEBUG_STREAMOUT,65"numOutputStreamEntires=%d\n", numOutputStreamEntries);6667for (i = 0; i < numOutputStreamEntries; i++) {68SVGA_DBG(DEBUG_STREAMOUT,69" %d: slot=%d regIdx=%d regMask=0x%x stream=%d\n",70i, decls[i].outputSlot, decls[i].registerIndex,71decls[i].registerMask, decls[i].stream);72}7374SVGA_DBG(DEBUG_STREAMOUT,75"numOutputStreamStrides=%d\n", numOutputStreamStrides);76for (i = 0; i < numOutputStreamStrides; i++) {77SVGA_DBG(DEBUG_STREAMOUT, " %d ", streamStrides[i]);78}79SVGA_DBG(DEBUG_STREAMOUT, "\n");8081if (svga_have_sm5(svga) &&82(numOutputStreamEntries > SVGA3D_MAX_DX10_STREAMOUT_DECLS ||83numOutputStreamStrides > 1)) {84unsigned bufSize = sizeof(SVGA3dStreamOutputDeclarationEntry)85* numOutputStreamEntries;86struct svga_winsys_buffer *declBuf;87struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;88void *map;8990declBuf = svga_winsys_buffer_create(svga, 1, SVGA_BUFFER_USAGE_PINNED,91bufSize);92if (!declBuf)93return PIPE_ERROR;94map = sws->buffer_map(sws, declBuf, PIPE_MAP_WRITE);95if (!map) {96sws->buffer_destroy(sws, declBuf);97return PIPE_ERROR;98}99100/* copy decls to buffer */101memcpy(map, decls, bufSize);102103/* unmap buffer */104sws->buffer_unmap(sws, declBuf);105streamout->declBuf = declBuf;106107SVGA_RETRY(svga, SVGA3D_sm5_DefineAndBindStreamOutput108(svga->swc, soid,109numOutputStreamEntries,110numOutputStreamStrides,111streamStrides,112streamout->declBuf,113rasterizedStream,114bufSize));115} else {116SVGA_RETRY(svga, SVGA3D_vgpu10_DefineStreamOutput(svga->swc, soid,117numOutputStreamEntries,118streamStrides,119decls));120}121122return PIPE_OK;123}124125126/**127* Creates stream output from the stream output info.128*/129struct svga_stream_output *130svga_create_stream_output(struct svga_context *svga,131struct svga_shader *shader,132const struct pipe_stream_output_info *info)133{134struct svga_stream_output *streamout;135SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS];136unsigned strides[SVGA3D_DX_MAX_SOTARGETS];137unsigned dstOffset[SVGA3D_DX_MAX_SOTARGETS];138unsigned numStreamStrides = 0;139unsigned numDecls;140unsigned i;141enum pipe_error ret;142unsigned id;143ASSERTED unsigned maxDecls;144145assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS);146147/* Gallium utility creates shaders with stream output.148* For non-DX10, just return NULL.149*/150if (!svga_have_vgpu10(svga))151return NULL;152153if (svga_have_sm5(svga))154maxDecls = SVGA3D_MAX_STREAMOUT_DECLS;155else if (svga_have_vgpu10(svga))156maxDecls = SVGA3D_MAX_DX10_STREAMOUT_DECLS;157158assert(info->num_outputs <= maxDecls);159160/* Allocate an integer ID for the stream output */161id = util_bitmask_add(svga->stream_output_id_bm);162if (id == UTIL_BITMASK_INVALID_INDEX) {163return NULL;164}165166/* Allocate the streamout data structure */167streamout = CALLOC_STRUCT(svga_stream_output);168169if (!streamout)170return NULL;171172streamout->info = *info;173streamout->id = id;174streamout->pos_out_index = -1;175streamout->streammask = 0;176177/* Init whole decls and stride arrays to zero to avoid garbage values */178memset(decls, 0, sizeof(decls));179memset(strides, 0, sizeof(strides));180memset(dstOffset, 0, sizeof(dstOffset));181182SVGA_DBG(DEBUG_STREAMOUT, "%s: num_outputs=%d\n",183__FUNCTION__, info->num_outputs);184185for (i = 0, numDecls = 0; i < info->num_outputs; i++, numDecls++) {186unsigned reg_idx = info->output[i].register_index;187unsigned buf_idx = info->output[i].output_buffer;188const enum tgsi_semantic sem_name =189shader->info.output_semantic_name[reg_idx];190191assert(buf_idx <= PIPE_MAX_SO_BUFFERS);192193numStreamStrides = MAX2(numStreamStrides, buf_idx);194195SVGA_DBG(DEBUG_STREAMOUT,196" %d: register_index=%d output_buffer=%d stream=%d\n",197i, reg_idx, buf_idx, info->output[i].stream);198199SVGA_DBG(DEBUG_STREAMOUT,200" dst_offset=%d start_component=%d num_components=%d\n",201info->output[i].dst_offset,202info->output[i].start_component,203info->output[i].num_components);204205streamout->buffer_stream |= info->output[i].stream << (buf_idx * 4);206207/**208* Check if the destination offset of the current output209* is at the expected offset. If it is greater, then that means210* there is a gap in the stream output. We need to insert211* extra declaration entries with an invalid register index212* to specify a gap.213*/214while (info->output[i].dst_offset > dstOffset[buf_idx]) {215216unsigned numComponents = info->output[i].dst_offset -217dstOffset[buf_idx];;218219assert(svga_have_sm5(svga));220221/* We can only specify at most 4 components to skip in each222* declaration entry.223*/224numComponents = numComponents > 4 ? 4 : numComponents;225226decls[numDecls].outputSlot = buf_idx,227decls[numDecls].stream = info->output[i].stream;228decls[numDecls].registerIndex = SVGA3D_INVALID_ID;229decls[numDecls].registerMask = (1 << numComponents) - 1;230231dstOffset[buf_idx] += numComponents;232numDecls++;233}234235if (sem_name == TGSI_SEMANTIC_POSITION) {236/**237* Check if streaming out POSITION. If so, replace the238* register index with the index for NON_ADJUSTED POSITION.239*/240decls[numDecls].registerIndex = shader->info.num_outputs;241242/* Save this output index, so we can tell later if this stream output243* includes an output of a vertex position244*/245streamout->pos_out_index = numDecls;246}247else if (sem_name == TGSI_SEMANTIC_CLIPDIST) {248/**249* Use the shadow copy for clip distance because250* CLIPDIST instruction is only emitted for enabled clip planes.251* It's valid to write to ClipDistance variable for non-enabled252* clip planes.253*/254decls[numDecls].registerIndex =255shader->info.num_outputs + 1 +256shader->info.output_semantic_index[reg_idx];257}258else {259decls[numDecls].registerIndex = reg_idx;260}261262decls[numDecls].outputSlot = buf_idx;263decls[numDecls].registerMask =264((1 << info->output[i].num_components) - 1)265<< info->output[i].start_component;266267decls[numDecls].stream = info->output[i].stream;268assert(decls[numDecls].stream == 0 || svga_have_sm5(svga));269270/* Set the bit in streammask for the enabled stream */271streamout->streammask |= 1 << info->output[i].stream;272273/* Update the expected offset for the next output */274dstOffset[buf_idx] += info->output[i].num_components;275276strides[buf_idx] = info->stride[buf_idx] * sizeof(float);277}278279assert(numDecls <= maxDecls);280281/* Send the DefineStreamOutput command.282* Note, rasterizedStream is always 0.283*/284ret = svga_define_stream_output(svga, id,285numDecls, numStreamStrides+1,286strides, decls, 0, streamout);287288if (ret != PIPE_OK) {289util_bitmask_clear(svga->stream_output_id_bm, id);290FREE(streamout);291streamout = NULL;292}293return streamout;294}295296297enum pipe_error298svga_set_stream_output(struct svga_context *svga,299struct svga_stream_output *streamout)300{301unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID;302303if (!svga_have_vgpu10(svga)) {304return PIPE_OK;305}306307SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x id=%d\n", __FUNCTION__,308streamout, id);309310if (svga->current_so != streamout) {311312/* Before unbinding the current stream output, stop the stream output313* statistics queries for the active streams.314*/315if (svga_have_sm5(svga) && svga->current_so) {316svga->vcount_buffer_stream = svga->current_so->buffer_stream;317svga_end_stream_output_queries(svga, svga->current_so->streammask);318}319320enum pipe_error ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);321if (ret != PIPE_OK) {322return ret;323}324325svga->current_so = streamout;326327/* After binding the new stream output, start the stream output328* statistics queries for the active streams.329*/330if (svga_have_sm5(svga) && svga->current_so) {331svga_begin_stream_output_queries(svga, svga->current_so->streammask);332}333}334335return PIPE_OK;336}337338void339svga_delete_stream_output(struct svga_context *svga,340struct svga_stream_output *streamout)341{342struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;343344SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __FUNCTION__, streamout);345346assert(svga_have_vgpu10(svga));347assert(streamout != NULL);348349SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyStreamOutput(svga->swc,350streamout->id));351352if (svga_have_sm5(svga) && streamout->declBuf) {353sws->buffer_destroy(sws, streamout->declBuf);354}355356/* Before deleting the current streamout, make sure to stop any pending357* SO queries.358*/359if (svga->current_so == streamout) {360if (svga->in_streamout)361svga_end_stream_output_queries(svga, svga->current_so->streammask);362svga->current_so = NULL;363}364365/* Release the ID */366util_bitmask_clear(svga->stream_output_id_bm, streamout->id);367368/* Free streamout structure */369FREE(streamout);370}371372373static struct pipe_stream_output_target *374svga_create_stream_output_target(struct pipe_context *pipe,375struct pipe_resource *buffer,376unsigned buffer_offset,377unsigned buffer_size)378{379struct svga_context *svga = svga_context(pipe);380struct svga_stream_output_target *sot;381382SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __FUNCTION__,383buffer_offset, buffer_size);384385assert(svga_have_vgpu10(svga));386(void) svga;387388sot = CALLOC_STRUCT(svga_stream_output_target);389if (!sot)390return NULL;391392pipe_reference_init(&sot->base.reference, 1);393pipe_resource_reference(&sot->base.buffer, buffer);394sot->base.context = pipe;395sot->base.buffer = buffer;396sot->base.buffer_offset = buffer_offset;397sot->base.buffer_size = buffer_size;398399return &sot->base;400}401402static void403svga_destroy_stream_output_target(struct pipe_context *pipe,404struct pipe_stream_output_target *target)405{406struct svga_stream_output_target *sot = svga_stream_output_target(target);407408SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __FUNCTION__);409410pipe_resource_reference(&sot->base.buffer, NULL);411FREE(sot);412}413414static void415svga_set_stream_output_targets(struct pipe_context *pipe,416unsigned num_targets,417struct pipe_stream_output_target **targets,418const unsigned *offsets)419{420struct svga_context *svga = svga_context(pipe);421struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];422unsigned i;423unsigned num_so_targets;424boolean begin_so_queries = num_targets > 0;425426SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__,427num_targets);428429assert(svga_have_vgpu10(svga));430431/* Mark the streamout buffers as dirty so that we'll issue readbacks432* before mapping.433*/434for (i = 0; i < svga->num_so_targets; i++) {435struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer);436sbuf->dirty = TRUE;437}438439/* Before the currently bound streamout targets are unbound,440* save them in case they need to be referenced to retrieve the441* number of vertices being streamed out.442*/443for (i = 0; i < ARRAY_SIZE(svga->so_targets); i++) {444svga->vcount_so_targets[i] = svga->so_targets[i];445}446447assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);448449for (i = 0; i < num_targets; i++) {450struct svga_stream_output_target *sot451= svga_stream_output_target(targets[i]);452unsigned size;453454svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer,455PIPE_BIND_STREAM_OUTPUT);456457assert(svga_buffer(sot->base.buffer)->key.flags458& SVGA3D_SURFACE_BIND_STREAM_OUTPUT);459460svga->so_targets[i] = &sot->base;461if (offsets[i] == -1) {462soBindings[i].offset = -1;463464/* The streamout is being resumed. There is no need to restart streamout statistics465* queries for the draw-auto fallback since those queries are still active.466*/467begin_so_queries = FALSE;468}469else470soBindings[i].offset = sot->base.buffer_offset + offsets[i];471472/* The size cannot extend beyond the end of the buffer. Clamp it. */473size = MIN2(sot->base.buffer_size,474sot->base.buffer->width0 - sot->base.buffer_offset);475476soBindings[i].sizeInBytes = size;477}478479/* unbind any previously bound stream output buffers */480for (; i < svga->num_so_targets; i++) {481svga->so_surfaces[i] = NULL;482svga->so_targets[i] = NULL;483}484485num_so_targets = MAX2(svga->num_so_targets, num_targets);486SVGA_RETRY(svga, SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,487soBindings, svga->so_surfaces));488svga->num_so_targets = num_targets;489490if (svga_have_sm5(svga) && svga->current_so && begin_so_queries) {491492/* If there are already active queries and we need to start a new streamout,493* we need to stop the current active queries first.494*/495if (svga->in_streamout) {496svga_end_stream_output_queries(svga, svga->current_so->streammask);497}498499/* Start stream out statistics queries for the new streamout */500svga_begin_stream_output_queries(svga, svga->current_so->streammask);501}502}503504/**505* Rebind stream output target surfaces506*/507enum pipe_error508svga_rebind_stream_output_targets(struct svga_context *svga)509{510struct svga_winsys_context *swc = svga->swc;511enum pipe_error ret;512unsigned i;513514for (i = 0; i < svga->num_so_targets; i++) {515ret = swc->resource_rebind(swc, svga->so_surfaces[i], NULL, SVGA_RELOC_WRITE);516if (ret != PIPE_OK)517return ret;518}519520return PIPE_OK;521}522523524void525svga_init_stream_output_functions(struct svga_context *svga)526{527svga->pipe.create_stream_output_target = svga_create_stream_output_target;528svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target;529svga->pipe.set_stream_output_targets = svga_set_stream_output_targets;530}531532533/**534* A helper function to create stream output statistics queries for each stream.535* These queries are created as a workaround for DrawTransformFeedbackInstanced or536* DrawTransformFeedbackStreamInstanced when auto draw doesn't support537* instancing or non-0 stream. In this case, the vertex count will538* be retrieved from the stream output statistics query.539*/540void541svga_create_stream_output_queries(struct svga_context *svga)542{543unsigned i;544545if (!svga_have_sm5(svga))546return;547548for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {549svga->so_queries[i] = svga->pipe.create_query(&svga->pipe,550PIPE_QUERY_SO_STATISTICS, i);551assert(svga->so_queries[i] != NULL);552}553}554555556/**557* Destroy the stream output statistics queries for the draw-auto workaround.558*/559void560svga_destroy_stream_output_queries(struct svga_context *svga)561{562unsigned i;563564if (!svga_have_sm5(svga))565return;566567for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {568svga->pipe.destroy_query(&svga->pipe, svga->so_queries[i]);569}570}571572573/**574* Start stream output statistics queries for the active streams.575*/576void577svga_begin_stream_output_queries(struct svga_context *svga,578unsigned streammask)579{580assert(svga_have_sm5(svga));581assert(!svga->in_streamout);582583for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {584bool ret;585if (streammask & (1 << i)) {586ret = svga->pipe.begin_query(&svga->pipe, svga->so_queries[i]);587}588(void) ret;589}590svga->in_streamout = TRUE;591592return;593}594595596/**597* Stop stream output statistics queries for the active streams.598*/599void600svga_end_stream_output_queries(struct svga_context *svga,601unsigned streammask)602{603assert(svga_have_sm5(svga));604605if (!svga->in_streamout)606return;607608for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {609bool ret;610if (streammask & (1 << i)) {611ret = svga->pipe.end_query(&svga->pipe, svga->so_queries[i]);612}613(void) ret;614}615svga->in_streamout = FALSE;616617return;618}619620621/**622* Return the primitive count returned from the stream output statistics query623* for the specified stream.624*/625unsigned626svga_get_primcount_from_stream_output(struct svga_context *svga,627unsigned stream)628{629unsigned primcount = 0;630union pipe_query_result result;631bool ret;632633if (svga->current_so) {634svga_end_stream_output_queries(svga, svga->current_so->streammask);635}636637ret = svga->pipe.get_query_result(&svga->pipe,638svga->so_queries[stream],639TRUE, &result);640if (ret)641primcount = result.so_statistics.num_primitives_written;642643return primcount;644}645646647