/* Path: blob/21.2-virgl/src/gallium/auxiliary/draw/draw_pipe_aaline.c
 * 4565 views
 */
/**************************************************************************1*2* Copyright 2007-2018 VMware, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627/**28* AA line stage: AA lines are converted triangles (with extra generic)29*30* Authors: Brian Paul31*/323334#include "pipe/p_context.h"35#include "pipe/p_defines.h"36#include "pipe/p_shader_tokens.h"37#include "util/u_inlines.h"3839#include "util/format/u_format.h"40#include "util/u_math.h"41#include "util/u_memory.h"4243#include "tgsi/tgsi_transform.h"44#include "tgsi/tgsi_dump.h"4546#include "draw_context.h"47#include "draw_private.h"48#include "draw_pipe.h"4950#include "nir.h"51#include "nir/nir_draw_helpers.h"5253/** Approx number of new tokens for instructions in aa_transform_inst() */54#define NUM_NEW_TOKENS 53555657/**58* Subclass of pipe_shader_state to 
carry extra fragment shader info.59*/60struct aaline_fragment_shader61{62struct pipe_shader_state state;63void *driver_fs;64void *aaline_fs;65int generic_attrib; /**< generic used for distance */66};676869/**70* Subclass of draw_stage71*/72struct aaline_stage73{74struct draw_stage stage;7576float half_line_width;7778/** For AA lines, this is the vertex attrib slot for new generic */79uint coord_slot;80/** position, not necessarily output zero */81uint pos_slot;828384/*85* Currently bound state86*/87struct aaline_fragment_shader *fs;8889/*90* Driver interface/override functions91*/92void * (*driver_create_fs_state)(struct pipe_context *,93const struct pipe_shader_state *);94void (*driver_bind_fs_state)(struct pipe_context *, void *);95void (*driver_delete_fs_state)(struct pipe_context *, void *);96};979899100/**101* Subclass of tgsi_transform_context, used for transforming the102* user's fragment shader to add the special AA instructions.103*/104struct aa_transform_context {105struct tgsi_transform_context base;106uint64_t tempsUsed; /**< bitmask */107int colorOutput; /**< which output is the primary color */108int maxInput, maxGeneric; /**< max input index found */109int colorTemp, aaTemp; /**< temp registers */110};111112/**113* TGSI declaration transform callback.114* Look for a free input attrib, and two free temp regs.115*/116static void117aa_transform_decl(struct tgsi_transform_context *ctx,118struct tgsi_full_declaration *decl)119{120struct aa_transform_context *aactx = (struct aa_transform_context *)ctx;121122if (decl->Declaration.File == TGSI_FILE_OUTPUT &&123decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&124decl->Semantic.Index == 0) {125aactx->colorOutput = decl->Range.First;126}127else if (decl->Declaration.File == TGSI_FILE_INPUT) {128if ((int) decl->Range.Last > aactx->maxInput)129aactx->maxInput = decl->Range.Last;130if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&131(int) decl->Semantic.Index > aactx->maxGeneric) {132aactx->maxGeneric = 
decl->Semantic.Index;133}134}135else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {136uint i;137for (i = decl->Range.First;138i <= decl->Range.Last; i++) {139/*140* XXX this bitfield doesn't really cut it...141*/142aactx->tempsUsed |= UINT64_C(1) << i;143}144}145146ctx->emit_declaration(ctx, decl);147}148149150/**151* Find the lowest zero bit, or -1 if bitfield is all ones.152*/153static int154free_bit(uint64_t bitfield)155{156return ffsll(~bitfield) - 1;157}158159160/**161* TGSI transform prolog callback.162*/163static void164aa_transform_prolog(struct tgsi_transform_context *ctx)165{166struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;167uint64_t usedTemps = aactx->tempsUsed;168169/* find two free temp regs */170aactx->colorTemp = free_bit(usedTemps);171usedTemps |= UINT64_C(1) << aactx->colorTemp;172aactx->aaTemp = free_bit(usedTemps);173assert(aactx->colorTemp >= 0);174assert(aactx->aaTemp >= 0);175176/* declare new generic input/texcoord */177tgsi_transform_input_decl(ctx, aactx->maxInput + 1,178TGSI_SEMANTIC_GENERIC, aactx->maxGeneric + 1,179TGSI_INTERPOLATE_LINEAR);180181/* declare new temp regs */182tgsi_transform_temp_decl(ctx, aactx->aaTemp);183tgsi_transform_temp_decl(ctx, aactx->colorTemp);184}185186187/**188* TGSI transform epilog callback.189*/190static void191aa_transform_epilog(struct tgsi_transform_context *ctx)192{193struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;194195if (aactx->colorOutput != -1) {196struct tgsi_full_instruction inst;197/* insert distance-based coverage code for antialiasing. 
*/198199/* saturate(linewidth - fabs(interpx), linelength - fabs(interpz) */200inst = tgsi_default_full_instruction();201inst.Instruction.Saturate = true;202inst.Instruction.Opcode = TGSI_OPCODE_ADD;203inst.Instruction.NumDstRegs = 1;204tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,205aactx->aaTemp, TGSI_WRITEMASK_XZ);206inst.Instruction.NumSrcRegs = 2;207tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_INPUT, aactx->maxInput + 1,208TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,209TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);210tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_INPUT, aactx->maxInput + 1,211TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y,212TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);213inst.Src[1].Register.Absolute = true;214inst.Src[1].Register.Negate = true;215ctx->emit_instruction(ctx, &inst);216217/* MUL width / height alpha */218tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,219TGSI_FILE_TEMPORARY, aactx->aaTemp,220TGSI_WRITEMASK_W,221TGSI_FILE_TEMPORARY, aactx->aaTemp,222TGSI_SWIZZLE_X,223TGSI_FILE_TEMPORARY, aactx->aaTemp,224TGSI_SWIZZLE_Z, false);225226/* MOV rgb */227tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,228TGSI_FILE_OUTPUT, aactx->colorOutput,229TGSI_WRITEMASK_XYZ,230TGSI_FILE_TEMPORARY, aactx->colorTemp);231232/* MUL alpha */233tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,234TGSI_FILE_OUTPUT, aactx->colorOutput,235TGSI_WRITEMASK_W,236TGSI_FILE_TEMPORARY, aactx->colorTemp,237TGSI_FILE_TEMPORARY, aactx->aaTemp, false);238}239}240241242/**243* TGSI instruction transform callback.244* Replace writes to result.color w/ a temp reg.245*/246static void247aa_transform_inst(struct tgsi_transform_context *ctx,248struct tgsi_full_instruction *inst)249{250struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;251uint i;252253/*254* Look for writes to result.color and replace with colorTemp reg.255*/256for (i = 0; i < inst->Instruction.NumDstRegs; i++) {257struct tgsi_full_dst_register *dst = &inst->Dst[i];258if (dst->Register.File == TGSI_FILE_OUTPUT 
&&259dst->Register.Index == aactx->colorOutput) {260dst->Register.File = TGSI_FILE_TEMPORARY;261dst->Register.Index = aactx->colorTemp;262}263}264265ctx->emit_instruction(ctx, inst);266}267268269/**270* Generate the frag shader we'll use for drawing AA lines.271* This will be the user's shader plus some arithmetic instructions.272*/273static boolean274generate_aaline_fs(struct aaline_stage *aaline)275{276struct pipe_context *pipe = aaline->stage.draw->pipe;277const struct pipe_shader_state *orig_fs = &aaline->fs->state;278struct pipe_shader_state aaline_fs;279struct aa_transform_context transform;280const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;281282aaline_fs = *orig_fs; /* copy to init */283aaline_fs.tokens = tgsi_alloc_tokens(newLen);284if (aaline_fs.tokens == NULL)285return FALSE;286287memset(&transform, 0, sizeof(transform));288transform.colorOutput = -1;289transform.maxInput = -1;290transform.maxGeneric = -1;291transform.colorTemp = -1;292transform.aaTemp = -1;293transform.base.prolog = aa_transform_prolog;294transform.base.epilog = aa_transform_epilog;295transform.base.transform_instruction = aa_transform_inst;296transform.base.transform_declaration = aa_transform_decl;297298tgsi_transform_shader(orig_fs->tokens,299(struct tgsi_token *) aaline_fs.tokens,300newLen, &transform.base);301302#if 0 /* DEBUG */303debug_printf("draw_aaline, orig shader:\n");304tgsi_dump(orig_fs->tokens, 0);305debug_printf("draw_aaline, new shader:\n");306tgsi_dump(aaline_fs.tokens, 0);307#endif308309aaline->fs->aaline_fs = aaline->driver_create_fs_state(pipe, &aaline_fs);310if (aaline->fs->aaline_fs != NULL)311aaline->fs->generic_attrib = transform.maxGeneric + 1;312313FREE((void *)aaline_fs.tokens);314return aaline->fs->aaline_fs != NULL;315}316317static boolean318generate_aaline_fs_nir(struct aaline_stage *aaline)319{320struct pipe_context *pipe = aaline->stage.draw->pipe;321const struct pipe_shader_state *orig_fs = &aaline->fs->state;322struct 
pipe_shader_state aaline_fs;323324aaline_fs = *orig_fs; /* copy to init */325aaline_fs.ir.nir = nir_shader_clone(NULL, orig_fs->ir.nir);326if (!aaline_fs.ir.nir)327return FALSE;328329nir_lower_aaline_fs(aaline_fs.ir.nir, &aaline->fs->generic_attrib);330aaline->fs->aaline_fs = aaline->driver_create_fs_state(pipe, &aaline_fs);331if (aaline->fs->aaline_fs == NULL)332return FALSE;333334return TRUE;335}336337/**338* When we're about to draw our first AA line in a batch, this function is339* called to tell the driver to bind our modified fragment shader.340*/341static boolean342bind_aaline_fragment_shader(struct aaline_stage *aaline)343{344struct draw_context *draw = aaline->stage.draw;345struct pipe_context *pipe = draw->pipe;346347if (!aaline->fs->aaline_fs) {348if (aaline->fs->state.type == PIPE_SHADER_IR_NIR) {349if (!generate_aaline_fs_nir(aaline))350return FALSE;351} else352if (!generate_aaline_fs(aaline))353return FALSE;354}355356draw->suspend_flushing = TRUE;357aaline->driver_bind_fs_state(pipe, aaline->fs->aaline_fs);358draw->suspend_flushing = FALSE;359360return TRUE;361}362363364365static inline struct aaline_stage *366aaline_stage(struct draw_stage *stage)367{368return (struct aaline_stage *) stage;369}370371372/**373* Draw a wide line by drawing a quad, using geometry which will374* fullfill GL's antialiased line requirements.375*/376static void377aaline_line(struct draw_stage *stage, struct prim_header *header)378{379const struct aaline_stage *aaline = aaline_stage(stage);380const float half_width = aaline->half_line_width;381struct prim_header tri;382struct vertex_header *v[8];383uint coordPos = aaline->coord_slot;384uint posPos = aaline->pos_slot;385float *pos, *tex;386float dx = header->v[1]->data[posPos][0] - header->v[0]->data[posPos][0];387float dy = header->v[1]->data[posPos][1] - header->v[0]->data[posPos][1];388float a = atan2f(dy, dx);389float c_a = cosf(a), s_a = sinf(a);390float half_length;391float t_l, t_w;392uint i;393394half_length = 0.5f * 
sqrtf(dx * dx + dy * dy);395396if (half_length < 0.5f) {397/*398* The logic we use for "normal" sized segments is incorrect399* for very short segments (basically because we only have400* one value to interpolate, not a distance to each endpoint).401* Therefore, we calculate half_length differently, so that for402* original line length (near) 0, we get alpha 0 - otherwise403* max alpha would still be 0.5. This also prevents us from404* artifacts due to degenerated lines (the endpoints being405* identical, which would still receive anywhere from alpha406* 0-0.5 otherwise) (at least the pstipple stage may generate407* such lines due to float inaccuracies if line length is very408* close to a integer).409* Might not be fully accurate neither (because the "strength" of410* the line is going to be determined by how close to the pixel411* center those 1 or 2 fragments are) but it's probably the best412* we can do.413*/414half_length = 2.0f * half_length;415} else {416half_length = half_length + 0.5f;417}418419t_w = half_width;420t_l = 0.5f;421422/* allocate/dup new verts */423for (i = 0; i < 4; i++) {424v[i] = dup_vert(stage, header->v[i/2], i);425}426427/*428* Quad strip for line from v0 to v1 (*=endpoints):429*430* 1 3431* +-----------------------------+432* | |433* | *v0 v1* |434* | |435* +-----------------------------+436* 0 2437*/438439/*440* We increase line length by 0.5 pixels (at each endpoint),441* and calculate the tri endpoints by moving them half-width442* distance away perpendicular to the line.443* XXX: since we change line endpoints (by 0.5 pixel), should444* actually re-interpolate all other values?445*/446447/* new verts */448pos = v[0]->data[posPos];449pos[0] += (-t_l * c_a - t_w * s_a);450pos[1] += (-t_l * s_a + t_w * c_a);451452pos = v[1]->data[posPos];453pos[0] += (-t_l * c_a - -t_w * s_a);454pos[1] += (-t_l * s_a + -t_w * c_a);455456pos = v[2]->data[posPos];457pos[0] += (t_l * c_a - t_w * s_a);458pos[1] += (t_l * s_a + t_w * c_a);459460pos = 
v[3]->data[posPos];461pos[0] += (t_l * c_a - -t_w * s_a);462pos[1] += (t_l * s_a + -t_w * c_a);463464/* new texcoords */465tex = v[0]->data[coordPos];466ASSIGN_4V(tex, -half_width, half_width, -half_length, half_length);467468tex = v[1]->data[coordPos];469ASSIGN_4V(tex, half_width, half_width, -half_length, half_length);470471tex = v[2]->data[coordPos];472ASSIGN_4V(tex, -half_width, half_width, half_length, half_length);473474tex = v[3]->data[coordPos];475ASSIGN_4V(tex, half_width, half_width, half_length, half_length);476477tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0];478stage->next->tri(stage->next, &tri);479480tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2];481stage->next->tri(stage->next, &tri);482}483484485static void486aaline_first_line(struct draw_stage *stage, struct prim_header *header)487{488auto struct aaline_stage *aaline = aaline_stage(stage);489struct draw_context *draw = stage->draw;490struct pipe_context *pipe = draw->pipe;491const struct pipe_rasterizer_state *rast = draw->rasterizer;492void *r;493494assert(draw->rasterizer->line_smooth && !draw->rasterizer->multisample);495496if (draw->rasterizer->line_width <= 1.0)497aaline->half_line_width = 1.0;498else499aaline->half_line_width = 0.5f * draw->rasterizer->line_width + 0.5f;500501if (!draw->rasterizer->half_pixel_center)502/*503* The tex coords probably would need adjustments?504*/505debug_printf("aa lines without half pixel center may be wrong\n");506507/*508* Bind (generate) our fragprog509*/510if (!bind_aaline_fragment_shader(aaline)) {511stage->line = draw_pipe_passthrough_line;512stage->line(stage, header);513return;514}515516draw_aaline_prepare_outputs(draw, draw->pipeline.aaline);517518draw->suspend_flushing = TRUE;519520/* Disable triangle culling, stippling, unfilled mode etc. 
*/521r = draw_get_rasterizer_no_cull(draw, rast);522pipe->bind_rasterizer_state(pipe, r);523524draw->suspend_flushing = FALSE;525526/* now really draw first line */527stage->line = aaline_line;528stage->line(stage, header);529}530531532static void533aaline_flush(struct draw_stage *stage, unsigned flags)534{535struct draw_context *draw = stage->draw;536struct aaline_stage *aaline = aaline_stage(stage);537struct pipe_context *pipe = draw->pipe;538539stage->line = aaline_first_line;540stage->next->flush(stage->next, flags);541542/* restore original frag shader */543draw->suspend_flushing = TRUE;544aaline->driver_bind_fs_state(pipe, aaline->fs ? aaline->fs->driver_fs : NULL);545546/* restore original rasterizer state */547if (draw->rast_handle) {548pipe->bind_rasterizer_state(pipe, draw->rast_handle);549}550551draw->suspend_flushing = FALSE;552553draw_remove_extra_vertex_attribs(draw);554}555556557static void558aaline_reset_stipple_counter(struct draw_stage *stage)559{560stage->next->reset_stipple_counter(stage->next);561}562563564static void565aaline_destroy(struct draw_stage *stage)566{567struct aaline_stage *aaline = aaline_stage(stage);568struct pipe_context *pipe = stage->draw->pipe;569570draw_free_temp_verts(stage);571572/* restore the old entry points */573pipe->create_fs_state = aaline->driver_create_fs_state;574pipe->bind_fs_state = aaline->driver_bind_fs_state;575pipe->delete_fs_state = aaline->driver_delete_fs_state;576577FREE(stage);578}579580581static struct aaline_stage *582draw_aaline_stage(struct draw_context *draw)583{584struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage);585if (!aaline)586return NULL;587588aaline->stage.draw = draw;589aaline->stage.name = "aaline";590aaline->stage.next = NULL;591aaline->stage.point = draw_pipe_passthrough_point;592aaline->stage.line = aaline_first_line;593aaline->stage.tri = draw_pipe_passthrough_tri;594aaline->stage.flush = aaline_flush;595aaline->stage.reset_stipple_counter = 
aaline_reset_stipple_counter;596aaline->stage.destroy = aaline_destroy;597598if (!draw_alloc_temp_verts(&aaline->stage, 8)) {599aaline->stage.destroy(&aaline->stage);600return NULL;601}602603return aaline;604}605606607static struct aaline_stage *608aaline_stage_from_pipe(struct pipe_context *pipe)609{610struct draw_context *draw = (struct draw_context *) pipe->draw;611612if (draw) {613return aaline_stage(draw->pipeline.aaline);614} else {615return NULL;616}617}618619620/**621* This function overrides the driver's create_fs_state() function and622* will typically be called by the gallium frontend.623*/624static void *625aaline_create_fs_state(struct pipe_context *pipe,626const struct pipe_shader_state *fs)627{628struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);629struct aaline_fragment_shader *aafs = NULL;630631if (!aaline)632return NULL;633634aafs = CALLOC_STRUCT(aaline_fragment_shader);635636if (!aafs)637return NULL;638639aafs->state.type = fs->type;640if (fs->type == PIPE_SHADER_IR_TGSI)641aafs->state.tokens = tgsi_dup_tokens(fs->tokens);642else643aafs->state.ir.nir = nir_shader_clone(NULL, fs->ir.nir);644645/* pass-through */646aafs->driver_fs = aaline->driver_create_fs_state(pipe, fs);647648return aafs;649}650651652static void653aaline_bind_fs_state(struct pipe_context *pipe, void *fs)654{655struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);656struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;657658if (!aaline) {659return;660}661662/* save current */663aaline->fs = aafs;664/* pass-through */665aaline->driver_bind_fs_state(pipe, (aafs ? 
aafs->driver_fs : NULL));666}667668669static void670aaline_delete_fs_state(struct pipe_context *pipe, void *fs)671{672struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);673struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;674675if (!aafs) {676return;677}678679if (aaline) {680/* pass-through */681aaline->driver_delete_fs_state(pipe, aafs->driver_fs);682683if (aafs->aaline_fs)684aaline->driver_delete_fs_state(pipe, aafs->aaline_fs);685}686687if (aafs->state.type == PIPE_SHADER_IR_TGSI)688FREE((void*)aafs->state.tokens);689else690ralloc_free(aafs->state.ir.nir);691FREE(aafs);692}693694695void696draw_aaline_prepare_outputs(struct draw_context *draw,697struct draw_stage *stage)698{699struct aaline_stage *aaline = aaline_stage(stage);700const struct pipe_rasterizer_state *rast = draw->rasterizer;701702/* update vertex attrib info */703aaline->pos_slot = draw_current_shader_position_output(draw);704705if (!rast->line_smooth || rast->multisample)706return;707708/* allocate the extra post-transformed vertex attribute */709if (aaline->fs && aaline->fs->aaline_fs)710aaline->coord_slot = draw_alloc_extra_vertex_attrib(draw,711TGSI_SEMANTIC_GENERIC,712aaline->fs->generic_attrib);713else714aaline->coord_slot = -1;715}716717/**718* Called by drivers that want to install this AA line prim stage719* into the draw module's pipeline. 
This will not be used if the720* hardware has native support for AA lines.721*/722boolean723draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)724{725struct aaline_stage *aaline;726727pipe->draw = (void *) draw;728729/*730* Create / install AA line drawing / prim stage731*/732aaline = draw_aaline_stage(draw);733if (!aaline)734return FALSE;735736/* save original driver functions */737aaline->driver_create_fs_state = pipe->create_fs_state;738aaline->driver_bind_fs_state = pipe->bind_fs_state;739aaline->driver_delete_fs_state = pipe->delete_fs_state;740741/* override the driver's functions */742pipe->create_fs_state = aaline_create_fs_state;743pipe->bind_fs_state = aaline_bind_fs_state;744pipe->delete_fs_state = aaline_delete_fs_state;745746/* Install once everything is known to be OK:747*/748draw->pipeline.aaline = &aaline->stage;749750return TRUE;751}752753754