Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_state_msaa.c
4570 views
/*1* Copyright 2014 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,20* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#include "si_build_pm4.h"2526/* For MSAA sample positions. */27#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \28((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) | \29(((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) | \30(((unsigned)(s2y)&0xf) << 20) | (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28))3132/* For obtaining location coordinates from registers */33#define SEXT4(x) ((int)((x) | ((x)&0x8 ? 0xfffffff0 : 0)))34#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index)*4)) & 0xf)35#define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2)36#define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1)3738/* The following sample ordering is required by EQAA.39*40* Sample 0 is approx. in the top-left quadrant.41* Sample 1 is approx. in the bottom-right quadrant.42*43* Sample 2 is approx. in the bottom-left quadrant.44* Sample 3 is approx. in the top-right quadrant.45* (sample I={2,3} adds more detail to the vicinity of sample I-2)46*47* Sample 4 is approx. in the same quadrant as sample 0. (top-left)48* Sample 5 is approx. in the same quadrant as sample 1. (bottom-right)49* Sample 6 is approx. in the same quadrant as sample 2. (bottom-left)50* Sample 7 is approx. in the same quadrant as sample 3. (top-right)51* (sample I={4,5,6,7} adds more detail to the vicinity of sample I-4)52*53* The next 8 samples add more detail to the vicinity of the previous samples.54* (sample I (I >= 8) adds more detail to the vicinity of sample I-8)55*56* The ordering is specified such that:57* If we take the first 2 samples, we should get good 2x MSAA.58* If we add 2 more samples, we should get good 4x MSAA with the same sample locations.59* If we add 4 more samples, we should get good 8x MSAA with the same sample locations.60* If we add 8 more samples, we should get perfect 16x MSAA with the same sample locations.61*62* The ordering also allows finding samples in the same vicinity.63*64* Group N of 2 samples in the same vicinity in 16x MSAA: {N,N+8}65* Group N of 2 samples in the same vicinity in 8x MSAA: {N,N+4}66* Group N of 2 samples in the same vicinity in 4x MSAA: {N,N+2}67*68* Groups of 4 samples in the same vicinity in 16x MSAA:69* Top left: {0,4,8,12}70* Bottom right: {1,5,9,13}71* Bottom left: {2,6,10,14}72* Top right: {3,7,11,15}73*74* Groups of 4 samples in the same vicinity in 8x MSAA:75* Left half: {0,2,4,6}76* Right half: {1,3,5,7}77*78* Groups of 8 samples in the same vicinity in 16x MSAA:79* Left half: {0,2,4,6,8,10,12,14}80* Right half: {1,3,5,7,9,11,13,15}81*/8283/* Important note: We have to use the standard DX positions, because84* the primitive discard compute shader relies on them.85*/8687/* 1x MSAA */88static const uint32_t sample_locs_1x =89FILL_SREG(0, 0, 0, 0, 0, 0, 0, 0); /* S1, S2, S3 fields are not used by 1x */90static const uint64_t centroid_priority_1x = 0x0000000000000000ull;9192/* 2x MSAA (the positions are sorted for EQAA) */93static const uint32_t sample_locs_2x =94FILL_SREG(-4, -4, 4, 4, 0, 0, 0, 0); /* S2 & S3 fields are not used by 2x MSAA */95static const uint64_t centroid_priority_2x = 0x1010101010101010ull;9697/* 4x MSAA (the positions are sorted for EQAA) */98static const uint32_t sample_locs_4x = FILL_SREG(-2, -6, 2, 6, -6, 2, 6, -2);99static const uint64_t centroid_priority_4x = 0x3210321032103210ull;100101/* 8x MSAA (the positions are sorted for EQAA) */102static const uint32_t sample_locs_8x[] = {103FILL_SREG(-3, -5, 5, 1, -1, 3, 7, -7),104FILL_SREG(-7, -1, 3, 7, -5, 5, 1, -3),105/* The following are unused by hardware, but we emit them to IBs106* instead of multiple SET_CONTEXT_REG packets. */1070,1080,109};110static const uint64_t centroid_priority_8x = 0x3546012735460127ull;111112/* 16x MSAA (the positions are sorted for EQAA) */113static const uint32_t sample_locs_16x[] = {114FILL_SREG(-5, -2, 5, 3, -2, 6, 3, -5),115FILL_SREG(-4, -6, 1, 1, -6, 4, 7, -4),116FILL_SREG(-1, -3, 6, 7, -3, 2, 0, -7),117FILL_SREG(-7, -8, 2, 5, -8, 0, 4, -1),118};119static const uint64_t centroid_priority_16x = 0xc97e64b231d0fa85ull;120121static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count,122unsigned sample_index, float *out_value)123{124const uint32_t *sample_locs;125126switch (sample_count) {127case 1:128default:129sample_locs = &sample_locs_1x;130break;131case 2:132sample_locs = &sample_locs_2x;133break;134case 4:135sample_locs = &sample_locs_4x;136break;137case 8:138sample_locs = sample_locs_8x;139break;140case 16:141sample_locs = sample_locs_16x;142break;143}144145out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f;146out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f;147}148149static void si_emit_max_4_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority,150uint32_t sample_locs)151{152radeon_begin(cs);153radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);154radeon_emit(cs, centroid_priority);155radeon_emit(cs, centroid_priority >> 32);156radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs);157radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs);158radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs);159radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs);160radeon_end();161}162163static void si_emit_max_16_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority,164const uint32_t *sample_locs, unsigned num_samples)165{166radeon_begin(cs);167radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);168radeon_emit(cs, centroid_priority);169radeon_emit(cs, centroid_priority >> 32);170radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,171num_samples == 8 ? 14 : 16);172radeon_emit_array(cs, sample_locs, 4);173radeon_emit_array(cs, sample_locs, 4);174radeon_emit_array(cs, sample_locs, 4);175radeon_emit_array(cs, sample_locs, num_samples == 8 ? 2 : 4);176radeon_end();177}178179void si_emit_sample_locations(struct radeon_cmdbuf *cs, int nr_samples)180{181switch (nr_samples) {182default:183case 1:184si_emit_max_4_sample_locs(cs, centroid_priority_1x, sample_locs_1x);185break;186case 2:187si_emit_max_4_sample_locs(cs, centroid_priority_2x, sample_locs_2x);188break;189case 4:190si_emit_max_4_sample_locs(cs, centroid_priority_4x, sample_locs_4x);191break;192case 8:193si_emit_max_16_sample_locs(cs, centroid_priority_8x, sample_locs_8x, 8);194break;195case 16:196si_emit_max_16_sample_locs(cs, centroid_priority_16x, sample_locs_16x, 16);197break;198}199}200201void si_init_msaa_functions(struct si_context *sctx)202{203int i;204205sctx->b.get_sample_position = si_get_sample_position;206207si_get_sample_position(&sctx->b, 1, 0, sctx->sample_positions.x1[0]);208209for (i = 0; i < 2; i++)210si_get_sample_position(&sctx->b, 2, i, sctx->sample_positions.x2[i]);211for (i = 0; i < 4; i++)212si_get_sample_position(&sctx->b, 4, i, sctx->sample_positions.x4[i]);213for (i = 0; i < 8; i++)214si_get_sample_position(&sctx->b, 8, i, sctx->sample_positions.x8[i]);215for (i = 0; i < 16; i++)216si_get_sample_position(&sctx->b, 16, i, sctx->sample_positions.x16[i]);217}218219220