// Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
// 9900 views
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// When non-zero, colors are converted with RGBToYCoCg() before being compared.
#define USE_YCOCG 1

// Per-channel difference below which pre-alpha and post-alpha colors are treated as equal.
#define fAutogenEpsilon 0.01f

// EXPERIMENTAL

// Binary mask: compares how much alpha blending changed the pixel in the current frame
// (post-alpha minus pre-alpha) with how much it changed it in the previous frame.
// The absolute per-channel difference of those two magnitudes is summed and saturated;
// the function returns 0 when that sum is below getTcThreshold() and 1 otherwise.
//
// uDispatchThreadId: pixel coordinate used for the current-frame loads.
// iPrevIdx:          pixel coordinate used for the previous-frame loads.
FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FfxFloat32x3 colorPreAlpha      = LoadOpaqueOnly(uDispatchThreadId);
    FfxFloat32x3 colorPostAlpha     = LoadInputColor(uDispatchThreadId);
    FfxFloat32x3 colorPrevPreAlpha  = LoadPrevPreAlpha(iPrevIdx);
    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);

#if USE_YCOCG
    colorPreAlpha      = RGBToYCoCg(colorPreAlpha);
    colorPostAlpha     = RGBToYCoCg(colorPostAlpha);
    colorPrevPreAlpha  = RGBToYCoCg(colorPrevPreAlpha);
    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif

    FfxFloat32x3 colorDeltaCurr = colorPostAlpha - colorPreAlpha;
    FfxFloat32x3 colorDeltaPrev = colorPrevPostAlpha - colorPrevPreAlpha;

    // Sum over the three channels of | |current delta| - |previous delta| |.
    FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(colorDeltaCurr) - abs(colorDeltaPrev)), FfxFloat32x3(1, 1, 1))));

    // cleanup very small values
    retVal = (retVal < getTcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f);

    return retVal;
}

// works ok: thin edges
//
// If the pixel shows an alpha contribution (any channel of post-alpha minus pre-alpha
// exceeds fAutogenEpsilon) in the current or in the previous frame, computes the
// per-channel ratio
//     (postAlpha - prevPostAlpha) / max(fAutogenEpsilon, preAlpha - prevPreAlpha)
// and takes its largest component; otherwise the ratio is zero. The value is then scaled
// by the length of the post-alpha color change and saturated.
//
// uDispatchThreadId: pixel coordinate used for the current-frame loads.
// iPrevIdx:          pixel coordinate used for the previous-frame loads.
FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FfxFloat32x3 colorPreAlpha      = LoadOpaqueOnly(uDispatchThreadId);
    FfxFloat32x3 colorPostAlpha     = LoadInputColor(uDispatchThreadId);
    FfxFloat32x3 colorPrevPreAlpha  = LoadPrevPreAlpha(iPrevIdx);
    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);

#if USE_YCOCG
    colorPreAlpha      = RGBToYCoCg(colorPreAlpha);
    colorPostAlpha     = RGBToYCoCg(colorPostAlpha);
    colorPrevPreAlpha  = RGBToYCoCg(colorPrevPreAlpha);
    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif

    FfxFloat32x3 colorDelta     = colorPostAlpha - colorPreAlpha;
    FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha;
    bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon)));
    bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon)));

    // Frame-to-frame change of the opaque-only color and of the final color.
    FfxFloat32x3 N        = colorPreAlpha - colorPrevPreAlpha;
    FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha;

    // The denominator is clamped from below by fAutogenEpsilon, so it is never zero or negative.
    FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon), N) : FfxFloat32x3(0, 0, 0);

    FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) );

    // only pixels that have significantly changed in color should be considered
    retVal = ffxSaturate(retVal * FFX_MIN16_F(length(NminusNA)) );

    return retVal;
}

// This function computes the TransparencyAndComposition mask:
// This mask indicates pixels that should discard locks and apply color clamping.
//
// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of
// the MVs can not be guaranteed (e.g. procedural movement or vegetation that does not have MVs to reduce the cost during rasterization)
// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting.
//
// This function takes an opaque-only and a final texture and uses internal copies of those textures from the last frame.
// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency.
// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels.
// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation.
//
// In the final step it stores the current textures in internal textures for the next frame

FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FFX_MIN16_F retVal = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx);

    // ComputeAutoTC_01 is only evaluated (and replaces the result) when ComputeAutoTC_02
    // reports more than a negligible value.
    // [branch]
    if (retVal > FFX_MIN16_F(0.01f))
    {
        retVal = ComputeAutoTC_01(uDispatchThreadId, iPrevIdx);
    }
    return retVal;
}

// Shared edge measure for computeSolidEdge() and computeAlphaEdge().
// Multiplies the absolute differences between the center value and its left/right
// neighbours (horizontal term) and its top/bottom neighbours (vertical term), then
// returns the fourth root of the product of both terms. Because the terms are
// multiplied, the result is zero as soon as any one of the four neighbours equals the center.
float computeEdgeResponse(float center, float left, float right, float top, float bottom)
{
    float gradX = abs(left - center) * abs(right - center);
    float gradY = abs(top - center) * abs(bottom - center);

    return sqrt(sqrt(gradX * gradY));
}

// Edge measure on the opaque-only image: for each pixel of the 3x3 neighbourhood the
// length of the color difference between the current opaque-only color (around curPos)
// and the previous pre-alpha color (around prevPos) is taken, and the resulting
// 3x3 values are fed to computeEdgeResponse().
float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
{
    // Row-major 3x3 neighbourhood: index = (y + 1) * 3 + (x + 1), so index 4 is the center.
    float lum[9];
    int i = 0;
    for (int y = -1; y < 2; ++y)
    {
        for (int x = -1; x < 2; ++x)
        {
            FfxFloat32x3 curCol  = LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb;
            FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb;
            lum[i++] = length(curCol - prevCol);
        }
    }

    return computeEdgeResponse(lum[4], lum[3], lum[5], lum[1], lum[7]);
}

// Edge measure on the alpha contribution: for each pixel of the 3x3 neighbourhood the
// absolute difference between final and opaque-only color is computed for the current
// frame (around curPos) and for the previous frame (around prevPos); the length of the
// difference of those two is taken, and the resulting 3x3 values are fed to
// computeEdgeResponse().
float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
{
    // Row-major 3x3 neighbourhood: index = (y + 1) * 3 + (x + 1), so index 4 is the center.
    float lum[9];
    int i = 0;
    for (int y = -1; y < 2; ++y)
    {
        for (int x = -1; x < 2; ++x)
        {
            FfxFloat32x3 curCol  = abs(LoadInputColor(curPos + FFX_MIN16_I2(x, y)).rgb - LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb);
            FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb - LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb);
            lum[i++] = length(curCol - prevCol);
        }
    }

    return computeEdgeResponse(lum[4], lum[3], lum[5], lum[1], lum[7]);
}

// Builds the per-channel min/max box of the previous post-alpha colors in the 3x3
// neighbourhood around iPrevIdx, then counts, for the 3x3 neighbourhood of current input
// colors around uDispatchThreadId, how many channel values fall outside that box.
// Returns the count divided by 27 (9 samples x 3 channels), i.e. a value in [0, 1].
FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FFX_MIN16_F retVal = FFX_MIN16_F(0.f);

    FfxFloat32x3 minPrev = FfxFloat32x3(+1000.f, +1000.f, +1000.f);
    FfxFloat32x3 maxPrev = FfxFloat32x3(-1000.f, -1000.f, -1000.f);
    for (int y = -1; y < 2; ++y)
    {
        for (int x = -1; x < 2; ++x)
        {
            FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y));

#if USE_YCOCG
            W = RGBToYCoCg(W);
#endif
            minPrev = min(minPrev, W);
            maxPrev = max(maxPrev, W);
        }
    }
    // instead of computing the overlap: simply count how many samples are outside
    // set reactive based on that
    FFX_MIN16_F count = FFX_MIN16_F(0.f);
    for (int y = -1; y < 2; ++y)
    {
        for (int x = -1; x < 2; ++x)
        {
            FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y));

#if USE_YCOCG
            Y = RGBToYCoCg(Y);
#endif
            count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
            count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
            count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
        }
    }
    retVal = count / FFX_MIN16_F(27.f);

    return retVal;
}


// This function computes the Reactive mask:
// We want pixels marked where the alpha portion of the frame changes a lot between neighbours
// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...)
// As a result history would not be trustworthy.
// On the other hand we don't want pixels marked where pre-alpha has a large difference, since those would profit from accumulation
// For mirrors we may assume the pre-alpha is pretty uniform color.
//
// This works well generally, but also marks edge pixels
FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    // we only get here if alpha has a significant contribution and has changed since last frame.
    FFX_MIN16_F retVal = FFX_MIN16_F(0.f);

    // mark pixels with huge variance in alpha as reactive
    FFX_MIN16_F alphaEdge  = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx));
    FFX_MIN16_F opaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx));
    retVal = ffxSaturate(alphaEdge - opaqueEdge);

    // NOTE: the above also marks edge pixels due to jitter; those would need to be
    // cancelled out, but no such cancellation is performed here.

    return retVal;
}