Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
9900 views
// This file is part of the FidelityFX SDK.1//2// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.3//4// Permission is hereby granted, free of charge, to any person obtaining a copy5// of this software and associated documentation files (the "Software"), to deal6// in the Software without restriction, including without limitation the rights7// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell8// copies of the Software, and to permit persons to whom the Software is9// furnished to do so, subject to the following conditions:10// The above copyright notice and this permission notice shall be included in11// all copies or substantial portions of the Software.12//13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN19// THE SOFTWARE.2021FFX_GROUPSHARED FfxUInt32 spdCounter;2223#ifndef SPD_PACKED_ONLY24FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];25FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];26FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];27FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];2829FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice)30{31FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize();32fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions());33FfxFloat32x3 fRgb = SampleInputColor(fUv);3435fRgb /= PreExposure();3637//compute log luma38const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb)));3940// Make sure out of screen pixels contribute no value to the end result41const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? fLogLuma : 0.0f;4243return FfxFloat32x4(result, 0, 0, 0);44}4546FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)47{48return SPD_LoadMipmap5(tex);49}5051void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)52{53if (index == LumaMipLevelToUse() || index == 5)54{55SPD_SetMipmap(pix, index, outValue.r);56}5758if (index == MipCount() - 1) { //accumulate on 1x1 level5960if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0))))61{62FfxFloat32 prev = SPD_LoadExposureBuffer().y;63FfxFloat32 result = outValue.r;6465if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values66{67FfxFloat32 rate = 1.0f;68result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate));69}70FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result);71SPD_SetExposureBuffer(spdOutput);72}73}74}7576void SpdIncreaseAtomicCounter(FfxUInt32 slice)77{78SPD_IncreaseAtomicCounter(spdCounter);79}8081FfxUInt32 SpdGetAtomicCounter()82{83return spdCounter;84}8586void SpdResetAtomicCounter(FfxUInt32 slice)87{88SPD_ResetAtomicCounter();89}9091FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)92{93return FfxFloat32x4(94spdIntermediateR[x][y],95spdIntermediateG[x][y],96spdIntermediateB[x][y],97spdIntermediateA[x][y]);98}99void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)100{101spdIntermediateR[x][y] = value.x;102spdIntermediateG[x][y] = value.y;103spdIntermediateB[x][y] = value.z;104spdIntermediateA[x][y] = value.w;105}106FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)107{108return (v0 + v1 + v2 + v3) * 0.25f;109}110#endif111112// define fetch and store functions Packed113#if FFX_HALF114#error Callback must be implemented115116FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16];117FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16];118119FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice)120{121return FfxFloat16x4(imgDst[0][FfxFloat32x3(tex, slice)]);122}123FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice)124{125return FfxFloat16x4(imgDst6[FfxUInt32x3(p, slice)]);126}127void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice)128{129if (index == LumaMipLevelToUse() || index == 5)130{131imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value);132return;133}134imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value);135}136void SpdIncreaseAtomicCounter(FfxUInt32 slice)137{138InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter);139}140FfxUInt32 SpdGetAtomicCounter()141{142return spdCounter;143}144void SpdResetAtomicCounter(FfxUInt32 slice)145{146rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0;147}148FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)149{150return FfxFloat16x4(151spdIntermediateRG[x][y].x,152spdIntermediateRG[x][y].y,153spdIntermediateBA[x][y].x,154spdIntermediateBA[x][y].y);155}156void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)157{158spdIntermediateRG[x][y] = value.xy;159spdIntermediateBA[x][y] = value.zw;160}161FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)162{163return (v0 + v1 + v2 + v3) * FfxFloat16(0.25);164}165#endif166167#include "ffx_spd.h"168169void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)170{171#if FFX_HALF172SpdDownsampleH(173FfxUInt32x2(WorkGroupId.xy),174FfxUInt32(LocalThreadIndex),175FfxUInt32(MipCount()),176FfxUInt32(NumWorkGroups()),177FfxUInt32(WorkGroupId.z),178FfxUInt32x2(WorkGroupOffset()));179#else180SpdDownsample(181FfxUInt32x2(WorkGroupId.xy),182FfxUInt32(LocalThreadIndex),183FfxUInt32(MipCount()),184FfxUInt32(NumWorkGroups()),185FfxUInt32(WorkGroupId.z),186FfxUInt32x2(WorkGroupOffset()));187#endif188}189190