Path: blob/main/crates/bevy_post_process/src/bloom/bloom.wgsl
6596 views
// Bloom is built on an intermediate texture that holds a chain of mip levels, each half the size of the previous one.
// Downsampling walks the chain in order: each mip (starting with the original texture) is filtered into the lower resolution mip under it.
// Upsampling walks back up: each mip (starting from the smallest mip) is filtered and blended with the higher resolution mip above it (ending on the original texture).
//
// References:
// * [COD] - Next Generation Post Processing in Call of Duty - http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
// * [PBB] - Physically Based Bloom - https://learnopengl.com/Guest-Articles/2022/Phys.-Based-Bloom

// Uniform data shared by the bloom passes in this shader.
struct BloomUniforms {
    // Terms consumed component-wise (x, y, z, w) by `soft_threshold`.
    threshold_precomputations: vec4<f32>,
    // `xy` is added to, and `zw` multiplies, the output uv in `downsample_first`.
    viewport: vec4<f32>,
    // Per-axis multiplier applied to the texel-sized sampling offsets.
    scale: vec2<f32>,
    // NOTE(review): not referenced in the visible portion of this shader.
    aspect: f32,
};

@group(0) @binding(0) var input_texture: texture_2d<f32>;
@group(0) @binding(1) var s: sampler;

@group(0) @binding(2) var<uniform> uniforms: BloomUniforms;

#ifdef FIRST_DOWNSAMPLE
// Scales `color` by a soft-threshold weight derived from its brightest channel.
// https://catlikecoding.com/unity/tutorials/advanced-rendering/bloom/#3.4
fn soft_threshold(color: vec3<f32>) -> vec3<f32> {
    let params = uniforms.threshold_precomputations;
    let peak = max(color.r, max(color.g, color.b));

    // Quadratic ramp: offset by `y`, limited to [0, z], squared and scaled by `w`.
    let clamped = clamp(peak - params.y, 0.0, params.z);
    let soft = clamped * clamped * params.w;

    // The floor on the divisor prevents division by 0.
    let weight = max(peak - params.x, soft) / max(peak, 0.00001);
    return color * weight;
}
#endif

// luminance coefficients from Rec. 709.
// https://en.wikipedia.org/wiki/Rec._709
fn tonemapping_luminance(v: vec3<f32>) -> f32 {
    return dot(v, vec3<f32>(0.2126, 0.7152, 0.0722));
}

// Approximates the linear RGB -> non-linear sRGB encoding with a plain 1.0 / 2.2 gamma curve.
//
// The base is clamped to be non-negative first: `pow` with a negative base is outside the
// function's domain and may produce NaN, which would then flow through `karis_average` into
// the returned color. Non-negative inputs are unaffected by the clamp.
fn rgb_to_srgb_simple(color: vec3<f32>) -> vec3<f32> {
    return pow(max(color, vec3<f32>(0.0)), vec3<f32>(1.0 / 2.2));
}

// Returns the Karis weight `1 / (1 + luma)` for `color`.
// http://graphicrants.blogspot.com/2013/12/tone-mapping.html
fn karis_average(color: vec3<f32>) -> f32 {
    // Luminance calculated by gamma-correcting linear RGB to non-linear sRGB using pow(color, 1.0 / 2.2)
    // and then calculating luminance based on Rec. 709 color primaries.
    let luma = tonemapping_luminance(rgb_to_srgb_simple(color)) / 4.0;
    return 1.0 / (1.0 + luma);
}

// Samples `input_texture` with a 13 tap kernel centered on `uv` and returns the filtered color.
// Taps a..i form a 3x3 grid at offsets of 0 and +-2, taps j..m form the inner diagonal at +-1.
// [COD] slide 153
fn sample_input_13_tap(uv: vec2<f32>) -> vec3<f32> {
#ifdef UNIFORM_SCALE
    // This is the fast path. When the bloom scale is uniform, the 13 tap sampling kernel can be
    // expressed with constant offsets.
    //
    // It's possible that this isn't meaningfully faster than the "slow" path. However, because it
    // is hard to test performance on all platforms, and uniform bloom is the most common case, this
    // path was retained when adding non-uniform (anamorphic) bloom. This adds a small, but nonzero,
    // cost to maintainability, but it does help me sleep at night.
    let a = textureSample(input_texture, s, uv, vec2<i32>(-2, 2)).rgb;
    let b = textureSample(input_texture, s, uv, vec2<i32>(0, 2)).rgb;
    let c = textureSample(input_texture, s, uv, vec2<i32>(2, 2)).rgb;
    let d = textureSample(input_texture, s, uv, vec2<i32>(-2, 0)).rgb;
    let e = textureSample(input_texture, s, uv).rgb;
    let f = textureSample(input_texture, s, uv, vec2<i32>(2, 0)).rgb;
    let g = textureSample(input_texture, s, uv, vec2<i32>(-2, -2)).rgb;
    let h = textureSample(input_texture, s, uv, vec2<i32>(0, -2)).rgb;
    let i = textureSample(input_texture, s, uv, vec2<i32>(2, -2)).rgb;
    let j = textureSample(input_texture, s, uv, vec2<i32>(-1, 1)).rgb;
    let k = textureSample(input_texture, s, uv, vec2<i32>(1, 1)).rgb;
    let l = textureSample(input_texture, s, uv, vec2<i32>(-1, -1)).rgb;
    let m = textureSample(input_texture, s, uv, vec2<i32>(1, -1)).rgb;
#else
    // This is the flexible, but potentially slower, path for non-uniform sampling. Because the
    // sample is not a constant, and it can fall outside of the limits imposed on constant sample
    // offsets (-8..8), we have to compute the pixel offset in uv coordinates using the size of the
    // texture.
    //
    // It isn't clear if this is meaningfully slower than using the offset syntax, the spec doesn't
    // mention it anywhere: https://www.w3.org/TR/WGSL/#texturesample, but the fact that the offset
    // syntax uses a const-expr implies that it allows some compiler optimizations - maybe more
    // impactful on mobile?
    let scale = uniforms.scale;
    // ps / pl: positive small (1 texel) / large (2 texel) offsets; ns / nl: their negatives.
    let ps = scale / vec2<f32>(textureDimensions(input_texture));
    let pl = 2.0 * ps;
    let ns = -1.0 * ps;
    let nl = -2.0 * ps;
    let a = textureSample(input_texture, s, uv + vec2<f32>(nl.x, pl.y)).rgb;
    let b = textureSample(input_texture, s, uv + vec2<f32>(0.00, pl.y)).rgb;
    let c = textureSample(input_texture, s, uv + vec2<f32>(pl.x, pl.y)).rgb;
    let d = textureSample(input_texture, s, uv + vec2<f32>(nl.x, 0.00)).rgb;
    let e = textureSample(input_texture, s, uv).rgb;
    let f = textureSample(input_texture, s, uv + vec2<f32>(pl.x, 0.00)).rgb;
    let g = textureSample(input_texture, s, uv + vec2<f32>(nl.x, nl.y)).rgb;
    let h = textureSample(input_texture, s, uv + vec2<f32>(0.00, nl.y)).rgb;
    let i = textureSample(input_texture, s, uv + vec2<f32>(pl.x, nl.y)).rgb;
    let j = textureSample(input_texture, s, uv + vec2<f32>(ns.x, ps.y)).rgb;
    let k = textureSample(input_texture, s, uv + vec2<f32>(ps.x, ps.y)).rgb;
    let l = textureSample(input_texture, s, uv + vec2<f32>(ns.x, ns.y)).rgb;
    let m = textureSample(input_texture, s, uv + vec2<f32>(ps.x, ns.y)).rgb;
#endif

#ifdef FIRST_DOWNSAMPLE
    // [COD] slide 168
    //
    // The first downsample pass reads from the rendered frame which may exhibit
    // 'fireflies' (individual very bright pixels) that should not cause the bloom effect.
    //
    // The first downsample uses a firefly-reduction method proposed by Brian Karis
    // which takes a weighted-average of the samples to limit their luma range to [0, 1].
    // This implementation matches the LearnOpenGL article [PBB].
    var group0 = (a + b + d + e) * (0.125f / 4.0f);
    var group1 = (b + c + e + f) * (0.125f / 4.0f);
    var group2 = (d + e + g + h) * (0.125f / 4.0f);
    var group3 = (e + f + h + i) * (0.125f / 4.0f);
    var group4 = (j + k + l + m) * (0.5f / 4.0f);
    group0 *= karis_average(group0);
    group1 *= karis_average(group1);
    group2 *= karis_average(group2);
    group3 *= karis_average(group3);
    group4 *= karis_average(group4);
    return group0 + group1 + group2 + group3 + group4;
#else
    // Corner taps, edge taps, then the center together with the inner diagonal taps.
    var sample = (a + c + g + i) * 0.03125;
    sample += (b + d + f + h) * 0.0625;
    sample += (e + j + k + l + m) * 0.125;
    return sample;
#endif
}

// Samples `input_texture` with a 3x3 tent kernel centered on `uv` and returns the filtered color.
// [COD] slide 162
fn sample_input_3x3_tent(uv: vec2<f32>) -> vec3<f32> {
    // While this is probably technically incorrect, it makes nonuniform bloom smoother, without
    // having any impact on uniform bloom, which simply evaluates to 1.0 here.
    let frag_size = uniforms.scale / vec2<f32>(textureDimensions(input_texture));
    let x = frag_size.x;
    let y = frag_size.y;

    let a = textureSample(input_texture, s, vec2<f32>(uv.x - x, uv.y + y)).rgb;
    let b = textureSample(input_texture, s, vec2<f32>(uv.x, uv.y + y)).rgb;
    let c = textureSample(input_texture, s, vec2<f32>(uv.x + x, uv.y + y)).rgb;

    let d = textureSample(input_texture, s, vec2<f32>(uv.x - x, uv.y)).rgb;
    let e = textureSample(input_texture, s, vec2<f32>(uv.x, uv.y)).rgb;
    let f = textureSample(input_texture, s, vec2<f32>(uv.x + x, uv.y)).rgb;

    let g = textureSample(input_texture, s, vec2<f32>(uv.x - x, uv.y - y)).rgb;
    let h = textureSample(input_texture, s, vec2<f32>(uv.x, uv.y - y)).rgb;
    let i = textureSample(input_texture, s, vec2<f32>(uv.x + x, uv.y - y)).rgb;

    // Tent weights: center 4/16, edges 2/16 each, corners 1/16 each.
    var sample = e * 0.25;
    sample += (b + d + f + h) * 0.125;
    sample += (a + c + g + i) * 0.0625;

    return sample;
}

#ifdef FIRST_DOWNSAMPLE
// Fragment entry point for the first downsample pass. Remaps `output_uv` through
// `uniforms.viewport`, takes the 13 tap sample, clamps it, optionally applies the soft
// threshold, and writes the result with alpha 1.0.
@fragment
fn downsample_first(@location(0) output_uv: vec2<f32>) -> @location(0) vec4<f32> {
    let sample_uv = uniforms.viewport.xy + output_uv * uniforms.viewport.zw;
    var sample = sample_input_13_tap(sample_uv);
    // The lower bound of 0.0001 avoids propagating a multiplication by 0.0 through the
    // downscaling and upscaling, which would result in black boxes.
    // The upper bound is to prevent NaNs.
    // with f32::MAX (E+38) Chrome fails with ":value 340282346999999984391321947108527833088.0 cannot be represented as 'f32'"
    sample = clamp(sample, vec3<f32>(0.0001), vec3<f32>(3.40282347E+37));

#ifdef USE_THRESHOLD
    sample = soft_threshold(sample);
#endif

    return vec4<f32>(sample, 1.0);
}
#endif

// Fragment entry point for a downsample pass: the 13 tap sample at `uv` with alpha 1.0.
@fragment
fn downsample(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> {
    return vec4<f32>(sample_input_13_tap(uv), 1.0);
}

// Fragment entry point for an upsample pass: the 3x3 tent sample at `uv` with alpha 1.0.
@fragment
fn upsample(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> {
    return vec4<f32>(sample_input_3x3_tent(uv), 1.0);
}