Path: blob/main/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
5591 views
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).1# Since these FMAs can increase the performance of many numerical algorithms,2# we need to opt-in explicitly.3# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.4@muladd begin5#! format: noindent67function create_cache_subcell_limiting(mesh::Union{TreeMesh{3}, P4estMesh{3}},8equations,9volume_integral::VolumeIntegralSubcellLimiting,10dg::DG, cache_containers, uEltype)11cache = NamedTuple()1213fhat1_L_threaded, fhat1_R_threaded,14fhat2_L_threaded, fhat2_R_threaded,15fhat3_L_threaded, fhat3_R_threaded = create_f_threaded(mesh, equations, dg, uEltype)1617A4d = Array{uEltype, 4}18flux_temp_threaded = A4d[A4d(undef, nvariables(equations),19nnodes(dg), nnodes(dg), nnodes(dg))20for _ in 1:Threads.maxthreadid()]21fhat_temp_threaded = A4d[A4d(undef, nvariables(equations),22nnodes(dg), nnodes(dg), nnodes(dg))23for _ in 1:Threads.maxthreadid()]2425n_elements = nelements(cache_containers.elements)26antidiffusive_fluxes = ContainerAntidiffusiveFlux3D{uEltype}(n_elements,27nvariables(equations),28nnodes(dg))2930if have_nonconservative_terms(equations) == true31A5d = Array{uEltype, 5}32# Extract the nonconservative flux as a dispatch argument for `n_nonconservative_terms`33_, volume_flux_noncons = volume_integral.volume_flux_dg3435flux_nonconservative_temp_threaded = A5d[A5d(undef, nvariables(equations),36n_nonconservative_terms(volume_flux_noncons),37nnodes(dg), nnodes(dg),38nnodes(dg))39for _ in 1:Threads.maxthreadid()]40fhat_nonconservative_temp_threaded = A5d[A5d(undef, nvariables(equations),41n_nonconservative_terms(volume_flux_noncons),42nnodes(dg), nnodes(dg),43nnodes(dg))44for _ in 1:Threads.maxthreadid()]45phi_threaded = A5d[A5d(undef, nvariables(equations),46n_nonconservative_terms(volume_flux_noncons),47nnodes(dg), nnodes(dg), nnodes(dg))48for _ in 1:Threads.maxthreadid()]49cache = (; cache..., flux_nonconservative_temp_threaded,50fhat_nonconservative_temp_threaded, phi_threaded)51end5253# The limiter cache was created with 0 elements54resize_subcell_limiter_cache!(dg.volume_integral.limiter, n_elements)5556return (; cache..., antidiffusive_fluxes,57fhat1_L_threaded, fhat1_R_threaded,58fhat2_L_threaded, fhat2_R_threaded,59fhat3_L_threaded, fhat3_R_threaded,60flux_temp_threaded, fhat_temp_threaded)61end6263# Subcell limiting currently only implemented for certain mesh types64@inline function volume_integral_kernel!(du, u, element,65MeshT::Type{<:Union{TreeMesh{3}, P4estMesh{3}}},66nonconservative_terms, equations,67volume_integral::VolumeIntegralSubcellLimiting,68dg::DGSEM, cache)69@unpack inverse_weights = dg.basis # Plays role of DG subcell sizes70@unpack volume_flux_dg, volume_flux_fv, limiter = volume_integral7172# high-order DG fluxes73@unpack fhat1_L_threaded, fhat1_R_threaded, fhat2_L_threaded, fhat2_R_threaded, fhat3_L_threaded, fhat3_R_threaded = cache7475fhat1_L = fhat1_L_threaded[Threads.threadid()]76fhat1_R = fhat1_R_threaded[Threads.threadid()]77fhat2_L = fhat2_L_threaded[Threads.threadid()]78fhat2_R = fhat2_R_threaded[Threads.threadid()]79fhat3_L = fhat3_L_threaded[Threads.threadid()]80fhat3_R = fhat3_R_threaded[Threads.threadid()]81calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R,82u, MeshT, nonconservative_terms, equations, volume_flux_dg,83dg, element, cache)8485# low-order FV fluxes86@unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache8788fstar1_L = fstar1_L_threaded[Threads.threadid()]89fstar1_R = fstar1_R_threaded[Threads.threadid()]90fstar2_L = fstar2_L_threaded[Threads.threadid()]91fstar2_R = fstar2_R_threaded[Threads.threadid()]92fstar3_L = fstar3_L_threaded[Threads.threadid()]93fstar3_R = fstar3_R_threaded[Threads.threadid()]94calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R,95u, MeshT, nonconservative_terms, equations, volume_flux_fv,96dg, element, cache)9798# antidiffusive flux99calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R,100fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R,101u, MeshT, nonconservative_terms, equations, limiter,102dg, element, cache)103104# Calculate volume integral contribution of low-order FV flux105for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)106for v in eachvariable(equations)107du[v, i, j, k, element] += inverse_weights[i] *108(fstar1_L[v, i + 1, j, k] - fstar1_R[v, i, j, k]) +109inverse_weights[j] *110(fstar2_L[v, i, j + 1, k] - fstar2_R[v, i, j, k]) +111inverse_weights[k] *112(fstar3_L[v, i, j, k + 1] - fstar3_R[v, i, j, k])113end114end115116return nothing117end118119# Calculate the DG staggered volume fluxes `fhat` in subcell FV-form inside the element120# (**without non-conservative terms**).121#122# See also `flux_differencing_kernel!`.123@inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R,124u, ::Type{<:TreeMesh{3}},125have_nonconservative_terms::False, equations,126volume_flux, dg::DGSEM, element, cache)127@unpack weights, derivative_split = dg.basis128@unpack flux_temp_threaded = cache129130flux_temp = flux_temp_threaded[Threads.threadid()]131132# The FV-form fluxes are calculated in a recursive manner, i.e.:133# fhat_(0,1) = w_0 * FVol_0,134# fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j, for j=1,...,N-1,135# with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).136137# To use the symmetry of the `volume_flux`, the split form volume flux is precalculated138# like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`139# and saved in in `flux_temp`.140141# Split form volume flux in orientation 1: x direction142flux_temp .= zero(eltype(flux_temp))143144for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)145u_node = get_node_vars(u, equations, dg, i, j, k, element)146147# All diagonal entries of `derivative_split` are zero. Thus, we can skip148# the computation of the diagonal terms. In addition, we use the symmetry149# of the `volume_flux` to save half of the possible two-point flux150# computations.151for ii in (i + 1):nnodes(dg)152u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element)153flux1 = volume_flux(u_node, u_node_ii, 1, equations)154multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], flux1,155equations, dg, i, j, k)156multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], flux1,157equations, dg, ii, j, k)158end159end160161# FV-form flux `fhat` in x direction162for k in eachnode(dg), j in eachnode(dg), i in 1:(nnodes(dg) - 1)163for v in eachvariable(equations)164fhat1_L[v, i + 1, j, k] = fhat1_L[v, i, j, k] +165weights[i] * flux_temp[v, i, j, k]166fhat1_R[v, i + 1, j, k] = fhat1_L[v, i + 1, j, k]167end168end169170# Split form volume flux in orientation 2: y direction171flux_temp .= zero(eltype(flux_temp))172173for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)174u_node = get_node_vars(u, equations, dg, i, j, k, element)175for jj in (j + 1):nnodes(dg)176u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element)177flux2 = volume_flux(u_node, u_node_jj, 2, equations)178multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], flux2,179equations, dg, i, j, k)180multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], flux2,181equations, dg, i, jj, k)182end183end184185# FV-form flux `fhat` in y direction186for k in eachnode(dg), j in 1:(nnodes(dg) - 1), i in eachnode(dg)187for v in eachvariable(equations)188fhat2_L[v, i, j + 1, k] = fhat2_L[v, i, j, k] +189weights[j] * flux_temp[v, i, j, k]190fhat2_R[v, i, j + 1, k] = fhat2_L[v, i, j + 1, k]191end192end193194# Split form volume flux in orientation 3: z direction195flux_temp .= zero(eltype(flux_temp))196197for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)198u_node = get_node_vars(u, equations, dg, i, j, k, element)199for kk in (k + 1):nnodes(dg)200u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element)201flux3 = volume_flux(u_node, u_node_kk, 3, equations)202multiply_add_to_node_vars!(flux_temp, derivative_split[k, kk], flux3,203equations, dg, i, j, k)204multiply_add_to_node_vars!(flux_temp, derivative_split[kk, k], flux3,205equations, dg, i, j, kk)206end207end208209# FV-form flux `fhat` in z direction210for k in 1:(nnodes(dg) - 1), j in eachnode(dg), i in eachnode(dg)211for v in eachvariable(equations)212fhat3_L[v, i, j, k + 1] = fhat3_L[v, i, j, k] +213weights[k] * flux_temp[v, i, j, k]214fhat3_R[v, i, j, k + 1] = fhat3_L[v, i, j, k + 1]215end216end217218return nothing219end220221# Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for conservative systems.222@inline function calcflux_antidiffusive!(fhat1_L, fhat1_R,223fhat2_L, fhat2_R,224fhat3_L, fhat3_R,225fstar1_L, fstar1_R,226fstar2_L, fstar2_R,227fstar3_L, fstar3_R,228u, ::Type{<:Union{TreeMesh{3}, P4estMesh{3}}},229nonconservative_terms::False, equations,230limiter::SubcellLimiterIDP, dg, element, cache)231@unpack antidiffusive_flux1_L, antidiffusive_flux1_R, antidiffusive_flux2_L, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes232233# Due to the use of LGL nodes, the DG staggered fluxes `fhat` and FV fluxes `fstar` are equal234# on the element interfaces. So, they are not computed in the volume integral and set to zero235# in their respective computation.236# The antidiffusive fluxes are therefore zero on the element interfaces and don't need to be237# computed either. They are set to zero directly after resizing the container.238# This applies to the indices `i=1` and `i=nnodes(dg)+1` for `antidiffusive_flux1_L` and239# `antidiffusive_flux1_R` and analogously for the other two directions.240241for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg)242for v in eachvariable(equations)243antidiffusive_flux1_L[v, i, j, k, element] = fhat1_L[v, i, j, k] -244fstar1_L[v, i, j, k]245antidiffusive_flux1_R[v, i, j, k, element] = antidiffusive_flux1_L[v,246i, j, k,247element]248end249end250for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg)251for v in eachvariable(equations)252antidiffusive_flux2_L[v, i, j, k, element] = fhat2_L[v, i, j, k] -253fstar2_L[v, i, j, k]254antidiffusive_flux2_R[v, i, j, k, element] = antidiffusive_flux2_L[v,255i, j, k,256element]257end258end259for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg)260for v in eachvariable(equations)261antidiffusive_flux3_L[v, i, j, k, element] = fhat3_L[v, i, j, k] -262fstar3_L[v, i, j, k]263antidiffusive_flux3_R[v, i, j, k, element] = antidiffusive_flux3_L[v,264i, j, k,265element]266end267end268269return nothing270end271272# Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for conservative systems.273@inline function calcflux_antidiffusive!(fhat1_L, fhat1_R,274fhat2_L, fhat2_R,275fhat3_L, fhat3_R,276fstar1_L, fstar1_R,277fstar2_L, fstar2_R,278fstar3_L, fstar3_R,279u, ::Type{<:Union{TreeMesh{3}, P4estMesh{3}}},280nonconservative_terms::True, equations,281limiter::SubcellLimiterIDP, dg, element, cache)282@unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes283284# Due to the use of LGL nodes, the DG staggered fluxes `fhat` and FV fluxes `fstar` are equal285# on the element interfaces. So, they are not computed in the volume integral and set to zero286# in their respective computation.287# The antidiffusive fluxes are therefore zero on the element interfaces and don't need to be288# computed either. They are set to zero directly after resizing the container.289# This applies to the indices `i=1` and `i=nnodes(dg)+1` for `antidiffusive_flux1_L` and290# `antidiffusive_flux1_R` and analogously for the other two directions.291292for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg)293for v in eachvariable(equations)294antidiffusive_flux1_L[v, i, j, k, element] = fhat1_L[v, i, j, k] -295fstar1_L[v, i, j, k]296antidiffusive_flux1_R[v, i, j, k, element] = fhat1_R[v, i, j, k] -297fstar1_R[v, i, j, k]298end299end300for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg)301for v in eachvariable(equations)302antidiffusive_flux2_L[v, i, j, k, element] = fhat2_L[v, i, j, k] -303fstar2_L[v, i, j, k]304antidiffusive_flux2_R[v, i, j, k, element] = fhat2_R[v, i, j, k] -305fstar2_R[v, i, j, k]306end307end308for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg)309for v in eachvariable(equations)310antidiffusive_flux3_L[v, i, j, k, element] = fhat3_L[v, i, j, k] -311fstar3_L[v, i, j, k]312antidiffusive_flux3_R[v, i, j, k, element] = fhat3_R[v, i, j, k] -313fstar3_R[v, i, j, k]314end315end316317return nothing318end319end # @muladd320321322