# Source file: src/callbacks_step/stepsize_dg3d.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

# Maximum stable (advective/CFL) time step on a `TreeMesh{3}` when the wave
# speeds depend on the solution (`constant_speed::False`): scan all nodes of
# all elements for the largest directional wave speeds.
function max_dt(u, t, mesh::TreeMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)
            max_lambda1 = Base.max(max_lambda1, lambda1)
            max_lambda2 = Base.max(max_lambda2, lambda2)
            max_lambda3 = Base.max(max_lambda3, lambda3)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element]
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian *
                                    (max_lambda1 + max_lambda2 + max_lambda3))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

# Maximum stable (diffusive) time step on a `TreeMesh{3}` when the diffusivity
# depends on the solution (`constant_diffusivity::False`).
function max_dt(u, t, mesh::TreeMesh{3},
                constant_diffusivity::False, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity_ = zero(max_scaled_diffusivity)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            # Note: For the currently supported parabolic equations
            # Diffusion & Navier-Stokes, we only have one diffusivity.
            diffusivity = max_diffusivity(u_node, equations_parabolic)
            # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't
            # propagate `NaN`s properly; consistent with the other `max_dt` methods in this file.
            # See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
            max_diffusivity_ = Base.max(max_diffusivity_, diffusivity)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element] # 2 / Δx
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          inv_jacobian^2 * max_diffusivity_)
    end

    # Factor 4 cancels with 2^2 from `inv_jacobian^2`, resulting in Δx^2
    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end

# Maximum stable (advective/CFL) time step on a `TreeMesh{3}` when the wave
# speeds are constant (`constant_speed::True`): only the element sizes vary.
function max_dt(u, t, mesh::TreeMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        inv_jacobian = cache.elements.inverse_jacobian[element]
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian *
                                    (max_lambda1 + max_lambda2 + max_lambda3))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

# Curved meshes: delegate the per-element reduction to a backend-specific
# kernel (CPU or GPU) via `calc_max_scaled_speed`.
function max_dt(u, t,
                mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
                constant_speed, equations, dg::DG, cache)
    backend = trixi_backend(u)

    max_lambda = calc_max_scaled_speed(backend, u, mesh, constant_speed, equations, dg,
                                       cache)

    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = Base.max(nextfloat(zero(t)), max_lambda)

    return 2 / (nnodes(dg) * max_scaled_speed)
end

# Per-element scaled speed on curved meshes with solution-dependent wave
# speeds: the physical speeds are transformed to the reference element using
# the contravariant vectors before taking node-wise maxima.
@inline function max_scaled_speed_per_element(u,
                                              ::Type{<:Union{StructuredMesh{3},
                                                             P4estMesh{3},
                                                             T8codeMesh{3}}},
                                              constant_speed::False, equations, dg,
                                              contravariant_vectors, inverse_jacobian,
                                              element)
    max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(u))
    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
        u_node = get_node_vars(u, equations, dg, i, j, k, element)
        lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)

        Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                    i, j, k, element)
        lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3)
        Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                    i, j, k, element)
        lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3)
        Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                    i, j, k, element)
        lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3)

        inv_jacobian = abs(inverse_jacobian[i, j, k, element])

        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't
        # propagate `NaN`s properly; consistent with the other methods in this file.
        # See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_lambda1 = Base.max(max_lambda1, inv_jacobian * lambda1_transformed)
        max_lambda2 = Base.max(max_lambda2, inv_jacobian * lambda2_transformed)
        max_lambda3 = Base.max(max_lambda3, inv_jacobian * lambda3_transformed)
    end
    return max_lambda1 + max_lambda2 + max_lambda3
end

# Maximum stable (diffusive) time step on a curved `P4estMesh{3}` with
# solution-dependent diffusivity.
function max_dt(u, t,
                mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh`
                constant_diffusivity::False, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity1 = max_diffusivity2 = max_diffusivity3 = zero(max_scaled_diffusivity)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            diffusivity = max_diffusivity(u_node, equations_parabolic)

            # Local diffusivity transformed to the reference element
            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                        i, j, k, element)
            # The metric terms are squared due to the second derivatives in the parabolic terms,
            # which lead to application of the chain rule (coordinate transformation) twice.
            # See for instance also the implementation in FLEXI
            # https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
            # or FLUXO:
            # https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
            diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2 + Ja13^2)
            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                        i, j, k, element)
            diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2 + Ja23^2)
            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                        i, j, k, element)
            diffusivity3_transformed = diffusivity * (Ja31^2 + Ja32^2 + Ja33^2)

            inv_jacobian = abs(inverse_jacobian[i, j, k, element])

            max_diffusivity1 = Base.max(max_diffusivity1,
                                        diffusivity1_transformed * inv_jacobian^2)
            max_diffusivity2 = Base.max(max_diffusivity2,
                                        diffusivity2_transformed * inv_jacobian^2)
            max_diffusivity3 = Base.max(max_diffusivity3,
                                        diffusivity3_transformed * inv_jacobian^2)
        end
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          max_diffusivity1 + max_diffusivity2 +
                                          max_diffusivity3)
    end

    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end

# Per-element scaled speed on curved meshes with constant wave speeds
# (`constant_speed::True`): the speeds are queried once from `equations` and
# only the metric terms vary per node.
@inline function max_scaled_speed_per_element(u,
                                              ::Type{<:Union{StructuredMesh{3},
                                                             P4estMesh{3},
                                                             T8codeMesh{3}}},
                                              constant_speed::True, equations, dg::DG,
                                              contravariant_vectors, inverse_jacobian,
                                              element)
    max_scaled_speed = zero(eltype(u))
    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)
    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
        Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                    i, j, k, element)
        lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 +
                                  Ja13 * max_lambda3)
        Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                    i, j, k, element)
        lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 +
                                  Ja23 * max_lambda3)
        Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                    i, j, k, element)
        lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 +
                                  Ja33 * max_lambda3)

        inv_jacobian = abs(inverse_jacobian[i, j, k, element])

        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian *
                                    (lambda1_transformed + lambda2_transformed +
                                     lambda3_transformed))
    end

    return max_scaled_speed
end

# Maximum stable (diffusive) time step on a curved `P4estMesh{3}` with
# constant diffusivity: the diffusivity is queried once from
# `equations_parabolic`, only the metric terms vary per node.
function max_dt(u, t,
                mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh`
                constant_diffusivity::True, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    diffusivity = max_diffusivity(equations_parabolic)

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity1 = max_diffusivity2 = max_diffusivity3 = zero(max_scaled_diffusivity)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            # Local diffusivity transformed to the reference element
            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                        i, j, k, element)
            # The metric terms are squared due to the second derivatives in the parabolic terms,
            # which lead to application of the chain rule (coordinate transformation) twice.
            # See for instance also the implementation in FLEXI
            # https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
            # or FLUXO:
            # https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
            diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2 + Ja13^2)
            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                        i, j, k, element)
            diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2 + Ja23^2)
            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                        i, j, k, element)
            diffusivity3_transformed = diffusivity * (Ja31^2 + Ja32^2 + Ja33^2)

            inv_jacobian = abs(inverse_jacobian[i, j, k, element])

            max_diffusivity1 = Base.max(max_diffusivity1,
                                        diffusivity1_transformed * inv_jacobian^2)
            max_diffusivity2 = Base.max(max_diffusivity2,
                                        diffusivity2_transformed * inv_jacobian^2)
            max_diffusivity3 = Base.max(max_diffusivity3,
                                        diffusivity3_transformed * inv_jacobian^2)
        end
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          max_diffusivity1 + max_diffusivity2 +
                                          max_diffusivity3)
    end

    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end

# MPI-parallel variants: compute the local time step with the serial method,
# then take the global minimum over all ranks.
function max_dt(u, t, mesh::P4estMeshParallel{3},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::P4estMeshParallel{3},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::T8codeMeshParallel{3},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::T8codeMeshParallel{3},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
end # @muladd