Path: blob/main/src/callbacks_step/stepsize_dg2d.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

function max_dt(u, t, mesh::TreeMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = zero(max_scaled_speed)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            lambda1, lambda2 = max_abs_speeds(u_node, equations)
            max_lambda1 = Base.max(max_lambda1, lambda1)
            max_lambda2 = Base.max(max_lambda2, lambda2)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element]
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian * (max_lambda1 + max_lambda2))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::TreeMesh{2},
                constant_diffusivity::False, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_diffusivity_ = zero(max_scaled_speed)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            # Note: For the currently supported parabolic equations
            # Diffusion & Navier-Stokes, we only have one diffusivity.
            diffusivity = max_diffusivity(u_node, equations_parabolic)
            max_diffusivity_ = max(max_diffusivity_, diffusivity)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element] # 2 / Δx
        max_scaled_speed = max(max_scaled_speed, inv_jacobian^2 * max_diffusivity_)
    end

    # Factor 4 cancels with 2^2 from `inv_jacobian^2`, resulting in Δx^2
    return 4 / (nnodes(dg) * max_scaled_speed)
end
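# Worked example of the scaling above (illustrative note, not part of the
# upstream file): on a uniform `TreeMesh`, `inverse_jacobian == 2 / Δx`, so with
# a maximum diffusivity `d` the returned value is
#
#     4 / (nnodes(dg) * (2 / Δx)^2 * d) == Δx^2 / (nnodes(dg) * d),
#
# i.e. the parabolic time step estimate shrinks quadratically with the mesh
# width, in contrast to the linear scaling of the hyperbolic estimate above.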
function max_dt(u, t, mesh::TreeMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    max_lambda1, max_lambda2 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        inv_jacobian = cache.elements.inverse_jacobian[element]
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian * (max_lambda1 + max_lambda2))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::TreeMeshParallel{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::TreeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), TreeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::TreeMeshParallel{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::TreeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), TreeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

# Hyperbolic-parabolic simulations are not yet supported on MPI-parallel meshes.
# Thus, no `max_dt` method is implemented for `TreeMeshParallel{2}` together
# with `equations_parabolic::AbstractEquationsParabolic`.

function max_dt(u, t,
                mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2},
                            P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}},
                constant_speed, equations, dg::DG, cache)
    backend = trixi_backend(u)

    max_lambda = calc_max_scaled_speed(backend, u, mesh, constant_speed, equations, dg,
                                       cache)

    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = Base.max(nextfloat(zero(t)), max_lambda)

    return 2 / (nnodes(dg) * max_scaled_speed)
end
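# Minimal usage sketch (hypothetical, for illustration only; not part of the
# upstream file): a CFL-based stepsize callback rescales the value returned by
# `max_dt` with a user-chosen CFL number before handing it to the time
# integrator, roughly
#
#     dt = cfl_number * max_dt(u, t, mesh, have_constant_speed(equations),
#                              equations, dg, cache)
#
# where `have_constant_speed(equations)` produces the `True`/`False` value
# that dispatches to one of the methods in this file.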
@inline function max_scaled_speed_per_element(u,
                                              ::Type{<:Union{StructuredMesh{2},
                                                             UnstructuredMesh2D,
                                                             P4estMesh{2},
                                                             T8codeMesh{2},
                                                             StructuredMeshView{2}}},
                                              constant_speed::False, equations, dg::DG,
                                              contravariant_vectors, inverse_jacobian,
                                              element)
    max_lambda1 = max_lambda2 = zero(eltype(u))
    for j in eachnode(dg), i in eachnode(dg)
        u_node = get_node_vars(u, equations, dg, i, j, element)
        lambda1, lambda2 = max_abs_speeds(u_node, equations)

        # Local speeds transformed to the reference element
        Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                              i, j, element)
        lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2)
        Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                              i, j, element)
        lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2)

        inv_jacobian = abs(inverse_jacobian[i, j, element])

        max_lambda1 = Base.max(max_lambda1, lambda1_transformed * inv_jacobian)
        max_lambda2 = Base.max(max_lambda2, lambda2_transformed * inv_jacobian)
    end
    return max_lambda1 + max_lambda2
end
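# For reference (illustrative note, not part of the upstream file): with the
# contravariant vectors `Ja¹ = (Ja11, Ja12)` and `Ja² = (Ja21, Ja22)`, the loop
# above bounds the directional wave speeds on the reference element via
#
#     λ̃₁ = |Ja11 * λ₁ + Ja12 * λ₂| / J,    λ̃₂ = |Ja21 * λ₁ + Ja22 * λ₂| / J,
#
# where `1/J` is the absolute inverse Jacobian at the node. The element
# estimate is the nodal maximum of λ̃₁ plus the nodal maximum of λ̃₂, mirroring
# the `inv_jacobian * (max_lambda1 + max_lambda2)` term used on the `TreeMesh`.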
function max_dt(u, t,
                mesh::P4estMesh{2}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh`
                constant_diffusivity::False, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity1 = max_diffusivity2 = zero(max_scaled_diffusivity)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            diffusivity = max_diffusivity(u_node, equations_parabolic)

            # Local diffusivity transformed to the reference element
            Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                                  i, j, element)
            # The metric terms are squared due to the second derivatives in the parabolic terms,
            # which lead to application of the chain rule (coordinate transformation) twice.
            # See for instance also the implementation in FLEXI
            # https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
            # or FLUXO:
            # https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
            diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2)
            Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                                  i, j, element)
            diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2)

            inv_jacobian = abs(inverse_jacobian[i, j, element])

            max_diffusivity1 = Base.max(max_diffusivity1,
                                        diffusivity1_transformed * inv_jacobian^2)
            max_diffusivity2 = Base.max(max_diffusivity2,
                                        diffusivity2_transformed * inv_jacobian^2)
        end
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          max_diffusivity1 + max_diffusivity2)
    end

    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end

@inline function max_scaled_speed_per_element(u,
                                              ::Type{<:Union{StructuredMesh{2},
                                                             UnstructuredMesh2D,
                                                             P4estMesh{2},
                                                             P4estMeshView{2},
                                                             T8codeMesh{2},
                                                             StructuredMeshView{2}}},
                                              constant_speed::True, equations, dg::DG,
                                              contravariant_vectors, inverse_jacobian,
                                              element)
    max_scaled_speed = zero(eltype(u))
    max_lambda1, max_lambda2 = max_abs_speeds(equations)
    for j in eachnode(dg), i in eachnode(dg)
        # Local speeds transformed to the reference element
        Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                              i, j, element)
        lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2)
        Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                              i, j, element)
        lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2)

        inv_jacobian = abs(inverse_jacobian[i, j, element])

        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian *
                                    (lambda1_transformed + lambda2_transformed))
    end

    return max_scaled_speed
end

function max_dt(u, t,
                mesh::P4estMesh{2}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh`
                constant_diffusivity::True, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    diffusivity = max_diffusivity(equations_parabolic)

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity1 = max_diffusivity2 = zero(max_scaled_diffusivity)
        for j in eachnode(dg), i in eachnode(dg)
            # Local diffusivity transformed to the reference element
            Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                                  i, j, element)
            # The metric terms are squared due to the second derivatives in the parabolic terms,
            # which lead to application of the chain rule (coordinate transformation) twice.
            # See for instance also the implementation in FLEXI
            # https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
            # or FLUXO:
            # https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
            diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2)
            Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                                  i, j, element)
            diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2)

            inv_jacobian = abs(inverse_jacobian[i, j, element])

            max_diffusivity1 = Base.max(max_diffusivity1,
                                        diffusivity1_transformed * inv_jacobian^2)
            max_diffusivity2 = Base.max(max_diffusivity2,
                                        diffusivity2_transformed * inv_jacobian^2)
        end
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          max_diffusivity1 + max_diffusivity2)
    end

    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end
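# Why the metric terms enter squared (a sketch of the argument; see the
# FLEXI/FLUXO references above): transforming a second derivative such as
# ∂²/∂x² to reference coordinates applies the chain rule twice, so each metric
# factor `Jaᵢⱼ` and the inverse Jacobian appear quadratically, e.g.
#
#     diffusivity * (Ja11^2 + Ja12^2) * inv_jacobian^2
#
# for the first reference direction, exactly as accumulated in the loops above.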
function max_dt(u, t, mesh::P4estMeshParallel{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::P4estMeshParallel{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::T8codeMeshParallel{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::T8codeMeshParallel{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
end # @muladd
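# Sketch of the parallel pattern used by the `*MeshParallel` methods above
# (hypothetical standalone example; assumes an initialized MPI session via
# MPI.jl): each rank computes its local `dt` by `invoke`-ing the serial method,
# then all ranks reduce to the global minimum, conceptually
#
#     using MPI
#     MPI.Init()
#     dt_local = rand()  # stands in for the locally computed estimate
#     dt = MPI.Allreduce(dt_local, min, MPI.COMM_WORLD)
#
# `Base.min` is used in the actual code because Trixi.jl defines its own `min`
# (see src/auxiliary/math.jl), which should not be used for the MPI reduction.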