Path: blob/main/src/solvers/dgsem_p4est/dg_3d_parallel.jl
5616 views
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).1# Since these FMAs can increase the performance of many numerical algorithms,2# we need to opt-in explicitly.3# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.4@muladd begin5#! format: noindent67function rhs!(du, u, t,8mesh::Union{P4estMeshParallel{3}, T8codeMeshParallel{3}}, equations,9boundary_conditions, source_terms::Source,10dg::DG, cache) where {Source}11backend = trixi_backend(u)1213# Start to receive MPI data14@trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache)1516# Prolong solution to MPI interfaces17@trixi_timeit timer() "prolong2mpiinterfaces" begin18prolong2mpiinterfaces!(cache, u, mesh, equations, dg.surface_integral, dg)19end2021# Prolong solution to MPI mortars22@trixi_timeit timer() "prolong2mpimortars" begin23prolong2mpimortars!(cache, u, mesh, equations,24dg.mortar, dg)25end2627# Start to send MPI data28@trixi_timeit timer() "start MPI send" begin29start_mpi_send!(cache.mpi_cache, mesh, equations, dg, cache)30end3132# Reset du33@trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache)3435# Calculate volume integral36@trixi_timeit timer() "volume integral" begin37calc_volume_integral!(backend, du, u, mesh,38have_nonconservative_terms(equations), equations,39dg.volume_integral, dg, cache)40end4142# Prolong solution to interfaces43@trixi_timeit timer() "prolong2interfaces" begin44prolong2interfaces!(backend, cache, u, mesh, equations, dg)45end4647# Calculate interface fluxes48@trixi_timeit timer() "interface flux" begin49calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh,50have_nonconservative_terms(equations), equations,51dg.surface_integral, dg, cache)52end5354# Prolong solution to boundaries55@trixi_timeit timer() "prolong2boundaries" begin56prolong2boundaries!(cache, u, mesh, equations, dg)57end5859# Calculate boundary fluxes60@trixi_timeit timer() "boundary flux" begin61calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,62dg.surface_integral, dg)63end6465# Prolong solution to mortars66@trixi_timeit timer() "prolong2mortars" begin67prolong2mortars!(cache, u, mesh, equations,68dg.mortar, dg)69end7071# Calculate mortar fluxes72@trixi_timeit timer() "mortar flux" begin73calc_mortar_flux!(cache.elements.surface_flux_values, mesh,74have_nonconservative_terms(equations), equations,75dg.mortar, dg.surface_integral, dg, cache)76end7778# Finish to receive MPI data79@trixi_timeit timer() "finish MPI receive" begin80finish_mpi_receive!(cache.mpi_cache, mesh, equations, dg, cache)81end8283# Calculate MPI interface fluxes84@trixi_timeit timer() "MPI interface flux" begin85calc_mpi_interface_flux!(cache.elements.surface_flux_values, mesh,86have_nonconservative_terms(equations), equations,87dg.surface_integral, dg, cache)88end8990# Calculate MPI mortar fluxes91@trixi_timeit timer() "MPI mortar flux" begin92calc_mpi_mortar_flux!(cache.elements.surface_flux_values, mesh,93have_nonconservative_terms(equations), equations,94dg.mortar, dg.surface_integral, dg, cache)95end9697# Calculate surface integrals98@trixi_timeit timer() "surface integral" begin99calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg,100cache)101end102103# Apply Jacobian from mapping to reference element104@trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg,105cache)106107# Calculate source terms108@trixi_timeit timer() "source terms" begin109calc_sources!(du, u, t, source_terms, equations, dg, cache)110end111112# Finish to send MPI data113@trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache)114115return nothing116end117118function prolong2mpiinterfaces!(cache, u,119mesh::Union{P4estMeshParallel{3},120T8codeMeshParallel{3}},121equations, surface_integral, dg::DG)122@unpack mpi_interfaces = cache123index_range = eachnode(dg)124125@threaded for interface in eachmpiinterface(dg, cache)126# Copy solution data from the local element using "delayed indexing" with127# a start value and a step size to get the correct face and orientation.128# Note that in the current implementation, the interface will be129# "aligned at the primary element", i.e., the index of the primary side130# will always run forwards.131local_side = mpi_interfaces.local_sides[interface]132local_element = mpi_interfaces.local_neighbor_ids[interface]133local_indices = mpi_interfaces.node_indices[interface]134135i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1],136index_range)137j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2],138index_range)139k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3],140index_range)141142i_element = i_element_start143j_element = j_element_start144k_element = k_element_start145for j in eachnode(dg)146for i in eachnode(dg)147for v in eachvariable(equations)148mpi_interfaces.u[local_side, v, i, j, interface] = u[v, i_element,149j_element,150k_element,151local_element]152end153i_element += i_element_step_i154j_element += j_element_step_i155k_element += k_element_step_i156end157i_element += i_element_step_j158j_element += j_element_step_j159k_element += k_element_step_j160end161end162163return nothing164end165166function calc_mpi_interface_flux!(surface_flux_values,167mesh::Union{P4estMeshParallel{3},168T8codeMeshParallel{3}},169have_nonconservative_terms,170equations, surface_integral, dg::DG, cache)171@unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces172@unpack contravariant_vectors = cache.elements173index_range = eachnode(dg)174175@threaded for interface in eachmpiinterface(dg, cache)176# Get element and side index information on the local element177local_element = local_neighbor_ids[interface]178local_indices = node_indices[interface]179local_direction = indices2direction(local_indices)180local_side = local_sides[interface]181182# Create the local i,j,k indexing on the local element used to pull normal direction information183i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1],184index_range)185j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2],186index_range)187k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3],188index_range)189190i_element = i_element_start191j_element = j_element_start192k_element = k_element_start193194# Initiate the node indices to be used in the surface for loop,195# the surface flux storage must be indexed in alignment with the local element indexing196local_surface_indices = surface_indices(local_indices)197i_surface_start, i_surface_step_i, i_surface_step_j = index_to_start_step_3d(local_surface_indices[1],198index_range)199j_surface_start, j_surface_step_i, j_surface_step_j = index_to_start_step_3d(local_surface_indices[2],200index_range)201i_surface = i_surface_start202j_surface = j_surface_start203204for j in eachnode(dg)205for i in eachnode(dg)206# Get the normal direction on the local element207# Contravariant vectors at interfaces in negative coordinate direction208# are pointing inwards. This is handled by `get_normal_direction`.209normal_direction = get_normal_direction(local_direction,210contravariant_vectors,211i_element, j_element, k_element,212local_element)213214calc_mpi_interface_flux!(surface_flux_values, mesh,215have_nonconservative_terms, equations,216surface_integral, dg, cache,217interface, normal_direction,218i, j, local_side,219i_surface, j_surface, local_direction,220local_element)221222# Increment local element indices to pull the normal direction223i_element += i_element_step_i224j_element += j_element_step_i225k_element += k_element_step_i226# Increment the surface node indices along the local element227i_surface += i_surface_step_i228j_surface += j_surface_step_i229end230# Increment local element indices to pull the normal direction231i_element += i_element_step_j232j_element += j_element_step_j233k_element += k_element_step_j234# Increment the surface node indices along the local element235i_surface += i_surface_step_j236j_surface += j_surface_step_j237end238end239240return nothing241end242243# Inlined version of the interface flux computation for conservation laws244@inline function calc_mpi_interface_flux!(surface_flux_values,245mesh::Union{P4estMeshParallel{3},246T8codeMeshParallel{3}},247have_nonconservative_terms::False, equations,248surface_integral, dg::DG, cache,249interface_index, normal_direction,250interface_i_node_index,251interface_j_node_index, local_side,252surface_i_node_index, surface_j_node_index,253local_direction_index, local_element_index)254@unpack u = cache.mpi_interfaces255@unpack surface_flux = surface_integral256257u_ll, u_rr = get_surface_node_vars(u, equations, dg,258interface_i_node_index, interface_j_node_index,259interface_index)260261if local_side == 1262flux_ = surface_flux(u_ll, u_rr, normal_direction, equations)263else # local_side == 2264flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations)265end266267for v in eachvariable(equations)268surface_flux_values[v, surface_i_node_index, surface_j_node_index,269local_direction_index, local_element_index] = flux_[v]270end271272return nothing273end274275# Inlined version of the interface flux computation for non-conservative equations276@inline function calc_mpi_interface_flux!(surface_flux_values,277mesh::Union{P4estMeshParallel{3},278T8codeMeshParallel{3}},279have_nonconservative_terms::True, equations,280surface_integral, dg::DG, cache,281interface_index, normal_direction,282interface_i_node_index,283interface_j_node_index, local_side,284surface_i_node_index, surface_j_node_index,285local_direction_index, local_element_index)286@unpack u = cache.mpi_interfaces287surface_flux, nonconservative_flux = surface_integral.surface_flux288289u_ll, u_rr = get_surface_node_vars(u, equations, dg,290interface_i_node_index, interface_j_node_index,291interface_index)292293# Compute flux and non-conservative term for this side of the interface294if local_side == 1295flux_ = surface_flux(u_ll, u_rr, normal_direction, equations)296noncons_flux_ = nonconservative_flux(u_ll, u_rr, normal_direction, equations)297else # local_side == 2298flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations)299noncons_flux_ = -nonconservative_flux(u_rr, u_ll, -normal_direction, equations)300end301302for v in eachvariable(equations)303surface_flux_values[v, surface_i_node_index, surface_j_node_index,304local_direction_index, local_element_index] = flux_[v] +3050.5f0 * noncons_flux_[v]306end307308return nothing309end310311function prolong2mpimortars!(cache, u,312mesh::Union{P4estMeshParallel{3}, T8codeMeshParallel{3}},313equations,314mortar_l2::LobattoLegendreMortarL2,315dg::DGSEM)316@unpack node_indices = cache.mpi_mortars317index_range = eachnode(dg)318319@threaded for mortar in eachmpimortar(dg, cache)320local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar]321local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar]322323# Get start value and step size for indices on both sides to get the correct face324# and orientation325small_indices = node_indices[1, mortar]326i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1],327index_range)328j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2],329index_range)330k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3],331index_range)332333large_indices = node_indices[2, mortar]334i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_indices[1],335index_range)336j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_indices[2],337index_range)338k_large_start, k_large_step_i, k_large_step_j = index_to_start_step_3d(large_indices[3],339index_range)340341for (element, position) in zip(local_neighbor_ids, local_neighbor_positions)342if position == 5 # -> large element343# Buffer to copy solution values of the large element in the correct orientation344# before interpolating345u_buffer = cache.u_threaded[Threads.threadid()]346# temporary buffer for projections347fstar_tmp = cache.fstar_tmp_threaded[Threads.threadid()]348349i_large = i_large_start350j_large = j_large_start351k_large = k_large_start352for j in eachnode(dg)353for i in eachnode(dg)354for v in eachvariable(equations)355u_buffer[v, i, j] = u[v, i_large, j_large, k_large, element]356end357358i_large += i_large_step_i359j_large += j_large_step_i360k_large += k_large_step_i361end362i_large += i_large_step_j363j_large += j_large_step_j364k_large += k_large_step_j365end366367# Interpolate large element face data from buffer to small face locations368multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 1, :, :,369mortar),370mortar_l2.forward_lower,371mortar_l2.forward_lower,372u_buffer,373fstar_tmp)374multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 2, :, :,375mortar),376mortar_l2.forward_upper,377mortar_l2.forward_lower,378u_buffer,379fstar_tmp)380multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 3, :, :,381mortar),382mortar_l2.forward_lower,383mortar_l2.forward_upper,384u_buffer,385fstar_tmp)386multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 4, :, :,387mortar),388mortar_l2.forward_upper,389mortar_l2.forward_upper,390u_buffer,391fstar_tmp)392else # position in (1, 2, 3, 4) -> small element393# Copy solution data from the small elements394i_small = i_small_start395j_small = j_small_start396k_small = k_small_start397for j in eachnode(dg)398for i in eachnode(dg)399for v in eachvariable(equations)400cache.mpi_mortars.u[1, v, position, i, j, mortar] = u[v,401i_small,402j_small,403k_small,404element]405end406i_small += i_small_step_i407j_small += j_small_step_i408k_small += k_small_step_i409end410i_small += i_small_step_j411j_small += j_small_step_j412k_small += k_small_step_j413end414end415end416end417418return nothing419end420421function calc_mpi_mortar_flux!(surface_flux_values,422mesh::Union{P4estMeshParallel{3}, T8codeMeshParallel{3}},423have_nonconservative_terms, equations,424mortar_l2::LobattoLegendreMortarL2,425surface_integral, dg::DG, cache)426@unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars427@unpack contravariant_vectors = cache.elements428@unpack fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded = cache429index_range = eachnode(dg)430431@threaded for mortar in eachmpimortar(dg, cache)432# Choose thread-specific pre-allocated container433fstar_primary = fstar_primary_threaded[Threads.threadid()]434fstar_secondary = fstar_secondary_threaded[Threads.threadid()]435fstar_tmp = fstar_tmp_threaded[Threads.threadid()]436437# Get index information on the small elements438small_indices = node_indices[1, mortar]439440i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1],441index_range)442j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2],443index_range)444k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3],445index_range)446447for position in 1:4448i_small = i_small_start449j_small = j_small_start450k_small = k_small_start451for j in eachnode(dg)452for i in eachnode(dg)453# Get the normal direction on the small element.454normal_direction = get_normal_direction(cache.mpi_mortars, i, j,455position, mortar)456457calc_mpi_mortar_flux!(fstar_primary, fstar_secondary, mesh,458have_nonconservative_terms, equations,459surface_integral, dg, cache,460mortar, position, normal_direction,461i, j)462463i_small += i_small_step_i464j_small += j_small_step_i465k_small += k_small_step_i466end467end468i_small += i_small_step_j469j_small += j_small_step_j470k_small += k_small_step_j471end472473# Buffer to interpolate flux values of the large element to before474# copying in the correct orientation475u_buffer = cache.u_threaded[Threads.threadid()]476477mpi_mortar_fluxes_to_elements!(surface_flux_values,478mesh, equations, mortar_l2, dg, cache,479mortar, fstar_primary, fstar_secondary, u_buffer,480fstar_tmp)481end482483return nothing484end485486# Inlined version of the mortar flux computation on small elements for conservation laws487@inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary,488mesh::Union{P4estMeshParallel{3},489T8codeMeshParallel{3}},490have_nonconservative_terms::False, equations,491surface_integral, dg::DG, cache,492mortar_index, position_index, normal_direction,493i_node_index, j_node_index)494@unpack u = cache.mpi_mortars495@unpack surface_flux = surface_integral496497u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index,498i_node_index, j_node_index, mortar_index)499500flux = surface_flux(u_ll, u_rr, normal_direction, equations)501502# Copy flux to buffer503set_node_vars!(fstar_primary, flux, equations, dg,504i_node_index, j_node_index, position_index)505set_node_vars!(fstar_secondary, flux, equations, dg,506i_node_index, j_node_index, position_index)507508return nothing509end510511# Inlined version of the mortar flux computation on small elements for non-conservative equations512@inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary,513mesh::Union{P4estMeshParallel{3},514T8codeMeshParallel{3}},515have_nonconservative_terms::True, equations,516surface_integral, dg::DG, cache,517mortar_index, position_index, normal_direction,518i_node_index, j_node_index)519@unpack u = cache.mpi_mortars520surface_flux, nonconservative_flux = surface_integral.surface_flux521522u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index,523j_node_index, mortar_index)524525flux = surface_flux(u_ll, u_rr, normal_direction, equations)526noncons_flux_primary = nonconservative_flux(u_ll, u_rr, normal_direction, equations)527noncons_flux_secondary = nonconservative_flux(u_rr, u_ll, normal_direction,528equations)529530for v in eachvariable(equations)531fstar_primary[v, i_node_index, j_node_index, position_index] = flux[v] +5320.5f0 *533noncons_flux_primary[v]534fstar_secondary[v, i_node_index, j_node_index, position_index] = flux[v] +5350.5f0 *536noncons_flux_secondary[v]537end538539return nothing540end541542@inline function mpi_mortar_fluxes_to_elements!(surface_flux_values,543mesh::Union{P4estMeshParallel{3},544T8codeMeshParallel{3}},545equations,546mortar_l2::LobattoLegendreMortarL2,547dg::DGSEM, cache, mortar, fstar_primary,548fstar_secondary,549u_buffer, fstar_tmp)550@unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars551index_range = eachnode(dg)552553small_indices = node_indices[1, mortar]554small_direction = indices2direction(small_indices)555large_indices = node_indices[2, mortar]556large_direction = indices2direction(large_indices)557large_surface_indices = surface_indices(large_indices)558559i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_surface_indices[1],560index_range)561j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_surface_indices[2],562index_range)563564for (element, position) in zip(local_neighbor_ids[mortar],565local_neighbor_positions[mortar])566if position == 5 # -> large element567# Project small fluxes to large element.568multiply_dimensionwise!(u_buffer,569mortar_l2.reverse_lower, mortar_l2.reverse_lower,570view(fstar_secondary, .., 1),571fstar_tmp)572add_multiply_dimensionwise!(u_buffer,573mortar_l2.reverse_upper,574mortar_l2.reverse_lower,575view(fstar_secondary, .., 2),576fstar_tmp)577add_multiply_dimensionwise!(u_buffer,578mortar_l2.reverse_lower,579mortar_l2.reverse_upper,580view(fstar_secondary, .., 3),581fstar_tmp)582add_multiply_dimensionwise!(u_buffer,583mortar_l2.reverse_upper,584mortar_l2.reverse_upper,585view(fstar_secondary, .., 4),586fstar_tmp)587# The flux is calculated in the outward direction of the small elements,588# so the sign must be switched to get the flux in outward direction589# of the large element.590# The contravariant vectors of the large element (and therefore the normal591# vectors of the large element as well) are four times as large as the592# contravariant vectors of the small elements. Therefore, the flux needs593# to be scaled by a factor of 4 to obtain the flux of the large element.594u_buffer .*= -4595# Copy interpolated flux values from buffer to large element face in the596# correct orientation.597# Note that the index of the small sides will always run forward but598# the index of the large side might need to run backwards for flipped sides.599i_large = i_large_start600j_large = j_large_start601for j in eachnode(dg)602for i in eachnode(dg)603for v in eachvariable(equations)604surface_flux_values[v, i_large, j_large, large_direction, element] = u_buffer[v,605i,606j]607end608i_large += i_large_step_i609j_large += j_large_step_i610end611i_large += i_large_step_j612j_large += j_large_step_j613end614else # position in (1, 2, 3, 4) -> small element615# Copy solution small to small616for j in eachnode(dg)617for i in eachnode(dg)618for v in eachvariable(equations)619surface_flux_values[v, i, j, small_direction, element] = fstar_primary[v,620i,621j,622position]623end624end625end626end627end628629return nothing630end631end # muladd632633634