GitHub Repository: trixi-framework/Trixi.jl
Path: blob/main/src/solvers/dgsem_p4est/dg_parallel.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

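# Data structures and buffers for MPI communication on a parallel `P4estMesh`:
# the neighboring MPI ranks, the MPI interfaces/mortars shared with each of them,
# and the matching send/receive buffers and requests.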
mutable struct P4estMPICache{BufferType <: DenseVector,
                             VecInt <: DenseVector{<:Integer}}
    mpi_neighbor_ranks::Vector{Int}
    mpi_neighbor_interfaces::VecOfArrays{VecInt}
    mpi_neighbor_mortars::VecOfArrays{VecInt}
    mpi_send_buffers::VecOfArrays{BufferType}
    mpi_recv_buffers::VecOfArrays{BufferType}
    mpi_send_requests::Vector{MPI.Request}
    mpi_recv_requests::Vector{MPI.Request}
    n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}}
    n_elements_global::Int
    first_element_global_id::Int
end

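# Construct an empty cache; the actual communication structures are filled in later
# by `init_mpi_cache!`.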
function P4estMPICache(uEltype)
    # MPI communication "just works" for bitstypes only
    if !isbitstype(uEltype)
        throw(ArgumentError("P4estMPICache only supports bitstypes, $uEltype is not a bitstype."))
    end

    mpi_neighbor_ranks = Vector{Int}(undef, 0)
    mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) |> VecOfArrays
    mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) |> VecOfArrays
    mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays
    mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays
    mpi_send_requests = Vector{MPI.Request}(undef, 0)
    mpi_recv_requests = Vector{MPI.Request}(undef, 0)
    n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1)
    n_elements_global = 0
    first_element_global_id = 0

    return P4estMPICache{Vector{uEltype}, Vector{Int}}(mpi_neighbor_ranks,
                                                       mpi_neighbor_interfaces,
                                                       mpi_neighbor_mortars,
                                                       mpi_send_buffers,
                                                       mpi_recv_buffers,
                                                       mpi_send_requests,
                                                       mpi_recv_requests,
                                                       n_elements_by_rank,
                                                       n_elements_global,
                                                       first_element_global_id)
end

@inline Base.eltype(::P4estMPICache{BufferType}) where {BufferType} = eltype(BufferType)

# @eval due to @muladd
@eval Adapt.@adapt_structure(P4estMPICache)

##
# Note that the code in `start_mpi_send!`/`finish_mpi_receive!` is sensitive to inference on (at least) Julia 1.10.
# Julia's inference is bi-stable: it can sometimes depend on what code has been looked at already, and
# the presence of an inference result in the cache can have an impact on the inference of other code.
# In this case, the broadcast `send_buffer[first:last] .= vec(cache.mpi_mortars.u[2, :, :, .., mortar])`
# can fail to be inferred due to heuristics if this function is not in the cache...
precompile(Base.reindex,
           (Tuple{Base.Slice{Base.OneTo{Int64}}, Int64, Base.Slice{Base.OneTo{Int64}},
                  Base.Slice{Base.OneTo{Int64}}, Int64}, Tuple{Int64, Int64, Int64}))

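# Pack the local solution values of all MPI interfaces and MPI mortars into the
# send buffers (one buffer per neighboring rank) and start non-blocking sends.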
function start_mpi_send!(mpi_cache::P4estMPICache, mesh, equations, dg, cache)
    data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1)
    n_small_elements = 2^(ndims(mesh) - 1)

    for rank in 1:length(mpi_cache.mpi_neighbor_ranks)
        send_buffer = mpi_cache.mpi_send_buffers[rank]

        for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[rank])
            first = (index - 1) * data_size + 1
            last = (index - 1) * data_size + data_size
            local_side = cache.mpi_interfaces.local_sides[interface]
            @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[local_side, ..,
                                                                         interface])
        end

        # Set send_buffer corresponding to mortar data to NaN and overwrite the parts where local
        # data exists
        interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[rank]) *
                               data_size
        mortars_data_size = length(mpi_cache.mpi_neighbor_mortars[rank]) *
                            n_small_elements * 2 * data_size
        # `NaN |> eltype(...)` ensures that the NaN's are of the appropriate floating point type
        send_buffer[(interfaces_data_size + 1):(interfaces_data_size + mortars_data_size)] .= NaN |>
                                                                                              eltype(mpi_cache)

        for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[rank])
            index_base = interfaces_data_size +
                         (index - 1) * n_small_elements * 2 * data_size
            indices = buffer_mortar_indices(mesh, index_base, data_size)

            for position in cache.mpi_mortars.local_neighbor_positions[mortar]
                first, last = indices[position]
                if position > n_small_elements # large element
                    @views send_buffer[first:last] .= vec(cache.mpi_mortars.u[2, :, :,
                                                                              ..,
                                                                              mortar])
                else # small element
                    @views send_buffer[first:last] .= vec(cache.mpi_mortars.u[1, :,
                                                                              position,
                                                                              ..,
                                                                              mortar])
                end
            end
        end
    end

    # Start sending
    for (index, rank) in enumerate(mpi_cache.mpi_neighbor_ranks)
        mpi_cache.mpi_send_requests[index] = MPI.Isend(mpi_cache.mpi_send_buffers[index],
                                                       rank, mpi_rank(), mpi_comm())
    end

    return nothing
end

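# Start non-blocking receives for the data sent by all neighboring MPI ranks.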
function start_mpi_receive!(mpi_cache::P4estMPICache)
    for (index, rank) in enumerate(mpi_cache.mpi_neighbor_ranks)
        mpi_cache.mpi_recv_requests[index] = MPI.Irecv!(mpi_cache.mpi_recv_buffers[index],
                                                        rank, rank, mpi_comm())
    end

    return nothing
end

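# Wait for all non-blocking sends started in `start_mpi_send!` to complete.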
function finish_mpi_send!(mpi_cache::P4estMPICache)
    return MPI.Waitall(mpi_cache.mpi_send_requests, MPI.Status)
end

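# Wait for the receives started in `start_mpi_receive!` and unpack the received
# interface and mortar data into the solution containers as each buffer arrives.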
function finish_mpi_receive!(mpi_cache::P4estMPICache, mesh, equations, dg, cache)
    data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1)
    n_small_elements = 2^(ndims(mesh) - 1)
    n_positions = n_small_elements + 1

    # Start receiving and unpack received data until all communication is finished
    data = MPI.Waitany(mpi_cache.mpi_recv_requests)
    while data !== nothing
        recv_buffer = mpi_cache.mpi_recv_buffers[data]

        for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[data])
            first = (index - 1) * data_size + 1
            last = (index - 1) * data_size + data_size

            if cache.mpi_interfaces.local_sides[interface] == 1 # local element on primary side
                @views vec(cache.mpi_interfaces.u[2, .., interface]) .= recv_buffer[first:last]
            else # local element at secondary side
                @views vec(cache.mpi_interfaces.u[1, .., interface]) .= recv_buffer[first:last]
            end
        end

        interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[data]) *
                               data_size
        for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[data])
            index_base = interfaces_data_size +
                         (index - 1) * n_small_elements * 2 * data_size
            indices = buffer_mortar_indices(mesh, index_base, data_size)

            for position in 1:n_positions
                # Skip if received data for `position` is NaN as no real data has been sent for the
                # corresponding element
                if isnan(recv_buffer[Base.first(indices[position])])
                    continue
                end

                first, last = indices[position]
                if position == n_positions # large element
                    @views vec(cache.mpi_mortars.u[2, :, :, .., mortar]) .= recv_buffer[first:last]
                else # small element
                    @views vec(cache.mpi_mortars.u[1, :, position, .., mortar]) .= recv_buffer[first:last]
                end
            end
        end

        data = MPI.Waitany(mpi_cache.mpi_recv_requests)
    end

    return nothing
end

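# Per-rank buffer layout used above: all MPI interface data comes first (one block of
# `data_size` values per interface), followed by the MPI mortar data (one block of
# `2 * n_small_elements * data_size` values per mortar). The `buffer_mortar_indices`
# methods below map a mortar position to its `(first, last)` index range within such a
# mortar block.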
# Return a tuple `indices` where indices[position] is a `(first, last)` tuple for accessing the
# data corresponding to the `position` part of a mortar in an MPI buffer. The mortar data must begin
# at `index_base`+1 in the MPI buffer. `data_size` is the data size associated with each small
# position (i.e. position 1 or 2). The data corresponding to the large side (i.e. position 3) has
# size `2 * data_size`.
@inline function buffer_mortar_indices(mesh::Union{P4estMeshParallel{2},
                                                   T8codeMeshParallel{2}}, index_base,
                                       data_size)
    return (
            # first, last for local element in position 1 (small element)
            (index_base + 1,
             index_base + 1 * data_size),
            # first, last for local element in position 2 (small element)
            (index_base + 1 * data_size + 1,
             index_base + 2 * data_size),
            # first, last for local element in position 3 (large element)
            (index_base + 2 * data_size + 1,
             index_base + 4 * data_size))
end

# Return a tuple `indices` where indices[position] is a `(first, last)` tuple for accessing the
# data corresponding to the `position` part of a mortar in an MPI buffer. The mortar data must begin
# at `index_base`+1 in the MPI buffer. `data_size` is the data size associated with each small
# position (i.e. position 1 to 4). The data corresponding to the large side (i.e. position 5) has
# size `4 * data_size`.
@inline function buffer_mortar_indices(mesh::Union{P4estMeshParallel{3},
                                                   T8codeMeshParallel{3}}, index_base,
                                       data_size)
    return (
            # first, last for local element in position 1 (small element)
            (index_base + 1,
             index_base + 1 * data_size),
            # first, last for local element in position 2 (small element)
            (index_base + 1 * data_size + 1,
             index_base + 2 * data_size),
            # first, last for local element in position 3 (small element)
            (index_base + 2 * data_size + 1,
             index_base + 3 * data_size),
            # first, last for local element in position 4 (small element)
            (index_base + 3 * data_size + 1,
             index_base + 4 * data_size),
            # first, last for local element in position 5 (large element)
            (index_base + 4 * data_size + 1,
             index_base + 8 * data_size))
end

# This method is called when a SemidiscretizationHyperbolic is constructed.
# It constructs the basic `cache` used throughout the simulation to compute
# the RHS etc.
function create_cache(mesh::P4estMeshParallel, equations::AbstractEquations, dg::DG,
                      ::Any, ::Type{uEltype}) where {uEltype <: Real}
    # Make sure to balance and partition the p4est and create a new ghost layer before creating any
    # containers in case someone has tampered with the p4est after creating the mesh
    balance!(mesh)
    partition!(mesh)
    update_ghost_layer!(mesh)

    elements = init_elements(mesh, equations, dg.basis, uEltype)

    mpi_interfaces = init_mpi_interfaces(mesh, equations, dg.basis, elements)
    mpi_mortars = init_mpi_mortars(mesh, equations, dg.basis, elements)
    mpi_cache = init_mpi_cache(mesh, mpi_interfaces, mpi_mortars,
                               nvariables(equations), nnodes(dg), uEltype)

    exchange_normal_directions!(mpi_mortars, mpi_cache, mesh, nnodes(dg))

    interfaces = init_interfaces(mesh, equations, dg.basis, elements)
    boundaries = init_boundaries(mesh, equations, dg.basis, elements)
    mortars = init_mortars(mesh, equations, dg.basis, elements)

    # Container cache
    cache = (; elements, interfaces, mpi_interfaces, boundaries, mortars,
             mpi_mortars, mpi_cache)

    # Add Volume-Integral cache
    cache = (; cache...,
             create_cache(mesh, equations, dg.volume_integral, dg, cache, uEltype)...)
    # Add Mortar cache
    cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...)

    return cache
end

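# Create and initialize the MPI cache for a parallel `P4estMesh`.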
function init_mpi_cache(mesh::P4estMeshParallel, mpi_interfaces, mpi_mortars, nvars,
                        nnodes, uEltype)
    mpi_cache = P4estMPICache(uEltype)
    init_mpi_cache!(mpi_cache, mesh, mpi_interfaces, mpi_mortars, nvars, nnodes,
                    uEltype)

    return mpi_cache
end

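# Fill an existing `P4estMPICache`: set up the neighbor connectivity, allocate the MPI
# buffers and requests, and determine the local/global element counts.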
function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::P4estMeshParallel,
                         mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype)
    mpi_neighbor_ranks, _mpi_neighbor_interfaces, _mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces,
                                                                                                         mpi_mortars,
                                                                                                         mesh)

    _mpi_send_buffers, _mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(_mpi_neighbor_interfaces,
                                                                                                          _mpi_neighbor_mortars,
                                                                                                          ndims(mesh),
                                                                                                          nvars,
                                                                                                          n_nodes,
                                                                                                          uEltype)

    # Determine local and total number of elements
    n_elements_global = Int(mesh.p4est.global_num_quadrants[])
    n_elements_by_rank = vcat(Int.(unsafe_wrap(Array, mesh.p4est.global_first_quadrant,
                                               mpi_nranks())),
                              n_elements_global) |> diff # diff sufficient due to 0-based quad indices
    n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1))
    # Account for 1-based indexing in Julia
    first_element_global_id = Int(mesh.p4est.global_first_quadrant[mpi_rank() + 1]) + 1
    @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements"

    mpi_neighbor_interfaces = VecOfArrays(_mpi_neighbor_interfaces)
    mpi_neighbor_mortars = VecOfArrays(_mpi_neighbor_mortars)
    mpi_send_buffers = VecOfArrays(_mpi_send_buffers)
    mpi_recv_buffers = VecOfArrays(_mpi_recv_buffers)

    # TODO reuse existing structures
    @pack! mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces,
                       mpi_neighbor_mortars,
                       mpi_send_buffers, mpi_recv_buffers,
                       mpi_send_requests, mpi_recv_requests,
                       n_elements_by_rank, n_elements_global,
                       first_element_global_id
end

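# Determine which MPI ranks this rank shares MPI interfaces/mortars with and, for each
# neighbor rank, collect the ids of those interfaces and mortars in a globally
# consistent order.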
function init_mpi_neighbor_connectivity(mpi_interfaces, mpi_mortars,
                                        mesh::P4estMeshParallel)
    # Let p4est iterate over all interfaces and call init_neighbor_rank_connectivity_iter_face
    # to collect connectivity information
    iter_face_c = cfunction(init_neighbor_rank_connectivity_iter_face, Val(ndims(mesh)))
    user_data = InitNeighborRankConnectivityIterFaceUserData(mpi_interfaces,
                                                             mpi_mortars, mesh)

    iterate_p4est(mesh.p4est, user_data; ghost_layer = mesh.ghost,
                  iter_face_c = iter_face_c)

    # Build proper connectivity data structures from information gathered by iterating over p4est
    @unpack global_interface_ids, neighbor_ranks_interface, global_mortar_ids, neighbor_ranks_mortar = user_data

    mpi_neighbor_ranks = vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) |>
                         sort |> unique

    p = sortperm(global_interface_ids)
    neighbor_ranks_interface .= neighbor_ranks_interface[p]
    interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p]

    p = sortperm(global_mortar_ids)
    neighbor_ranks_mortar .= neighbor_ranks_mortar[p]
    mortar_ids = collect(1:nmpimortars(mpi_mortars))[p]

    # For each neighbor rank, init connectivity data structures
    mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks))
    mpi_neighbor_mortars = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks))
    for (index, rank) in enumerate(mpi_neighbor_ranks)
        mpi_neighbor_interfaces[index] = interface_ids[findall(==(rank),
                                                               neighbor_ranks_interface)]
        mpi_neighbor_mortars[index] = mortar_ids[findall(x -> (rank in x),
                                                         neighbor_ranks_mortar)]
    end

    # Check that all interfaces were counted exactly once
    @assert mapreduce(length, +, mpi_neighbor_interfaces; init = 0) ==
            nmpiinterfaces(mpi_interfaces)

    return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars
end

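# Mutable struct passed as user data to the p4est face iterator below. It accumulates,
# per MPI interface/mortar, a globally unique id and the neighboring MPI rank(s), and
# keeps running counters for the current interface/mortar id.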
mutable struct InitNeighborRankConnectivityIterFaceUserData{MPIInterfaces, MPIMortars,
                                                            Mesh}
    interfaces::MPIInterfaces
    interface_id::Int
    global_interface_ids::Vector{Int}
    neighbor_ranks_interface::Vector{Int}
    mortars::MPIMortars
    mortar_id::Int
    global_mortar_ids::Vector{Int}
    neighbor_ranks_mortar::Vector{Vector{Int}}
    mesh::Mesh
end

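# Initialize the user data with counters starting at 1 and placeholder entries for the
# global ids and neighbor ranks.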
function InitNeighborRankConnectivityIterFaceUserData(mpi_interfaces, mpi_mortars, mesh)
    global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces))
    neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces))
    global_mortar_ids = fill(-1, nmpimortars(mpi_mortars))
    neighbor_ranks_mortar = Vector{Vector{Int}}(undef, nmpimortars(mpi_mortars))

    return InitNeighborRankConnectivityIterFaceUserData{typeof(mpi_interfaces),
                                                        typeof(mpi_mortars),
                                                        typeof(mesh)}(mpi_interfaces, 1,
                                                                      global_interface_ids,
                                                                      neighbor_ranks_interface,
                                                                      mpi_mortars, 1,
                                                                      global_mortar_ids,
                                                                      neighbor_ranks_mortar,
                                                                      mesh)
end

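# Callback passed to p4est's face iterator. It unpacks the user data pointer and
# forwards to the type-stable inner function.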
function init_neighbor_rank_connectivity_iter_face(info, user_data)
    data = unsafe_pointer_to_objref(Ptr{InitNeighborRankConnectivityIterFaceUserData}(user_data))

    # Function barrier because the unpacked user_data above is not type-stable
    return init_neighbor_rank_connectivity_iter_face_inner(info, data)
end

# 2D
function cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{2})
    @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid,
               (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid}))
end
# 3D
function cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{3})
    @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid,
               (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid}))
end

# Function barrier for type stability
function init_neighbor_rank_connectivity_iter_face_inner(info, user_data)
    @unpack interfaces, interface_id, global_interface_ids, neighbor_ranks_interface,
    mortars, mortar_id, global_mortar_ids, neighbor_ranks_mortar, mesh = user_data

    info_pw = PointerWrapper(info)
    # Get the global interface/mortar ids and neighbor rank if current face belongs to an MPI
    # interface/mortar
    if info_pw.sides.elem_count[] == 2 # MPI interfaces/mortars have two neighboring elements
        # Extract surface data
        sides_pw = (load_pointerwrapper_side(info_pw, 1),
                    load_pointerwrapper_side(info_pw, 2))

        if sides_pw[1].is_hanging[] == false && sides_pw[2].is_hanging[] == false # No hanging nodes for MPI interfaces
            if sides_pw[1].is.full.is_ghost[] == true
                remote_side = 1
                local_side = 2
            elseif sides_pw[2].is.full.is_ghost[] == true
                remote_side = 2
                local_side = 1
            else # both sides are on this rank -> skip since it's a regular interface
                return nothing
            end

            # Sanity check, current face should belong to current MPI interface
            local_tree_pw = load_pointerwrapper_tree(mesh.p4est,
                                                     sides_pw[local_side].treeid[] + 1) # one-based indexing
            local_quad_id = local_tree_pw.quadrants_offset[] +
                            sides_pw[local_side].is.full.quadid[]
            @assert interfaces.local_neighbor_ids[interface_id] == local_quad_id + 1 # one-based indexing

            # Get neighbor ID from ghost layer
            proc_offsets = unsafe_wrap(Array,
                                       info_pw.ghost_layer.proc_offsets,
                                       mpi_nranks() + 1)
            ghost_id = sides_pw[remote_side].is.full.quadid[] # indexes the ghost layer, 0-based
            neighbor_rank = findfirst(r -> proc_offsets[r] <= ghost_id <
                                           proc_offsets[r + 1],
                                      1:mpi_nranks()) - 1 # MPI ranks are 0-based
            neighbor_ranks_interface[interface_id] = neighbor_rank

            # Global interface id is the globally unique quadrant id of the quadrant on the primary
            # side (1) multiplied by the number of faces per quadrant plus face
            if local_side == 1
                offset = mesh.p4est.global_first_quadrant[mpi_rank() + 1] # one-based indexing
                primary_quad_id = offset + local_quad_id
            else
                offset = mesh.p4est.global_first_quadrant[neighbor_rank + 1] # one-based indexing
                primary_quad_id = offset + sides_pw[1].is.full.quad.p.piggy3.local_num[]
            end
            global_interface_id = 2 * ndims(mesh) * primary_quad_id + sides_pw[1].face[]
            global_interface_ids[interface_id] = global_interface_id

            user_data.interface_id += 1
        else # hanging node
            if sides_pw[1].is_hanging[] == true
                hanging_side = 1
                full_side = 2
            else
                hanging_side = 2
                full_side = 1
            end
            # Verify before accessing is.full / is.hanging
            @assert sides_pw[hanging_side].is_hanging[] == true &&
                    sides_pw[full_side].is_hanging[] == false

            # If all quadrants are locally available, this is a regular mortar -> skip
            if sides_pw[full_side].is.full.is_ghost[] == false &&
               all(sides_pw[hanging_side].is.hanging.is_ghost[] .== false)
                return nothing
            end

            trees_pw = (load_pointerwrapper_tree(mesh.p4est, sides_pw[1].treeid[] + 1),
                        load_pointerwrapper_tree(mesh.p4est, sides_pw[2].treeid[] + 1))

            # Find small quads that are remote and determine which rank owns them
            remote_small_quad_positions = findall(sides_pw[hanging_side].is.hanging.is_ghost[] .==
                                                  true)
            proc_offsets = unsafe_wrap(Array,
                                       info_pw.ghost_layer.proc_offsets,
                                       mpi_nranks() + 1)
            # indices of small remote quads inside the ghost layer, 0-based
            ghost_ids = map(pos -> sides_pw[hanging_side].is.hanging.quadid[][pos],
                            remote_small_quad_positions)
            neighbor_ranks = map(ghost_ids) do ghost_id
                return findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r + 1],
                                 1:mpi_nranks()) - 1 # MPI ranks are 0-based
            end
            # Determine global quad id of large element to determine global MPI mortar id
            # Furthermore, if large element is ghost, add its owner rank to neighbor_ranks
            if sides_pw[full_side].is.full.is_ghost[] == true
                ghost_id = sides_pw[full_side].is.full.quadid[]
                large_quad_owner_rank = findfirst(r -> proc_offsets[r] <= ghost_id <
                                                       proc_offsets[r + 1],
                                                  1:mpi_nranks()) - 1 # MPI ranks are 0-based
                push!(neighbor_ranks, large_quad_owner_rank)

                offset = mesh.p4est.global_first_quadrant[large_quad_owner_rank + 1] # one-based indexing
                large_quad_id = offset +
                                sides_pw[full_side].is.full.quad.p.piggy3.local_num[]
            else
                offset = mesh.p4est.global_first_quadrant[mpi_rank() + 1] # one-based indexing
                large_quad_id = offset + trees_pw[full_side].quadrants_offset[] +
                                sides_pw[full_side].is.full.quadid[]
            end
            neighbor_ranks_mortar[mortar_id] = neighbor_ranks
            # Global mortar id is the globally unique quadrant id of the large quadrant multiplied by the
            # number of faces per quadrant plus face
            global_mortar_ids[mortar_id] = 2 * ndims(mesh) * large_quad_id +
                                           sides_pw[full_side].face[]

            user_data.mortar_id += 1
        end
    end

    return nothing
end

# Exchange normal directions of small elements of the MPI mortars. They are needed on all involved
# MPI ranks to calculate the mortar fluxes.
function exchange_normal_directions!(mpi_mortars, mpi_cache,
                                     mesh::Union{P4estMeshParallel, T8codeMeshParallel},
                                     n_nodes)
    RealT = real(mesh)
    n_dims = ndims(mesh)
    @unpack mpi_neighbor_mortars, mpi_neighbor_ranks = mpi_cache
    n_small_elements = 2^(n_dims - 1)
    data_size = n_nodes^(n_dims - 1) * n_dims

    # Create buffers and requests
    send_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars))
    recv_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars))
    for index in 1:length(mpi_neighbor_mortars)
        send_buffers[index] = Vector{RealT}(undef,
                                            length(mpi_neighbor_mortars[index]) *
                                            n_small_elements * data_size)
        send_buffers[index] .= NaN |> RealT
        recv_buffers[index] = Vector{RealT}(undef,
                                            length(mpi_neighbor_mortars[index]) *
                                            n_small_elements * data_size)
        recv_buffers[index] .= NaN |> RealT
    end
    send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars))
    recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars))

    # Fill send buffers
    for rank in 1:length(mpi_neighbor_ranks)
        send_buffer = send_buffers[rank]

        for (index, mortar) in enumerate(mpi_neighbor_mortars[rank])
            index_base = (index - 1) * n_small_elements * data_size
            indices = buffer_mortar_indices(mesh, index_base, data_size)
            for position in mpi_mortars.local_neighbor_positions[mortar]
                if position <= n_small_elements # element is small
                    first, last = indices[position]
                    @views send_buffer[first:last] .= vec(mpi_mortars.normal_directions[:,
                                                                                        ..,
                                                                                        position,
                                                                                        mortar])
                end
            end
        end
    end

    # Start data exchange
    for (index, rank) in enumerate(mpi_neighbor_ranks)
        send_requests[index] = MPI.Isend(send_buffers[index], rank, mpi_rank(),
                                         mpi_comm())
        recv_requests[index] = MPI.Irecv!(recv_buffers[index], rank, rank, mpi_comm())
    end

    # Unpack data from receive buffers
    data = MPI.Waitany(recv_requests)
    while data !== nothing
        recv_buffer = recv_buffers[data]

        for (index, mortar) in enumerate(mpi_neighbor_mortars[data])
            index_base = (index - 1) * n_small_elements * data_size
            indices = buffer_mortar_indices(mesh, index_base, data_size)
            for position in 1:n_small_elements
                # Skip if received data for `position` is NaN as no real data has been sent for the
                # corresponding element
                if isnan(recv_buffer[Base.first(indices[position])])
                    continue
                end

                first, last = indices[position]
                @views vec(mpi_mortars.normal_directions[:, .., position, mortar]) .= recv_buffer[first:last]
            end
        end

        data = MPI.Waitany(recv_requests)
    end

    # Wait for communication to finish
    MPI.Waitall(send_requests, MPI.Status)

    return nothing
end

# Get normal direction of MPI mortar
@inline function get_normal_direction(mpi_mortars::P4estMPIMortarContainer, indices...)
    return SVector(ntuple(@inline(dim -> mpi_mortars.normal_directions[dim, indices...]),
                          Val(ndims(mpi_mortars))))
end

include("dg_2d_parallel.jl")
include("dg_3d_parallel.jl")
include("dg_2d_parabolic_parallel.jl")
end # muladd