Path: blob/main/src/solvers/dgsem_p4est/dg_parallel.jl

# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

mutable struct P4estMPICache{BufferType <: DenseVector,
                             VecInt <: DenseVector{<:Integer}}
    mpi_neighbor_ranks::Vector{Int}
    mpi_neighbor_interfaces::VecOfArrays{VecInt}
    mpi_neighbor_mortars::VecOfArrays{VecInt}
    mpi_send_buffers::VecOfArrays{BufferType}
    mpi_recv_buffers::VecOfArrays{BufferType}
    mpi_send_requests::Vector{MPI.Request}
    mpi_recv_requests::Vector{MPI.Request}
    n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}}
    n_elements_global::Int
    first_element_global_id::Int
end

function P4estMPICache(uEltype)
    # MPI communication "just works" for bitstypes only
    if !isbitstype(uEltype)
        throw(ArgumentError("P4estMPICache only supports bitstypes, $uEltype is not a bitstype."))
    end

    mpi_neighbor_ranks = Vector{Int}(undef, 0)
    mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) |> VecOfArrays
    mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) |> VecOfArrays
    mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays
    mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays
    mpi_send_requests = Vector{MPI.Request}(undef, 0)
    mpi_recv_requests = Vector{MPI.Request}(undef, 0)
    n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1)
    n_elements_global = 0
    first_element_global_id = 0

    return P4estMPICache{Vector{uEltype}, Vector{Int}}(mpi_neighbor_ranks,
                                                       mpi_neighbor_interfaces,
                                                       mpi_neighbor_mortars,
                                                       mpi_send_buffers,
                                                       mpi_recv_buffers,
                                                       mpi_send_requests,
                                                       mpi_recv_requests,
                                                       n_elements_by_rank,
                                                       n_elements_global,
                                                       first_element_global_id)
end

@inline Base.eltype(::P4estMPICache{BufferType}) where {BufferType} = eltype(BufferType)

# @eval due to @muladd
@eval Adapt.@adapt_structure(P4estMPICache)

##
# Note that the code in `start_mpi_send`/`finish_mpi_receive!` is sensitive to inference on (at least) Julia 1.10.
# Julia's inference is bi-stable, it can sometimes depend on what code has been looked at already, and
# the presence of an inference result in the cache can have an impact on the inference of code.
# In this case the `send_buffer[first:last] .= vec(cache.mpi_mortars.u[2, :, :, .., mortar])`
# can fail to be inferred due to heuristics if this function is not in the cache...
precompile(Base.reindex,
           (Tuple{Base.Slice{Base.OneTo{Int64}}, Int64, Base.Slice{Base.OneTo{Int64}},
                  Base.Slice{Base.OneTo{Int64}}, Int64}, Tuple{Int64, Int64, Int64}))
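
# Illustrative usage sketch (not part of the original code): the constructor above only
# allocates empty containers, e.g.
#   mpi_cache = P4estMPICache(Float64)   # Vector{Float64} send/receive buffers
#   eltype(mpi_cache) == Float64         # via the Base.eltype method defined above
# Their actual contents are filled in later by `init_mpi_cache!`.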

function start_mpi_send!(mpi_cache::P4estMPICache, mesh, equations, dg, cache)
    data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1)
    n_small_elements = 2^(ndims(mesh) - 1)

    for rank in 1:length(mpi_cache.mpi_neighbor_ranks)
        send_buffer = mpi_cache.mpi_send_buffers[rank]

        for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[rank])
            first = (index - 1) * data_size + 1
            last = (index - 1) * data_size + data_size
            local_side = cache.mpi_interfaces.local_sides[interface]
            @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[local_side, ..,
                                                                         interface])
        end

        # Set send_buffer corresponding to mortar data to NaN and overwrite the parts where local
        # data exists
        interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[rank]) *
                               data_size
        mortars_data_size = length(mpi_cache.mpi_neighbor_mortars[rank]) *
                            n_small_elements * 2 * data_size
        # `NaN |> eltype(...)` ensures that the NaN's are of the appropriate floating point type
        send_buffer[(interfaces_data_size + 1):(interfaces_data_size + mortars_data_size)] .= NaN |>
                                                                                              eltype(mpi_cache)

        for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[rank])
            index_base = interfaces_data_size +
                         (index - 1) * n_small_elements * 2 * data_size
            indices = buffer_mortar_indices(mesh, index_base, data_size)

            for position in cache.mpi_mortars.local_neighbor_positions[mortar]
                first, last = indices[position]
                if position > n_small_elements # large element
                    @views send_buffer[first:last] .= vec(cache.mpi_mortars.u[2, :, :, ..,
                                                                              mortar])
                else # small element
                    @views send_buffer[first:last] .= vec(cache.mpi_mortars.u[1, :,
                                                                              position, ..,
                                                                              mortar])
                end
            end
        end
    end

    # Start sending
    for (index, rank) in enumerate(mpi_cache.mpi_neighbor_ranks)
        mpi_cache.mpi_send_requests[index] = MPI.Isend(mpi_cache.mpi_send_buffers[index],
                                                       rank, mpi_rank(), mpi_comm())
    end

    return nothing
end

function start_mpi_receive!(mpi_cache::P4estMPICache)
    for (index, rank) in enumerate(mpi_cache.mpi_neighbor_ranks)
        mpi_cache.mpi_recv_requests[index] = MPI.Irecv!(mpi_cache.mpi_recv_buffers[index],
                                                        rank, rank, mpi_comm())
    end

    return nothing
end

function finish_mpi_send!(mpi_cache::P4estMPICache)
    return MPI.Waitall(mpi_cache.mpi_send_requests, MPI.Status)
end

function finish_mpi_receive!(mpi_cache::P4estMPICache, mesh, equations, dg, cache)
    data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1)
    n_small_elements = 2^(ndims(mesh) - 1)
    n_positions = n_small_elements + 1

    # Start receiving and unpack received data until all communication is finished
    data = MPI.Waitany(mpi_cache.mpi_recv_requests)
    while data !== nothing
        recv_buffer = mpi_cache.mpi_recv_buffers[data]

        for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[data])
            first = (index - 1) * data_size + 1
            last = (index - 1) * data_size + data_size

            if cache.mpi_interfaces.local_sides[interface] == 1 # local element on primary side
                @views vec(cache.mpi_interfaces.u[2, .., interface]) .= recv_buffer[first:last]
            else # local element at secondary side
                @views vec(cache.mpi_interfaces.u[1, .., interface]) .= recv_buffer[first:last]
            end
        end

        interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[data]) *
                               data_size
        for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[data])
            index_base = interfaces_data_size +
                         (index - 1) * n_small_elements * 2 * data_size
            indices = buffer_mortar_indices(mesh, index_base, data_size)

            for position in 1:n_positions
                # Skip if received data for `position` is NaN as no real data has been sent for the
                # corresponding element
                if isnan(recv_buffer[Base.first(indices[position])])
                    continue
                end

                first, last = indices[position]
                if position == n_positions # large element
                    @views vec(cache.mpi_mortars.u[2, :, :, .., mortar]) .= recv_buffer[first:last]
                else # small element
                    @views vec(cache.mpi_mortars.u[1, :, position, .., mortar]) .= recv_buffer[first:last]
                end
            end
        end

        data = MPI.Waitany(mpi_cache.mpi_recv_requests)
    end

    return nothing
end
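
# Illustrative sizing example for the buffers used above (hypothetical numbers, not taken
# from the original code): for a 2D mesh with `nvariables(equations) == 4` and
# `nnodes(dg) == 4`,
#   data_size        = 4 * 4^(2 - 1) = 16   # entries per MPI interface
#   n_small_elements = 2^(2 - 1)     = 2
# so each MPI mortar occupies `n_small_elements * 2 * data_size = 64` entries. Per neighbor
# rank, the buffer holds all interface blocks first, followed by the mortar blocks, which are
# NaN-initialized and only overwritten where data is locally available.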

# Return a tuple `indices` where indices[position] is a `(first, last)` tuple for accessing the
# data corresponding to the `position` part of a mortar in an MPI buffer. The mortar data must begin
# at `index_base`+1 in the MPI buffer. `data_size` is the data size associated with each small
# position (i.e. position 1 or 2). The data corresponding to the large side (i.e. position 3) has
# size `2 * data_size`.
@inline function buffer_mortar_indices(mesh::Union{P4estMeshParallel{2},
                                                   T8codeMeshParallel{2}}, index_base,
                                       data_size)
    return (
            # first, last for local element in position 1 (small element)
            (index_base + 1,
             index_base + 1 * data_size),
            # first, last for local element in position 2 (small element)
            (index_base + 1 * data_size + 1,
             index_base + 2 * data_size),
            # first, last for local element in position 3 (large element)
            (index_base + 2 * data_size + 1,
             index_base + 4 * data_size))
end

# Return a tuple `indices` where indices[position] is a `(first, last)` tuple for accessing the
# data corresponding to the `position` part of a mortar in an MPI buffer. The mortar data must begin
# at `index_base`+1 in the MPI buffer. `data_size` is the data size associated with each small
# position (i.e. position 1 to 4). The data corresponding to the large side (i.e. position 5) has
# size `4 * data_size`.
@inline function buffer_mortar_indices(mesh::Union{P4estMeshParallel{3},
                                                   T8codeMeshParallel{3}}, index_base,
                                       data_size)
    return (
            # first, last for local element in position 1 (small element)
            (index_base + 1,
             index_base + 1 * data_size),
            # first, last for local element in position 2 (small element)
            (index_base + 1 * data_size + 1,
             index_base + 2 * data_size),
            # first, last for local element in position 3 (small element)
            (index_base + 2 * data_size + 1,
             index_base + 3 * data_size),
            # first, last for local element in position 4 (small element)
            (index_base + 3 * data_size + 1,
             index_base + 4 * data_size),
            # first, last for local element in position 5 (large element)
            (index_base + 4 * data_size + 1,
             index_base + 8 * data_size))
end
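
# Worked example for the 2D method above (illustrative values, not from the original code):
# with `index_base = 0` and `data_size = 16`, it returns
#   ((1, 16),   # small element in position 1
#    (17, 32),  # small element in position 2
#    (33, 64))  # large element in position 3, twice the size of a small element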

# This method is called when a SemidiscretizationHyperbolic is constructed.
# It constructs the basic `cache` used throughout the simulation to compute
# the RHS etc.
function create_cache(mesh::P4estMeshParallel, equations::AbstractEquations, dg::DG,
                      ::Any, ::Type{uEltype}) where {uEltype <: Real}
    # Make sure to balance and partition the p4est and create a new ghost layer before creating any
    # containers in case someone has tampered with the p4est after creating the mesh
    balance!(mesh)
    partition!(mesh)
    update_ghost_layer!(mesh)

    elements = init_elements(mesh, equations, dg.basis, uEltype)

    mpi_interfaces = init_mpi_interfaces(mesh, equations, dg.basis, elements)
    mpi_mortars = init_mpi_mortars(mesh, equations, dg.basis, elements)
    mpi_cache = init_mpi_cache(mesh, mpi_interfaces, mpi_mortars,
                               nvariables(equations), nnodes(dg), uEltype)

    exchange_normal_directions!(mpi_mortars, mpi_cache, mesh, nnodes(dg))

    interfaces = init_interfaces(mesh, equations, dg.basis, elements)
    boundaries = init_boundaries(mesh, equations, dg.basis, elements)
    mortars = init_mortars(mesh, equations, dg.basis, elements)

    # Container cache
    cache = (; elements, interfaces, mpi_interfaces, boundaries, mortars,
             mpi_mortars, mpi_cache)

    # Add Volume-Integral cache
    cache = (; cache...,
             create_cache(mesh, equations, dg.volume_integral, dg, cache, uEltype)...)
    # Add Mortar cache
    cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...)

    return cache
end

function init_mpi_cache(mesh::P4estMeshParallel, mpi_interfaces, mpi_mortars, nvars,
                        nnodes, uEltype)
    mpi_cache = P4estMPICache(uEltype)
    init_mpi_cache!(mpi_cache, mesh, mpi_interfaces, mpi_mortars, nvars, nnodes,
                    uEltype)

    return mpi_cache
end

function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::P4estMeshParallel,
                         mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype)
    mpi_neighbor_ranks, _mpi_neighbor_interfaces, _mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces,
                                                                                                         mpi_mortars,
                                                                                                         mesh)

    _mpi_send_buffers, _mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(_mpi_neighbor_interfaces,
                                                                                                          _mpi_neighbor_mortars,
                                                                                                          ndims(mesh),
                                                                                                          nvars,
                                                                                                          n_nodes,
                                                                                                          uEltype)

    # Determine local and total number of elements
    n_elements_global = Int(mesh.p4est.global_num_quadrants[])
    n_elements_by_rank = vcat(Int.(unsafe_wrap(Array, mesh.p4est.global_first_quadrant,
                                               mpi_nranks())),
                              n_elements_global) |> diff # diff sufficient due to 0-based quad indices
    n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1))
    # Account for 1-based indexing in Julia
    first_element_global_id = Int(mesh.p4est.global_first_quadrant[mpi_rank() + 1]) + 1
    @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements"

    mpi_neighbor_interfaces = VecOfArrays(_mpi_neighbor_interfaces)
    mpi_neighbor_mortars = VecOfArrays(_mpi_neighbor_mortars)
    mpi_send_buffers = VecOfArrays(_mpi_send_buffers)
    mpi_recv_buffers = VecOfArrays(_mpi_recv_buffers)

    # TODO reuse existing structures
    @pack! mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces,
                       mpi_neighbor_mortars,
                       mpi_send_buffers, mpi_recv_buffers,
                       mpi_send_requests, mpi_recv_requests,
                       n_elements_by_rank, n_elements_global,
                       first_element_global_id
end
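
# Illustrative example for the element bookkeeping in `init_mpi_cache!` above (hypothetical
# numbers): with 3 ranks, `global_first_quadrant == [0, 10, 25]`, and 40 global quadrants,
#   n_elements_by_rank      == OffsetArray([10, 15, 15], 0:2)
#   first_element_global_id == 11 on rank 1 (i.e. global_first_quadrant[2] + 1)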

function init_mpi_neighbor_connectivity(mpi_interfaces, mpi_mortars,
                                        mesh::P4estMeshParallel)
    # Let p4est iterate over all interfaces and call init_neighbor_rank_connectivity_iter_face
    # to collect connectivity information
    iter_face_c = cfunction(init_neighbor_rank_connectivity_iter_face, Val(ndims(mesh)))
    user_data = InitNeighborRankConnectivityIterFaceUserData(mpi_interfaces,
                                                             mpi_mortars, mesh)

    iterate_p4est(mesh.p4est, user_data; ghost_layer = mesh.ghost,
                  iter_face_c = iter_face_c)

    # Build proper connectivity data structures from information gathered by iterating over p4est
    @unpack global_interface_ids, neighbor_ranks_interface, global_mortar_ids, neighbor_ranks_mortar = user_data

    mpi_neighbor_ranks = vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) |>
                         sort |> unique

    p = sortperm(global_interface_ids)
    neighbor_ranks_interface .= neighbor_ranks_interface[p]
    interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p]

    p = sortperm(global_mortar_ids)
    neighbor_ranks_mortar .= neighbor_ranks_mortar[p]
    mortar_ids = collect(1:nmpimortars(mpi_mortars))[p]

    # For each neighbor rank, init connectivity data structures
    mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks))
    mpi_neighbor_mortars = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks))
    for (index, rank) in enumerate(mpi_neighbor_ranks)
        mpi_neighbor_interfaces[index] = interface_ids[findall(==(rank),
                                                               neighbor_ranks_interface)]
        mpi_neighbor_mortars[index] = mortar_ids[findall(x -> (rank in x),
                                                         neighbor_ranks_mortar)]
    end

    # Check that all interfaces were counted exactly once
    @assert mapreduce(length, +, mpi_neighbor_interfaces; init = 0) ==
            nmpiinterfaces(mpi_interfaces)

    return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars
end
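
# Illustrative example for the connectivity built above (hypothetical values): with
#   neighbor_ranks_interface = [2, 0, 2]   # one owning rank per MPI interface
#   neighbor_ranks_mortar    = [[0, 3]]    # one vector of ranks per MPI mortar
# the expression `vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) |> sort |> unique`
# yields `mpi_neighbor_ranks == [0, 2, 3]`, and interfaces/mortars are then grouped by the
# rank(s) they are shared with.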

mutable struct InitNeighborRankConnectivityIterFaceUserData{MPIInterfaces, MPIMortars,
                                                            Mesh}
    interfaces::MPIInterfaces
    interface_id::Int
    global_interface_ids::Vector{Int}
    neighbor_ranks_interface::Vector{Int}
    mortars::MPIMortars
    mortar_id::Int
    global_mortar_ids::Vector{Int}
    neighbor_ranks_mortar::Vector{Vector{Int}}
    mesh::Mesh
end

function InitNeighborRankConnectivityIterFaceUserData(mpi_interfaces, mpi_mortars, mesh)
    global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces))
    neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces))
    global_mortar_ids = fill(-1, nmpimortars(mpi_mortars))
    neighbor_ranks_mortar = Vector{Vector{Int}}(undef, nmpimortars(mpi_mortars))

    return InitNeighborRankConnectivityIterFaceUserData{typeof(mpi_interfaces),
                                                        typeof(mpi_mortars),
                                                        typeof(mesh)}(mpi_interfaces, 1,
                                                                      global_interface_ids,
                                                                      neighbor_ranks_interface,
                                                                      mpi_mortars, 1,
                                                                      global_mortar_ids,
                                                                      neighbor_ranks_mortar,
                                                                      mesh)
end

function init_neighbor_rank_connectivity_iter_face(info, user_data)
    data = unsafe_pointer_to_objref(Ptr{InitNeighborRankConnectivityIterFaceUserData}(user_data))

    # Function barrier because the unpacked user_data above is not type-stable
    return init_neighbor_rank_connectivity_iter_face_inner(info, data)
end

# 2D
function cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{2})
    @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid,
               (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid}))
end
# 3D
function cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{3})
    @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid,
               (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid}))
end

# Function barrier for type stability
function init_neighbor_rank_connectivity_iter_face_inner(info, user_data)
    @unpack interfaces, interface_id, global_interface_ids, neighbor_ranks_interface,
            mortars, mortar_id, global_mortar_ids, neighbor_ranks_mortar, mesh = user_data

    info_pw = PointerWrapper(info)
    # Get the global interface/mortar ids and neighbor rank if current face belongs to an MPI
    # interface/mortar
    if info_pw.sides.elem_count[] == 2 # MPI interfaces/mortars have two neighboring elements
        # Extract surface data
        sides_pw = (load_pointerwrapper_side(info_pw, 1),
                    load_pointerwrapper_side(info_pw, 2))

        if sides_pw[1].is_hanging[] == false && sides_pw[2].is_hanging[] == false # No hanging nodes for MPI interfaces
            if sides_pw[1].is.full.is_ghost[] == true
                remote_side = 1
                local_side = 2
            elseif sides_pw[2].is.full.is_ghost[] == true
                remote_side = 2
                local_side = 1
            else # both sides are on this rank -> skip since it's a regular interface
                return nothing
            end

            # Sanity check, current face should belong to current MPI interface
            local_tree_pw = load_pointerwrapper_tree(mesh.p4est,
                                                     sides_pw[local_side].treeid[] + 1) # one-based indexing
            local_quad_id = local_tree_pw.quadrants_offset[] +
                            sides_pw[local_side].is.full.quadid[]
            @assert interfaces.local_neighbor_ids[interface_id] == local_quad_id + 1 # one-based indexing

            # Get neighbor ID from ghost layer
            proc_offsets = unsafe_wrap(Array, info_pw.ghost_layer.proc_offsets,
                                       mpi_nranks() + 1)
            ghost_id = sides_pw[remote_side].is.full.quadid[] # indexes the ghost layer, 0-based
            neighbor_rank = findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r + 1],
                                      1:mpi_nranks()) - 1 # MPI ranks are 0-based
            neighbor_ranks_interface[interface_id] = neighbor_rank

            # Global interface id is the globally unique quadrant id of the quadrant on the primary
            # side (1) multiplied by the number of faces per quadrant plus face
            if local_side == 1
                offset = mesh.p4est.global_first_quadrant[mpi_rank() + 1] # one-based indexing
                primary_quad_id = offset + local_quad_id
            else
                offset = mesh.p4est.global_first_quadrant[neighbor_rank + 1] # one-based indexing
                primary_quad_id = offset + sides_pw[1].is.full.quad.p.piggy3.local_num[]
            end
            global_interface_id = 2 * ndims(mesh) * primary_quad_id + sides_pw[1].face[]
            global_interface_ids[interface_id] = global_interface_id

            user_data.interface_id += 1
        else # hanging node
            if sides_pw[1].is_hanging[] == true
                hanging_side = 1
                full_side = 2
            else
                hanging_side = 2
                full_side = 1
            end
            # Verify before accessing is.full / is.hanging
            @assert sides_pw[hanging_side].is_hanging[] == true &&
                    sides_pw[full_side].is_hanging[] == false

            # If all quadrants are locally available, this is a regular mortar -> skip
            if sides_pw[full_side].is.full.is_ghost[] == false &&
               all(sides_pw[hanging_side].is.hanging.is_ghost[] .== false)
                return nothing
            end

            trees_pw = (load_pointerwrapper_tree(mesh.p4est, sides_pw[1].treeid[] + 1),
                        load_pointerwrapper_tree(mesh.p4est, sides_pw[2].treeid[] + 1))

            # Find small quads that are remote and determine which rank owns them
            remote_small_quad_positions = findall(sides_pw[hanging_side].is.hanging.is_ghost[] .==
                                                  true)
            proc_offsets = unsafe_wrap(Array, info_pw.ghost_layer.proc_offsets,
                                       mpi_nranks() + 1)
            # indices of small remote quads inside the ghost layer, 0-based
            ghost_ids = map(pos -> sides_pw[hanging_side].is.hanging.quadid[][pos],
                            remote_small_quad_positions)
            neighbor_ranks = map(ghost_ids) do ghost_id
                return findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r + 1],
                                 1:mpi_nranks()) - 1 # MPI ranks are 0-based
            end
            # Determine global quad id of large element to determine global MPI mortar id
            # Furthermore, if large element is ghost, add its owner rank to neighbor_ranks
            if sides_pw[full_side].is.full.is_ghost[] == true
                ghost_id = sides_pw[full_side].is.full.quadid[]
                large_quad_owner_rank = findfirst(r -> proc_offsets[r] <= ghost_id <
                                                       proc_offsets[r + 1],
                                                  1:mpi_nranks()) - 1 # MPI ranks are 0-based
                push!(neighbor_ranks, large_quad_owner_rank)

                offset = mesh.p4est.global_first_quadrant[large_quad_owner_rank + 1] # one-based indexing
                large_quad_id = offset +
                                sides_pw[full_side].is.full.quad.p.piggy3.local_num[]
            else
                offset = mesh.p4est.global_first_quadrant[mpi_rank() + 1] # one-based indexing
                large_quad_id = offset + trees_pw[full_side].quadrants_offset[] +
                                sides_pw[full_side].is.full.quadid[]
            end
            neighbor_ranks_mortar[mortar_id] = neighbor_ranks
            # Global mortar id is the globally unique quadrant id of the large quadrant multiplied by the
            # number of faces per quadrant plus face
            global_mortar_ids[mortar_id] = 2 * ndims(mesh) * large_quad_id +
                                           sides_pw[full_side].face[]

            user_data.mortar_id += 1
        end
    end

    return nothing
end
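
# Illustrative example for the global ids computed above (hypothetical values): in 2D there
# are `2 * ndims(mesh) == 4` faces per quadrant, so a primary/large quadrant with global id 7
# and face 2 yields a global interface/mortar id of `4 * 7 + 2 == 30`. Both ranks sharing the
# face compute the same value, which makes the ordering globally consistent.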

# Exchange normal directions of small elements of the MPI mortars. They are needed on all involved
# MPI ranks to calculate the mortar fluxes.
function exchange_normal_directions!(mpi_mortars, mpi_cache,
                                     mesh::Union{P4estMeshParallel, T8codeMeshParallel},
                                     n_nodes)
    RealT = real(mesh)
    n_dims = ndims(mesh)
    @unpack mpi_neighbor_mortars, mpi_neighbor_ranks = mpi_cache
    n_small_elements = 2^(n_dims - 1)
    data_size = n_nodes^(n_dims - 1) * n_dims

    # Create buffers and requests
    send_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars))
    recv_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars))
    for index in 1:length(mpi_neighbor_mortars)
        send_buffers[index] = Vector{RealT}(undef,
                                            length(mpi_neighbor_mortars[index]) *
                                            n_small_elements * data_size)
        send_buffers[index] .= NaN |> RealT
        recv_buffers[index] = Vector{RealT}(undef,
                                            length(mpi_neighbor_mortars[index]) *
                                            n_small_elements * data_size)
        recv_buffers[index] .= NaN |> RealT
    end
    send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars))
    recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars))

    # Fill send buffers
    for rank in 1:length(mpi_neighbor_ranks)
        send_buffer = send_buffers[rank]

        for (index, mortar) in enumerate(mpi_neighbor_mortars[rank])
            index_base = (index - 1) * n_small_elements * data_size
            indices = buffer_mortar_indices(mesh, index_base, data_size)
            for position in mpi_mortars.local_neighbor_positions[mortar]
                if position <= n_small_elements # element is small
                    first, last = indices[position]
                    @views send_buffer[first:last] .= vec(mpi_mortars.normal_directions[:, ..,
                                                                                        position,
                                                                                        mortar])
                end
            end
        end
    end

    # Start data exchange
    for (index, rank) in enumerate(mpi_neighbor_ranks)
        send_requests[index] = MPI.Isend(send_buffers[index], rank, mpi_rank(),
                                         mpi_comm())
        recv_requests[index] = MPI.Irecv!(recv_buffers[index], rank, rank, mpi_comm())
    end

    # Unpack data from receive buffers
    data = MPI.Waitany(recv_requests)
    while data !== nothing
        recv_buffer = recv_buffers[data]

        for (index, mortar) in enumerate(mpi_neighbor_mortars[data])
            index_base = (index - 1) * n_small_elements * data_size
            indices = buffer_mortar_indices(mesh, index_base, data_size)
            for position in 1:n_small_elements
                # Skip if received data for `position` is NaN as no real data has been sent for the
                # corresponding element
                if isnan(recv_buffer[Base.first(indices[position])])
                    continue
                end

                first, last = indices[position]
                @views vec(mpi_mortars.normal_directions[:, .., position, mortar]) .= recv_buffer[first:last]
            end
        end

        data = MPI.Waitany(recv_requests)
    end

    # Wait for communication to finish
    MPI.Waitall(send_requests, MPI.Status)

    return nothing
end

# Get normal direction of MPI mortar
@inline function get_normal_direction(mpi_mortars::P4estMPIMortarContainer, indices...)
    return SVector(ntuple(@inline(dim -> mpi_mortars.normal_directions[dim, indices...]),
                          Val(ndims(mpi_mortars))))
end

include("dg_2d_parallel.jl")
include("dg_3d_parallel.jl")
include("dg_2d_parabolic_parallel.jl")
end # muladd