CoCalc -- dg_3d_subcell

GitHub Repository: trixi-framework/Trixi.jl
Path: blob/main/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
⁵⁵⁹¹ views
1
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
2
# Since these FMAs can increase the performance of many numerical algorithms,
3
# we need to opt-in explicitly.
4
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
5
@muladd begin
6
#! format: noindent
7

8
function create_cache_subcell_limiting(mesh::Union{TreeMesh{3}, P4estMesh{3}},
9
                                       equations,
10
                                       volume_integral::VolumeIntegralSubcellLimiting,
11
                                       dg::DG, cache_containers, uEltype)
12
    cache = NamedTuple()
13

14
    fhat1_L_threaded, fhat1_R_threaded,
15
    fhat2_L_threaded, fhat2_R_threaded,
16
    fhat3_L_threaded, fhat3_R_threaded = create_f_threaded(mesh, equations, dg, uEltype)
17

18
    A4d = Array{uEltype, 4}
19
    flux_temp_threaded = A4d[A4d(undef, nvariables(equations),
20
                                 nnodes(dg), nnodes(dg), nnodes(dg))
21
                             for _ in 1:Threads.maxthreadid()]
22
    fhat_temp_threaded = A4d[A4d(undef, nvariables(equations),
23
                                 nnodes(dg), nnodes(dg), nnodes(dg))
24
                             for _ in 1:Threads.maxthreadid()]
25

26
    n_elements = nelements(cache_containers.elements)
27
    antidiffusive_fluxes = ContainerAntidiffusiveFlux3D{uEltype}(n_elements,
28
                                                                 nvariables(equations),
29
                                                                 nnodes(dg))
30

31
    if have_nonconservative_terms(equations) == true
32
        A5d = Array{uEltype, 5}
33
        # Extract the nonconservative flux as a dispatch argument for `n_nonconservative_terms`
34
        _, volume_flux_noncons = volume_integral.volume_flux_dg
35

36
        flux_nonconservative_temp_threaded = A5d[A5d(undef, nvariables(equations),
37
                                                     n_nonconservative_terms(volume_flux_noncons),
38
                                                     nnodes(dg), nnodes(dg),
39
                                                     nnodes(dg))
40
                                                 for _ in 1:Threads.maxthreadid()]
41
        fhat_nonconservative_temp_threaded = A5d[A5d(undef, nvariables(equations),
42
                                                     n_nonconservative_terms(volume_flux_noncons),
43
                                                     nnodes(dg), nnodes(dg),
44
                                                     nnodes(dg))
45
                                                 for _ in 1:Threads.maxthreadid()]
46
        phi_threaded = A5d[A5d(undef, nvariables(equations),
47
                               n_nonconservative_terms(volume_flux_noncons),
48
                               nnodes(dg), nnodes(dg), nnodes(dg))
49
                           for _ in 1:Threads.maxthreadid()]
50
        cache = (; cache..., flux_nonconservative_temp_threaded,
51
                 fhat_nonconservative_temp_threaded, phi_threaded)
52
    end
53

54
    # The limiter cache was created with 0 elements
55
    resize_subcell_limiter_cache!(dg.volume_integral.limiter, n_elements)
56

57
    return (; cache..., antidiffusive_fluxes,
58
            fhat1_L_threaded, fhat1_R_threaded,
59
            fhat2_L_threaded, fhat2_R_threaded,
60
            fhat3_L_threaded, fhat3_R_threaded,
61
            flux_temp_threaded, fhat_temp_threaded)
62
end
63

64
# Subcell limiting currently only implemented for certain mesh types
65
@inline function volume_integral_kernel!(du, u, element,
66
                                         MeshT::Type{<:Union{TreeMesh{3}, P4estMesh{3}}},
67
                                         nonconservative_terms, equations,
68
                                         volume_integral::VolumeIntegralSubcellLimiting,
69
                                         dg::DGSEM, cache)
70
    @unpack inverse_weights = dg.basis # Plays role of DG subcell sizes
71
    @unpack volume_flux_dg, volume_flux_fv, limiter = volume_integral
72

73
    # high-order DG fluxes
74
    @unpack fhat1_L_threaded, fhat1_R_threaded, fhat2_L_threaded, fhat2_R_threaded, fhat3_L_threaded, fhat3_R_threaded = cache
75

76
    fhat1_L = fhat1_L_threaded[Threads.threadid()]
77
    fhat1_R = fhat1_R_threaded[Threads.threadid()]
78
    fhat2_L = fhat2_L_threaded[Threads.threadid()]
79
    fhat2_R = fhat2_R_threaded[Threads.threadid()]
80
    fhat3_L = fhat3_L_threaded[Threads.threadid()]
81
    fhat3_R = fhat3_R_threaded[Threads.threadid()]
82
    calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R,
83
                   u, MeshT, nonconservative_terms, equations, volume_flux_dg,
84
                   dg, element, cache)
85

86
    # low-order FV fluxes
87
    @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache
88

89
    fstar1_L = fstar1_L_threaded[Threads.threadid()]
90
    fstar1_R = fstar1_R_threaded[Threads.threadid()]
91
    fstar2_L = fstar2_L_threaded[Threads.threadid()]
92
    fstar2_R = fstar2_R_threaded[Threads.threadid()]
93
    fstar3_L = fstar3_L_threaded[Threads.threadid()]
94
    fstar3_R = fstar3_R_threaded[Threads.threadid()]
95
    calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R,
96
                 u, MeshT, nonconservative_terms, equations, volume_flux_fv,
97
                 dg, element, cache)
98

99
    # antidiffusive flux
100
    calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R,
101
                            fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R,
102
                            u, MeshT, nonconservative_terms, equations, limiter,
103
                            dg, element, cache)
104

105
    # Calculate volume integral contribution of low-order FV flux
106
    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
107
        for v in eachvariable(equations)
108
            du[v, i, j, k, element] += inverse_weights[i] *
109
                                       (fstar1_L[v, i + 1, j, k] - fstar1_R[v, i, j, k]) +
110
                                       inverse_weights[j] *
111
                                       (fstar2_L[v, i, j + 1, k] - fstar2_R[v, i, j, k]) +
112
                                       inverse_weights[k] *
113
                                       (fstar3_L[v, i, j, k + 1] - fstar3_R[v, i, j, k])
114
        end
115
    end
116

117
    return nothing
118
end
119

120
# Calculate the DG staggered volume fluxes `fhat` in subcell FV-form inside the element
121
# (**without non-conservative terms**).
122
#
123
# See also `flux_differencing_kernel!`.
124
@inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R,
125
                                u, ::Type{<:TreeMesh{3}},
126
                                have_nonconservative_terms::False, equations,
127
                                volume_flux, dg::DGSEM, element, cache)
128
    @unpack weights, derivative_split = dg.basis
129
    @unpack flux_temp_threaded = cache
130

131
    flux_temp = flux_temp_threaded[Threads.threadid()]
132

133
    # The FV-form fluxes are calculated in a recursive manner, i.e.:
134
    # fhat_(0,1)   = w_0 * FVol_0,
135
    # fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j,   for j=1,...,N-1,
136
    # with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).
137

138
    # To use the symmetry of the `volume_flux`, the split form volume flux is precalculated
139
    # like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`
140
    # and saved in in `flux_temp`.
141

142
    # Split form volume flux in orientation 1: x direction
143
    flux_temp .= zero(eltype(flux_temp))
144

145
    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
146
        u_node = get_node_vars(u, equations, dg, i, j, k, element)
147

148
        # All diagonal entries of `derivative_split` are zero. Thus, we can skip
149
        # the computation of the diagonal terms. In addition, we use the symmetry
150
        # of the `volume_flux` to save half of the possible two-point flux
151
        # computations.
152
        for ii in (i + 1):nnodes(dg)
153
            u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element)
154
            flux1 = volume_flux(u_node, u_node_ii, 1, equations)
155
            multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], flux1,
156
                                       equations, dg, i, j, k)
157
            multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], flux1,
158
                                       equations, dg, ii, j, k)
159
        end
160
    end
161

162
    # FV-form flux `fhat` in x direction
163
    for k in eachnode(dg), j in eachnode(dg), i in 1:(nnodes(dg) - 1)
164
        for v in eachvariable(equations)
165
            fhat1_L[v, i + 1, j, k] = fhat1_L[v, i, j, k] +
166
                                      weights[i] * flux_temp[v, i, j, k]
167
            fhat1_R[v, i + 1, j, k] = fhat1_L[v, i + 1, j, k]
168
        end
169
    end
170

171
    # Split form volume flux in orientation 2: y direction
172
    flux_temp .= zero(eltype(flux_temp))
173

174
    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
175
        u_node = get_node_vars(u, equations, dg, i, j, k, element)
176
        for jj in (j + 1):nnodes(dg)
177
            u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element)
178
            flux2 = volume_flux(u_node, u_node_jj, 2, equations)
179
            multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], flux2,
180
                                       equations, dg, i, j, k)
181
            multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], flux2,
182
                                       equations, dg, i, jj, k)
183
        end
184
    end
185

186
    # FV-form flux `fhat` in y direction
187
    for k in eachnode(dg), j in 1:(nnodes(dg) - 1), i in eachnode(dg)
188
        for v in eachvariable(equations)
189
            fhat2_L[v, i, j + 1, k] = fhat2_L[v, i, j, k] +
190
                                      weights[j] * flux_temp[v, i, j, k]
191
            fhat2_R[v, i, j + 1, k] = fhat2_L[v, i, j + 1, k]
192
        end
193
    end
194

195
    # Split form volume flux in orientation 3: z direction
196
    flux_temp .= zero(eltype(flux_temp))
197

198
    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
199
        u_node = get_node_vars(u, equations, dg, i, j, k, element)
200
        for kk in (k + 1):nnodes(dg)
201
            u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element)
202
            flux3 = volume_flux(u_node, u_node_kk, 3, equations)
203
            multiply_add_to_node_vars!(flux_temp, derivative_split[k, kk], flux3,
204
                                       equations, dg, i, j, k)
205
            multiply_add_to_node_vars!(flux_temp, derivative_split[kk, k], flux3,
206
                                       equations, dg, i, j, kk)
207
        end
208
    end
209

210
    # FV-form flux `fhat` in z direction
211
    for k in 1:(nnodes(dg) - 1), j in eachnode(dg), i in eachnode(dg)
212
        for v in eachvariable(equations)
213
            fhat3_L[v, i, j, k + 1] = fhat3_L[v, i, j, k] +
214
                                      weights[k] * flux_temp[v, i, j, k]
215
            fhat3_R[v, i, j, k + 1] = fhat3_L[v, i, j, k + 1]
216
        end
217
    end
218

219
    return nothing
220
end
221

222
# Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for conservative systems.
223
@inline function calcflux_antidiffusive!(fhat1_L, fhat1_R,
224
                                         fhat2_L, fhat2_R,
225
                                         fhat3_L, fhat3_R,
226
                                         fstar1_L, fstar1_R,
227
                                         fstar2_L, fstar2_R,
228
                                         fstar3_L, fstar3_R,
229
                                         u, ::Type{<:Union{TreeMesh{3}, P4estMesh{3}}},
230
                                         nonconservative_terms::False, equations,
231
                                         limiter::SubcellLimiterIDP, dg, element, cache)
232
    @unpack antidiffusive_flux1_L, antidiffusive_flux1_R, antidiffusive_flux2_L, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes
233

234
    # Due to the use of LGL nodes, the DG staggered fluxes `fhat` and FV fluxes `fstar` are equal
235
    # on the element interfaces. So, they are not computed in the volume integral and set to zero
236
    # in their respective computation.
237
    # The antidiffusive fluxes are therefore zero on the element interfaces and don't need to be
238
    # computed either. They are set to zero directly after resizing the container.
239
    # This applies to the indices `i=1` and `i=nnodes(dg)+1` for `antidiffusive_flux1_L` and
240
    # `antidiffusive_flux1_R` and analogously for the other two directions.
241

242
    for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg)
243
        for v in eachvariable(equations)
244
            antidiffusive_flux1_L[v, i, j, k, element] = fhat1_L[v, i, j, k] -
245
                                                         fstar1_L[v, i, j, k]
246
            antidiffusive_flux1_R[v, i, j, k, element] = antidiffusive_flux1_L[v,
247
                                                                               i, j, k,
248
                                                                               element]
249
        end
250
    end
251
    for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg)
252
        for v in eachvariable(equations)
253
            antidiffusive_flux2_L[v, i, j, k, element] = fhat2_L[v, i, j, k] -
254
                                                         fstar2_L[v, i, j, k]
255
            antidiffusive_flux2_R[v, i, j, k, element] = antidiffusive_flux2_L[v,
256
                                                                               i, j, k,
257
                                                                               element]
258
        end
259
    end
260
    for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg)
261
        for v in eachvariable(equations)
262
            antidiffusive_flux3_L[v, i, j, k, element] = fhat3_L[v, i, j, k] -
263
                                                         fstar3_L[v, i, j, k]
264
            antidiffusive_flux3_R[v, i, j, k, element] = antidiffusive_flux3_L[v,
265
                                                                               i, j, k,
266
                                                                               element]
267
        end
268
    end
269

270
    return nothing
271
end
272

273
# Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for conservative systems.
274
@inline function calcflux_antidiffusive!(fhat1_L, fhat1_R,
275
                                         fhat2_L, fhat2_R,
276
                                         fhat3_L, fhat3_R,
277
                                         fstar1_L, fstar1_R,
278
                                         fstar2_L, fstar2_R,
279
                                         fstar3_L, fstar3_R,
280
                                         u, ::Type{<:Union{TreeMesh{3}, P4estMesh{3}}},
281
                                         nonconservative_terms::True, equations,
282
                                         limiter::SubcellLimiterIDP, dg, element, cache)
283
    @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes
284

285
    # Due to the use of LGL nodes, the DG staggered fluxes `fhat` and FV fluxes `fstar` are equal
286
    # on the element interfaces. So, they are not computed in the volume integral and set to zero
287
    # in their respective computation.
288
    # The antidiffusive fluxes are therefore zero on the element interfaces and don't need to be
289
    # computed either. They are set to zero directly after resizing the container.
290
    # This applies to the indices `i=1` and `i=nnodes(dg)+1` for `antidiffusive_flux1_L` and
291
    # `antidiffusive_flux1_R` and analogously for the other two directions.
292

293
    for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg)
294
        for v in eachvariable(equations)
295
            antidiffusive_flux1_L[v, i, j, k, element] = fhat1_L[v, i, j, k] -
296
                                                         fstar1_L[v, i, j, k]
297
            antidiffusive_flux1_R[v, i, j, k, element] = fhat1_R[v, i, j, k] -
298
                                                         fstar1_R[v, i, j, k]
299
        end
300
    end
301
    for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg)
302
        for v in eachvariable(equations)
303
            antidiffusive_flux2_L[v, i, j, k, element] = fhat2_L[v, i, j, k] -
304
                                                         fstar2_L[v, i, j, k]
305
            antidiffusive_flux2_R[v, i, j, k, element] = fhat2_R[v, i, j, k] -
306
                                                         fstar2_R[v, i, j, k]
307
        end
308
    end
309
    for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg)
310
        for v in eachvariable(equations)
311
            antidiffusive_flux3_L[v, i, j, k, element] = fhat3_L[v, i, j, k] -
312
                                                         fstar3_L[v, i, j, k]
313
            antidiffusive_flux3_R[v, i, j, k, element] = fhat3_R[v, i, j, k] -
314
                                                         fstar3_R[v, i, j, k]
315
        end
316
    end
317

318
    return nothing
319
end
320
end # @muladd
321

322
Product

Resources

Company