# Source file: src/callbacks_step/stepsize_dg3d.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

# Maximum stable (advective/CFL) time step on a `TreeMesh{3}` when the wave
# speeds depend on the solution (`constant_speed::False`): scan all nodes of
# all elements for the largest directional wave speeds.
function max_dt(u, t, mesh::TreeMesh{3},
                constant_speed::False, equations, dg::DG, cache)
    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)
            max_lambda1 = Base.max(max_lambda1, lambda1)
            max_lambda2 = Base.max(max_lambda2, lambda2)
            max_lambda3 = Base.max(max_lambda3, lambda3)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element]
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian *
                                    (max_lambda1 + max_lambda2 + max_lambda3))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

# Maximum stable (diffusive) time step on a `TreeMesh{3}` when the diffusivity
# depends on the solution (`constant_diffusivity::False`).
function max_dt(u, t, mesh::TreeMesh{3},
                constant_diffusivity::False, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity_ = zero(max_scaled_diffusivity)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            # Note: For the currently supported parabolic equations
            # Diffusion & Navier-Stokes, we only have one diffusivity.
            diffusivity = max_diffusivity(u_node, equations_parabolic)
            # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't
            # propagate `NaN`s properly; consistent with the other `max_dt` methods in this file.
            # See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
            max_diffusivity_ = Base.max(max_diffusivity_, diffusivity)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element] # 2 / Δx
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          inv_jacobian^2 * max_diffusivity_)
    end

    # Factor 4 cancels with 2^2 from `inv_jacobian^2`, resulting in Δx^2
    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end

# Maximum stable (advective/CFL) time step on a `TreeMesh{3}` when the wave
# speeds are constant (`constant_speed::True`): only the element sizes vary.
function max_dt(u, t, mesh::TreeMesh{3},
                constant_speed::True, equations, dg::DG, cache)
    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        inv_jacobian = cache.elements.inverse_jacobian[element]
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian *
                                    (max_lambda1 + max_lambda2 + max_lambda3))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

# Curved meshes: delegate the per-element reduction to a backend-specific
# kernel (CPU or GPU) via `calc_max_scaled_speed`.
function max_dt(u, t,
                mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
                constant_speed, equations, dg::DG, cache)
    backend = trixi_backend(u)

    max_lambda = calc_max_scaled_speed(backend, u, mesh, constant_speed, equations, dg,
                                       cache)

    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = Base.max(nextfloat(zero(t)), max_lambda)

    return 2 / (nnodes(dg) * max_scaled_speed)
end

# Per-element scaled speed on curved meshes with solution-dependent wave
# speeds: the physical speeds are transformed to the reference element using
# the contravariant vectors before taking node-wise maxima.
@inline function max_scaled_speed_per_element(u,
                                              ::Type{<:Union{StructuredMesh{3},
                                                             P4estMesh{3},
                                                             T8codeMesh{3}}},
                                              constant_speed::False, equations, dg,
                                              contravariant_vectors, inverse_jacobian,
                                              element)
    max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(u))
    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
        u_node = get_node_vars(u, equations, dg, i, j, k, element)
        lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)

        Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                    i, j, k, element)
        lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3)
        Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                    i, j, k, element)
        lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3)
        Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                    i, j, k, element)
        lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3)

        inv_jacobian = abs(inverse_jacobian[i, j, k, element])

        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't
        # propagate `NaN`s properly; consistent with the other methods in this file.
        # See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_lambda1 = Base.max(max_lambda1, inv_jacobian * lambda1_transformed)
        max_lambda2 = Base.max(max_lambda2, inv_jacobian * lambda2_transformed)
        max_lambda3 = Base.max(max_lambda3, inv_jacobian * lambda3_transformed)
    end
    return max_lambda1 + max_lambda2 + max_lambda3
end

# Maximum stable (diffusive) time step on a curved `P4estMesh{3}` with
# solution-dependent diffusivity.
function max_dt(u, t,
                mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh`
                constant_diffusivity::False, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity1 = max_diffusivity2 = max_diffusivity3 = zero(max_scaled_diffusivity)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, k, element)
            diffusivity = max_diffusivity(u_node, equations_parabolic)

            # Local diffusivity transformed to the reference element
            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                        i, j, k, element)
            # The metric terms are squared due to the second derivatives in the parabolic terms,
            # which lead to application of the chain rule (coordinate transformation) twice.
            # See for instance also the implementation in FLEXI
            # https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
            # or FLUXO:
            # https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
            diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2 + Ja13^2)
            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                        i, j, k, element)
            diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2 + Ja23^2)
            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                        i, j, k, element)
            diffusivity3_transformed = diffusivity * (Ja31^2 + Ja32^2 + Ja33^2)

            inv_jacobian = abs(inverse_jacobian[i, j, k, element])

            max_diffusivity1 = Base.max(max_diffusivity1,
                                        diffusivity1_transformed * inv_jacobian^2)
            max_diffusivity2 = Base.max(max_diffusivity2,
                                        diffusivity2_transformed * inv_jacobian^2)
            max_diffusivity3 = Base.max(max_diffusivity3,
                                        diffusivity3_transformed * inv_jacobian^2)
        end
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          max_diffusivity1 + max_diffusivity2 +
                                          max_diffusivity3)
    end

    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end

# Per-element scaled speed on curved meshes with constant wave speeds
# (`constant_speed::True`): the speeds are queried once from `equations` and
# only the metric terms vary per node.
@inline function max_scaled_speed_per_element(u,
                                              ::Type{<:Union{StructuredMesh{3},
                                                             P4estMesh{3},
                                                             T8codeMesh{3}}},
                                              constant_speed::True, equations, dg::DG,
                                              contravariant_vectors, inverse_jacobian,
                                              element)
    max_scaled_speed = zero(eltype(u))
    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)
    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
        Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                    i, j, k, element)
        lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 +
                                  Ja13 * max_lambda3)
        Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                    i, j, k, element)
        lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 +
                                  Ja23 * max_lambda3)
        Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                    i, j, k, element)
        lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 +
                                  Ja33 * max_lambda3)

        inv_jacobian = abs(inverse_jacobian[i, j, k, element])

        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian *
                                    (lambda1_transformed + lambda2_transformed +
                                     lambda3_transformed))
    end

    return max_scaled_speed
end

# Maximum stable (diffusive) time step on a curved `P4estMesh{3}` with
# constant diffusivity: the diffusivity is queried once from
# `equations_parabolic`, only the metric terms vary per node.
function max_dt(u, t,
                mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh`
                constant_diffusivity::True, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    diffusivity = max_diffusivity(equations_parabolic)

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity1 = max_diffusivity2 = max_diffusivity3 = zero(max_scaled_diffusivity)
        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
            # Local diffusivity transformed to the reference element
            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
                                                        i, j, k, element)
            # The metric terms are squared due to the second derivatives in the parabolic terms,
            # which lead to application of the chain rule (coordinate transformation) twice.
            # See for instance also the implementation in FLEXI
            # https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
            # or FLUXO:
            # https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
            diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2 + Ja13^2)
            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
                                                        i, j, k, element)
            diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2 + Ja23^2)
            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
                                                        i, j, k, element)
            diffusivity3_transformed = diffusivity * (Ja31^2 + Ja32^2 + Ja33^2)

            inv_jacobian = abs(inverse_jacobian[i, j, k, element])

            max_diffusivity1 = Base.max(max_diffusivity1,
                                        diffusivity1_transformed * inv_jacobian^2)
            max_diffusivity2 = Base.max(max_diffusivity2,
                                        diffusivity2_transformed * inv_jacobian^2)
            max_diffusivity3 = Base.max(max_diffusivity3,
                                        diffusivity3_transformed * inv_jacobian^2)
        end
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          max_diffusivity1 + max_diffusivity2 +
                                          max_diffusivity3)
    end

    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end

# MPI-parallel variants: compute the local time step with the serial method,
# then take the global minimum over all ranks.
function max_dt(u, t, mesh::P4estMeshParallel{3},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::P4estMeshParallel{3},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::T8codeMeshParallel{3},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::T8codeMeshParallel{3},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{3}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{3},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
end # @muladd