Path: blob/main/src/callbacks_step/stepsize_dg2d.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

function max_dt(u, t, mesh::TreeMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = zero(max_scaled_speed)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            lambda1, lambda2 = max_abs_speeds(u_node, equations)
            max_lambda1 = Base.max(max_lambda1, lambda1)
            max_lambda2 = Base.max(max_lambda2, lambda2)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element]
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian * (max_lambda1 + max_lambda2))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::TreeMesh{2},
                constant_diffusivity::False, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_diffusivity_ = zero(max_scaled_speed)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            # Note: For the currently supported parabolic equations
            # Diffusion & Navier-Stokes, we only have one diffusivity.
            diffusivity = max_diffusivity(u_node, equations_parabolic)
            max_diffusivity_ = max(max_diffusivity_, diffusivity)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element] # 2 / Δx
        max_scaled_speed = max(max_scaled_speed, inv_jacobian^2 * max_diffusivity_)
    end

    # Factor 4 cancels with 2^2 from `inv_jacobian^2`, resulting in Δx^2
    return 4 / (nnodes(dg) * max_scaled_speed)
end
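# Worked example of the scaling above (illustrative note, not part of the
# upstream file): on a uniform `TreeMesh`, `inverse_jacobian == 2 / Δx`, so with
# a maximum diffusivity `d` the returned value is
#
#     4 / (nnodes(dg) * (2 / Δx)^2 * d) == Δx^2 / (nnodes(dg) * d),
#
# i.e. the parabolic time step estimate shrinks quadratically with the mesh
# width, in contrast to the linear scaling of the hyperbolic estimate above.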
function max_dt(u, t, mesh::TreeMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    max_lambda1, max_lambda2 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        inv_jacobian = cache.elements.inverse_jacobian[element]
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian * (max_lambda1 + max_lambda2))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end

function max_dt(u, t, mesh::TreeMeshParallel{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::TreeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), TreeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::TreeMeshParallel{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::TreeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), TreeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

# Hyperbolic-parabolic simulations are not yet supported on MPI-parallel meshes.
# Thus, no `max_dt` method is implemented for `TreeMeshParallel{2}` together
# with `equations_parabolic::AbstractEquationsParabolic`.

function max_dt(u, t,
                mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2},
                            P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}},
                constant_speed, equations, dg::DG, cache)
    backend = trixi_backend(u)

    max_lambda = calc_max_scaled_speed(backend, u, mesh, constant_speed, equations, dg,
                                       cache)

    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = Base.max(nextfloat(zero(t)), max_lambda)

    return 2 / (nnodes(dg) * max_scaled_speed)
end
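# Minimal usage sketch (hypothetical, for illustration only; not part of the
# upstream file): a CFL-based stepsize callback rescales the value returned by
# `max_dt` with a user-chosen CFL number before handing it to the time
# integrator, roughly
#
#     dt = cfl_number * max_dt(u, t, mesh, have_constant_speed(equations),
#                              equations, dg, cache)
#
# where `have_constant_speed(equations)` produces the `True`/`False` value
# that dispatches to one of the methods in this file.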
@inline function max_scaled_speed_per_element(u,
                                              ::Type{<:Union{StructuredMesh{2},
                                                             UnstructuredMesh2D,
                                                             P4estMesh{2},
                                                             T8codeMesh{2},
                                                             StructuredMeshView{2}}},
                                              constant_speed::False, equations, dg::DG,
                                              contravariant_vectors, inverse_jacobian,
                                              element)
    max_lambda1 = max_lambda2 = zero(eltype(u))
    for j in eachnode(dg), i in eachnode(dg)
        u_node = get_node_vars(u, equations, dg, i, j, element)
        lambda1, lambda2 = max_abs_speeds(u_node, equations)

        # Local speeds transformed to the reference element
        Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                              i, j, element)
        lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2)
        Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                              i, j, element)
        lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2)

        inv_jacobian = abs(inverse_jacobian[i, j, element])

        max_lambda1 = Base.max(max_lambda1, lambda1_transformed * inv_jacobian)
        max_lambda2 = Base.max(max_lambda2, lambda2_transformed * inv_jacobian)
    end
    return max_lambda1 + max_lambda2
end
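# For reference (illustrative note, not part of the upstream file): with the
# contravariant vectors `Ja¹ = (Ja11, Ja12)` and `Ja² = (Ja21, Ja22)`, the loop
# above bounds the directional wave speeds on the reference element via
#
#     λ̃₁ = |Ja11 * λ₁ + Ja12 * λ₂| / J,    λ̃₂ = |Ja21 * λ₁ + Ja22 * λ₂| / J,
#
# where `1/J` is the absolute inverse Jacobian at the node. The element
# estimate is the nodal maximum of λ̃₁ plus the nodal maximum of λ̃₂, mirroring
# the `inv_jacobian * (max_lambda1 + max_lambda2)` term used on the `TreeMesh`.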
function max_dt(u, t,
                mesh::P4estMesh{2}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh`
                constant_diffusivity::False, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity1 = max_diffusivity2 = zero(max_scaled_diffusivity)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            diffusivity = max_diffusivity(u_node, equations_parabolic)

            # Local diffusivity transformed to the reference element
            Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                                  i, j, element)
            # The metric terms are squared due to the second derivatives in the parabolic terms,
            # which lead to application of the chain rule (coordinate transformation) twice.
            # See for instance also the implementation in FLEXI
            # https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
            # or FLUXO:
            # https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
            diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2)
            Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                                  i, j, element)
            diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2)

            inv_jacobian = abs(inverse_jacobian[i, j, element])

            max_diffusivity1 = Base.max(max_diffusivity1,
                                        diffusivity1_transformed * inv_jacobian^2)
            max_diffusivity2 = Base.max(max_diffusivity2,
                                        diffusivity2_transformed * inv_jacobian^2)
        end
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          max_diffusivity1 + max_diffusivity2)
    end

    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end

@inline function max_scaled_speed_per_element(u,
                                              ::Type{<:Union{StructuredMesh{2},
                                                             UnstructuredMesh2D,
                                                             P4estMesh{2},
                                                             P4estMeshView{2},
                                                             T8codeMesh{2},
                                                             StructuredMeshView{2}}},
                                              constant_speed::True, equations, dg::DG,
                                              contravariant_vectors, inverse_jacobian,
                                              element)
    max_scaled_speed = zero(eltype(u))
    max_lambda1, max_lambda2 = max_abs_speeds(equations)
    for j in eachnode(dg), i in eachnode(dg)
        # Local speeds transformed to the reference element
        Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                              i, j, element)
        lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2)
        Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                              i, j, element)
        lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2)

        inv_jacobian = abs(inverse_jacobian[i, j, element])

        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian *
                                    (lambda1_transformed + lambda2_transformed))
    end

    return max_scaled_speed
end

function max_dt(u, t,
                mesh::P4estMesh{2}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh`
                constant_diffusivity::True, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    diffusivity = max_diffusivity(equations_parabolic)

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity1 = max_diffusivity2 = zero(max_scaled_diffusivity)
        for j in eachnode(dg), i in eachnode(dg)
            # Local diffusivity transformed to the reference element
            Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                                  i, j, element)
            # The metric terms are squared due to the second derivatives in the parabolic terms,
            # which lead to application of the chain rule (coordinate transformation) twice.
            # See for instance also the implementation in FLEXI
            # https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
            # or FLUXO:
            # https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
            diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2)
            Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                                  i, j, element)
            diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2)

            inv_jacobian = abs(inverse_jacobian[i, j, element])

            max_diffusivity1 = Base.max(max_diffusivity1,
                                        diffusivity1_transformed * inv_jacobian^2)
            max_diffusivity2 = Base.max(max_diffusivity2,
                                        diffusivity2_transformed * inv_jacobian^2)
        end
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
        # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          max_diffusivity1 + max_diffusivity2)
    end

    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end
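# Why the metric terms enter squared (a sketch of the argument; see the
# FLEXI/FLUXO references above): transforming a second derivative such as
# ∂²/∂x² to reference coordinates applies the chain rule twice, so each metric
# factor `Jaᵢⱼ` and the inverse Jacobian appear quadratically, e.g.
#
#     diffusivity * (Ja11^2 + Ja12^2) * inv_jacobian^2
#
# for the first reference direction, exactly as accumulated in the loops above.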
function max_dt(u, t, mesh::P4estMeshParallel{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::P4estMeshParallel{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::P4estMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::T8codeMeshParallel{2},
                constant_speed::False, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::T8codeMeshParallel{2},
                constant_speed::True, equations, dg::DG, cache)
    # call the method accepting a general `mesh::T8codeMesh{2}`
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` needed, see comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
end # @muladd
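# Sketch of the parallel pattern used by the `*MeshParallel` methods above
# (hypothetical standalone example; assumes an initialized MPI session via
# MPI.jl): each rank computes its local `dt` by `invoke`-ing the serial method,
# then all ranks reduce to the global minimum, conceptually
#
#     using MPI
#     MPI.Init()
#     dt_local = rand()  # stands in for the locally computed estimate
#     dt = MPI.Allreduce(dt_local, min, MPI.COMM_WORLD)
#
# `Base.min` is used in the actual code because Trixi.jl defines its own `min`
# (see src/auxiliary/math.jl), which should not be used for the MPI reduction.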