GitHub Repository: trixi-framework/Trixi.jl
Path: blob/main/src/callbacks_step/stepsize_dg2d.jl
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent
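
# For reference: `@muladd` (from MuladdMacro.jl) rewrites multiply-add
# expressions into explicit `muladd` calls, e.g.
#
#     a * b + c  ->  muladd(a, b, c)
#
# which LLVM can lower to a single fused multiply-add instruction.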

function max_dt(u, t, mesh::TreeMesh{2},
                constant_speed::False, equations, dg::DG, cache)
    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_lambda1 = max_lambda2 = zero(max_scaled_speed)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            lambda1, lambda2 = max_abs_speeds(u_node, equations)
            max_lambda1 = Base.max(max_lambda1, lambda1)
            max_lambda2 = Base.max(max_lambda2, lambda2)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element]
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't
        # propagate `NaN`s properly.
        # See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian * (max_lambda1 + max_lambda2))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end
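
# Note: on a Cartesian `TreeMesh`, `inverse_jacobian == 2 / Δx` (see the
# comment in the parabolic variant below), so the value returned above is the
# CFL-type estimate
#
#     Δt = Δx / (nnodes(dg) * (max_lambda1 + max_lambda2)).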

function max_dt(u, t, mesh::TreeMesh{2},
                constant_diffusivity::False, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_speed = nextfloat(zero(t))

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        max_diffusivity_ = zero(max_scaled_speed)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            # Note: For the currently supported parabolic equations
            # (diffusion and Navier-Stokes), there is only a single diffusivity.
            diffusivity = max_diffusivity(u_node, equations_parabolic)
            max_diffusivity_ = max(max_diffusivity_, diffusivity)
        end
        inv_jacobian = cache.elements.inverse_jacobian[element] # 2 / Δx
        max_scaled_speed = max(max_scaled_speed, inv_jacobian^2 * max_diffusivity_)
    end

    # The factor 4 cancels with the 2^2 from `inv_jacobian^2`, resulting in Δx^2
    return 4 / (nnodes(dg) * max_scaled_speed)
end
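
# Note: with `inv_jacobian^2 == 4 / Δx^2`, the value returned above is
#
#     Δt = Δx^2 / (nnodes(dg) * max_diffusivity),
#
# i.e., the quadratic time-step restriction characteristic of parabolic terms.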

function max_dt(u, t, mesh::TreeMesh{2},
                constant_speed::True, equations, dg::DG, cache)
    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = nextfloat(zero(t))

    max_lambda1, max_lambda2 = max_abs_speeds(equations)

    @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
        inv_jacobian = cache.elements.inverse_jacobian[element]
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't
        # propagate `NaN`s properly.
        # See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian * (max_lambda1 + max_lambda2))
    end

    return 2 / (nnodes(dg) * max_scaled_speed)
end
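
# Note: `True` and `False` are Trixi.jl's singleton types for static Booleans.
# Dispatching on `constant_speed` selects at compile time between the
# solution-dependent speed computation (`constant_speed::False`) and this
# variant, which queries the constant `max_abs_speeds(equations)` only once.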

function max_dt(u, t, mesh::TreeMeshParallel{2},
                constant_speed::False, equations, dg::DG, cache)
    # Call the method accepting a general `mesh::TreeMesh{2}`.
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), TreeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` is needed; see the comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
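
# For reference: `invoke(f, Tuple{...}, args...)` calls the method of `f`
# matching the given signature rather than the most specific one. Here, it
# forwards to the serial `TreeMesh{2}` method above without recursing into
# this `TreeMeshParallel{2}` method; `MPI.Allreduce!` then reduces the local
# time steps to their global minimum across all MPI ranks. The same pattern is
# used for the parallel `P4estMesh` and `T8codeMesh` methods below.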

function max_dt(u, t, mesh::TreeMeshParallel{2},
                constant_speed::True, equations, dg::DG, cache)
    # Call the method accepting a general `mesh::TreeMesh{2}`.
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), TreeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` is needed; see the comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

# Hyperbolic-parabolic simulations are not yet supported on MPI-parallel meshes.
# Thus, no `max_dt` method for `TreeMeshParallel{2}` with
# `equations_parabolic::AbstractEquationsParabolic` is implemented.

function max_dt(u, t,
                mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2},
                            P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}},
                constant_speed, equations, dg::DG, cache)
    backend = trixi_backend(u)

    max_lambda = calc_max_scaled_speed(backend, u, mesh, constant_speed, equations, dg,
                                       cache)

    # Avoid division by zero if the speed vanishes everywhere,
    # e.g. for steady-state linear advection
    max_scaled_speed = Base.max(nextfloat(zero(t)), max_lambda)

    return 2 / (nnodes(dg) * max_scaled_speed)
end
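
# Note: `trixi_backend(u)` chooses the compute backend used by
# `calc_max_scaled_speed` based on the array type of `u`, so this single
# entry point can dispatch to different (e.g. CPU or GPU) implementations.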

@inline function max_scaled_speed_per_element(u,
                                              ::Type{<:Union{StructuredMesh{2},
                                                             UnstructuredMesh2D,
                                                             P4estMesh{2},
                                                             T8codeMesh{2},
                                                             StructuredMeshView{2}}},
                                              constant_speed::False, equations, dg::DG,
                                              contravariant_vectors, inverse_jacobian,
                                              element)
    max_lambda1 = max_lambda2 = zero(eltype(u))
    for j in eachnode(dg), i in eachnode(dg)
        u_node = get_node_vars(u, equations, dg, i, j, element)
        lambda1, lambda2 = max_abs_speeds(u_node, equations)

        # Local speeds transformed to the reference element
        Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                              i, j, element)
        lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2)
        Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                              i, j, element)
        lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2)

        inv_jacobian = abs(inverse_jacobian[i, j, element])

        max_lambda1 = Base.max(max_lambda1, lambda1_transformed * inv_jacobian)
        max_lambda2 = Base.max(max_lambda2, lambda2_transformed * inv_jacobian)
    end
    return max_lambda1 + max_lambda2
end
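
# Note: the transformation above maps the physical-space speeds (lambda1,
# lambda2) to the reference element via the contravariant basis vectors Ja^i,
#
#     lambda_transformed^i = |Ja^{i1} * lambda1 + Ja^{i2} * lambda2|,  i = 1, 2,
#
# so the time-step estimate accounts for element size and distortion on
# curved meshes.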

function max_dt(u, t,
                mesh::P4estMesh{2}, # Parabolic terms are currently only supported for `TreeMesh` and `P4estMesh`
                constant_diffusivity::False, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity1 = max_diffusivity2 = zero(max_scaled_diffusivity)
        for j in eachnode(dg), i in eachnode(dg)
            u_node = get_node_vars(u, equations, dg, i, j, element)
            diffusivity = max_diffusivity(u_node, equations_parabolic)

            # Local diffusivity transformed to the reference element
            Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                                  i, j, element)
            # The metric terms are squared due to the second derivatives in the parabolic
            # terms, which apply the chain rule (coordinate transformation) twice.
            # See, for instance, the implementations in FLEXI
            # https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
            # or FLUXO:
            # https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
            diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2)
            Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                                  i, j, element)
            diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2)

            inv_jacobian = abs(inverse_jacobian[i, j, element])

            max_diffusivity1 = Base.max(max_diffusivity1,
                                        diffusivity1_transformed * inv_jacobian^2)
            max_diffusivity2 = Base.max(max_diffusivity2,
                                        diffusivity2_transformed * inv_jacobian^2)
        end
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't
        # propagate `NaN`s properly.
        # See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          max_diffusivity1 + max_diffusivity2)
    end

    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end
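
# Note: combining the squared metric terms with `inv_jacobian^2`, the scaled
# diffusivities above are, schematically,
#
#     d_transformed^i = diffusivity * ((Ja^{i1})^2 + (Ja^{i2})^2) / J^2,  i = 1, 2,
#
# mirroring the hyperbolic estimate but with all transformation factors squared.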

@inline function max_scaled_speed_per_element(u,
                                              ::Type{<:Union{StructuredMesh{2},
                                                             UnstructuredMesh2D,
                                                             P4estMesh{2},
                                                             P4estMeshView{2},
                                                             T8codeMesh{2},
                                                             StructuredMeshView{2}}},
                                              constant_speed::True, equations, dg::DG,
                                              contravariant_vectors, inverse_jacobian,
                                              element)
    max_scaled_speed = zero(eltype(u))
    max_lambda1, max_lambda2 = max_abs_speeds(equations)
    for j in eachnode(dg), i in eachnode(dg)
        # Local speeds transformed to the reference element
        Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                              i, j, element)
        lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2)
        Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                              i, j, element)
        lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2)

        inv_jacobian = abs(inverse_jacobian[i, j, element])

        max_scaled_speed = Base.max(max_scaled_speed,
                                    inv_jacobian *
                                    (lambda1_transformed + lambda2_transformed))
    end

    return max_scaled_speed
end

function max_dt(u, t,
                mesh::P4estMesh{2}, # Parabolic terms are currently only supported for `TreeMesh` and `P4estMesh`
                constant_diffusivity::True, equations,
                equations_parabolic::AbstractEquationsParabolic,
                dg::DG, cache)
    # Avoid division by zero if the diffusivity vanishes everywhere
    max_scaled_diffusivity = nextfloat(zero(t))

    diffusivity = max_diffusivity(equations_parabolic)

    @unpack contravariant_vectors, inverse_jacobian = cache.elements

    @batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
        max_diffusivity1 = max_diffusivity2 = zero(max_scaled_diffusivity)
        for j in eachnode(dg), i in eachnode(dg)
            # Local diffusivity transformed to the reference element
            Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors,
                                                  i, j, element)
            # The metric terms are squared due to the second derivatives in the parabolic
            # terms, which apply the chain rule (coordinate transformation) twice.
            # See, for instance, the implementations in FLEXI
            # https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
            # or FLUXO:
            # https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
            diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2)
            Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors,
                                                  i, j, element)
            diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2)

            inv_jacobian = abs(inverse_jacobian[i, j, element])

            max_diffusivity1 = Base.max(max_diffusivity1,
                                        diffusivity1_transformed * inv_jacobian^2)
            max_diffusivity2 = Base.max(max_diffusivity2,
                                        diffusivity2_transformed * inv_jacobian^2)
        end
        # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't
        # propagate `NaN`s properly.
        # See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
        max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
                                          max_diffusivity1 + max_diffusivity2)
    end

    return 4 / (nnodes(dg) * max_scaled_diffusivity)
end

function max_dt(u, t, mesh::P4estMeshParallel{2},
                constant_speed::False, equations, dg::DG, cache)
    # Call the method accepting a general `mesh::P4estMesh{2}`.
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` is needed; see the comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::P4estMeshParallel{2},
                constant_speed::True, equations, dg::DG, cache)
    # Call the method accepting a general `mesh::P4estMesh{2}`.
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), P4estMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` is needed; see the comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::T8codeMeshParallel{2},
                constant_speed::False, equations, dg::DG, cache)
    # Call the method accepting a general `mesh::T8codeMesh{2}`.
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` is needed; see the comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end

function max_dt(u, t, mesh::T8codeMeshParallel{2},
                constant_speed::True, equations, dg::DG, cache)
    # Call the method accepting a general `mesh::T8codeMesh{2}`.
    # TODO: MPI, we should improve this; maybe we should dispatch on `u`
    # and create some MPI array type, overloading broadcasting and mapreduce etc.
    # Then, this specific array type should also work well with DiffEq etc.
    dt = invoke(max_dt,
                Tuple{typeof(u), typeof(t), T8codeMesh{2},
                      typeof(constant_speed), typeof(equations), typeof(dg),
                      typeof(cache)},
                u, t, mesh, constant_speed, equations, dg, cache)
    # `Base.min` instead of `min` is needed; see the comment in src/auxiliary/math.jl
    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]

    return dt
end
end # @muladd