Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
trixi-framework
GitHub Repository: trixi-framework/Trixi.jl
Path: blob/main/src/callbacks_step/stepsize_dg3d.jl
5586 views
1
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
2
# Since these FMAs can increase the performance of many numerical algorithms,
3
# we need to opt-in explicitly.
4
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
5
@muladd begin
6
#! format: noindent
7
8
function max_dt(u, t, mesh::TreeMesh{3},
9
constant_speed::False, equations, dg::DG, cache)
10
# Avoid division by zero if the speed vanishes everywhere,
11
# e.g. for steady-state linear advection
12
max_scaled_speed = nextfloat(zero(t))
13
14
@batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
15
max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed)
16
for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
17
u_node = get_node_vars(u, equations, dg, i, j, k, element)
18
lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)
19
max_lambda1 = Base.max(max_lambda1, lambda1)
20
max_lambda2 = Base.max(max_lambda2, lambda2)
21
max_lambda3 = Base.max(max_lambda3, lambda3)
22
end
23
inv_jacobian = cache.elements.inverse_jacobian[element]
24
# Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
25
# `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
26
max_scaled_speed = Base.max(max_scaled_speed,
27
inv_jacobian *
28
(max_lambda1 + max_lambda2 + max_lambda3))
29
end
30
31
return 2 / (nnodes(dg) * max_scaled_speed)
32
end
33
34
function max_dt(u, t, mesh::TreeMesh{3},
35
constant_diffusivity::False, equations,
36
equations_parabolic::AbstractEquationsParabolic,
37
dg::DG, cache)
38
# Avoid division by zero if the diffusivity vanishes everywhere
39
max_scaled_diffusivity = nextfloat(zero(t))
40
41
@batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
42
max_diffusivity_ = zero(max_scaled_diffusivity)
43
for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
44
u_node = get_node_vars(u, equations, dg, i, j, k, element)
45
# Note: For the currently supported parabolic equations
46
# Diffusion & Navier-Stokes, we only have one diffusivity.
47
diffusivity = max_diffusivity(u_node, equations_parabolic)
48
max_diffusivity_ = max(max_diffusivity_, diffusivity)
49
end
50
inv_jacobian = cache.elements.inverse_jacobian[element] # 2 / Δx
51
max_scaled_diffusivity = max(max_scaled_diffusivity,
52
inv_jacobian^2 * max_diffusivity_)
53
end
54
55
# Factor 4 cancels with 2^2 from `inv_jacobian^2`, resulting in Δx^2
56
return 4 / (nnodes(dg) * max_scaled_diffusivity)
57
end
58
59
function max_dt(u, t, mesh::TreeMesh{3},
60
constant_speed::True, equations, dg::DG, cache)
61
# Avoid division by zero if the speed vanishes everywhere,
62
# e.g. for steady-state linear advection
63
max_scaled_speed = nextfloat(zero(t))
64
65
max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)
66
67
@batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache)
68
inv_jacobian = cache.elements.inverse_jacobian[element]
69
# Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
70
# `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
71
max_scaled_speed = Base.max(max_scaled_speed,
72
inv_jacobian *
73
(max_lambda1 + max_lambda2 + max_lambda3))
74
end
75
76
return 2 / (nnodes(dg) * max_scaled_speed)
77
end
78
79
function max_dt(u, t,
80
mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
81
constant_speed, equations, dg::DG, cache)
82
backend = trixi_backend(u)
83
84
max_lambda = calc_max_scaled_speed(backend, u, mesh, constant_speed, equations, dg,
85
cache)
86
87
# Avoid division by zero if the speed vanishes everywhere,
88
# e.g. for steady-state linear advection
89
max_scaled_speed = Base.max(nextfloat(zero(t)), max_lambda)
90
91
return 2 / (nnodes(dg) * max_scaled_speed)
92
end
93
94
@inline function max_scaled_speed_per_element(u,
95
::Type{<:Union{StructuredMesh{3},
96
P4estMesh{3},
97
T8codeMesh{3}}},
98
constant_speed::False, equations, dg,
99
contravariant_vectors, inverse_jacobian,
100
element)
101
max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(u))
102
for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
103
u_node = get_node_vars(u, equations, dg, i, j, k, element)
104
lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations)
105
106
Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
107
i, j, k, element)
108
lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3)
109
Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
110
i, j, k, element)
111
lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3)
112
Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
113
i, j, k, element)
114
lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3)
115
116
inv_jacobian = abs(inverse_jacobian[i, j, k, element])
117
118
max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed)
119
max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed)
120
max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed)
121
end
122
return max_lambda1 + max_lambda2 + max_lambda3
123
end
124
125
function max_dt(u, t,
126
mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh`
127
constant_diffusivity::False, equations,
128
equations_parabolic::AbstractEquationsParabolic,
129
dg::DG, cache)
130
# Avoid division by zero if the diffusivity vanishes everywhere
131
max_scaled_diffusivity = nextfloat(zero(t))
132
133
@unpack contravariant_vectors, inverse_jacobian = cache.elements
134
135
@batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
136
max_diffusivity1 = max_diffusivity2 = max_diffusivity3 = zero(max_scaled_diffusivity)
137
for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
138
u_node = get_node_vars(u, equations, dg, i, j, k, element)
139
diffusivity = max_diffusivity(u_node, equations_parabolic)
140
141
# Local diffusivity transformed to the reference element
142
Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
143
i, j, k, element)
144
# The metric terms are squared due to the second derivatives in the parabolic terms,
145
# which lead to application of the chain rule (coordinate transformation) twice.
146
# See for instance also the implementation in FLEXI
147
# https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
148
# or FLUXO:
149
# https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
150
diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2 + Ja13^2)
151
Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
152
i, j, k, element)
153
diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2 + Ja23^2)
154
Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
155
i, j, k, element)
156
diffusivity3_transformed = diffusivity * (Ja31^2 + Ja32^2 + Ja33^2)
157
158
inv_jacobian = abs(inverse_jacobian[i, j, k, element])
159
160
max_diffusivity1 = Base.max(max_diffusivity1,
161
diffusivity1_transformed * inv_jacobian^2)
162
max_diffusivity2 = Base.max(max_diffusivity2,
163
diffusivity2_transformed * inv_jacobian^2)
164
max_diffusivity3 = Base.max(max_diffusivity3,
165
diffusivity3_transformed * inv_jacobian^2)
166
end
167
# Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
168
# `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
169
max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
170
max_diffusivity1 + max_diffusivity2 +
171
max_diffusivity3)
172
end
173
174
return 4 / (nnodes(dg) * max_scaled_diffusivity)
175
end
176
177
@inline function max_scaled_speed_per_element(u,
178
::Type{<:Union{StructuredMesh{3},
179
P4estMesh{3},
180
T8codeMesh{3}}},
181
constant_speed::True, equations, dg::DG,
182
contravariant_vectors, inverse_jacobian,
183
element)
184
max_scaled_speed = zero(eltype(u))
185
max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)
186
for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
187
Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
188
i, j, k, element)
189
lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 +
190
Ja13 * max_lambda3)
191
Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
192
i, j, k, element)
193
lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 +
194
Ja23 * max_lambda3)
195
Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
196
i, j, k, element)
197
lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 +
198
Ja33 * max_lambda3)
199
200
inv_jacobian = abs(inverse_jacobian[i, j, k, element])
201
202
# Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
203
# `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
204
max_scaled_speed = Base.max(max_scaled_speed,
205
inv_jacobian *
206
(lambda1_transformed + lambda2_transformed +
207
lambda3_transformed))
208
end
209
210
return max_scaled_speed
211
end
212
213
function max_dt(u, t,
214
mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh`
215
constant_diffusivity::True, equations,
216
equations_parabolic::AbstractEquationsParabolic,
217
dg::DG, cache)
218
# Avoid division by zero if the diffusivity vanishes everywhere
219
max_scaled_diffusivity = nextfloat(zero(t))
220
221
diffusivity = max_diffusivity(equations_parabolic)
222
223
@unpack contravariant_vectors, inverse_jacobian = cache.elements
224
225
@batch reduction=(max, max_scaled_diffusivity) for element in eachelement(dg, cache)
226
max_diffusivity1 = max_diffusivity2 = max_diffusivity3 = zero(max_scaled_diffusivity)
227
for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
228
# Local diffusivity transformed to the reference element
229
Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors,
230
i, j, k, element)
231
# The metric terms are squared due to the second derivatives in the parabolic terms,
232
# which lead to application of the chain rule (coordinate transformation) twice.
233
# See for instance also the implementation in FLEXI
234
# https://github.com/flexi-framework/flexi/blob/e980e8635e6605daf906fc176c9897314a9cd9b1/src/equations/navierstokes/calctimestep.f90#L75-L77
235
# or FLUXO:
236
# https://github.com/project-fluxo/fluxo/blob/c7e0cc9b7fd4569dcab67bbb6e5a25c0a84859f1/src/equation/navierstokes/calctimestep.f90#L130-L132
237
diffusivity1_transformed = diffusivity * (Ja11^2 + Ja12^2 + Ja13^2)
238
Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors,
239
i, j, k, element)
240
diffusivity2_transformed = diffusivity * (Ja21^2 + Ja22^2 + Ja23^2)
241
Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors,
242
i, j, k, element)
243
diffusivity3_transformed = diffusivity * (Ja31^2 + Ja32^2 + Ja33^2)
244
245
inv_jacobian = abs(inverse_jacobian[i, j, k, element])
246
247
max_diffusivity1 = Base.max(max_diffusivity1,
248
diffusivity1_transformed * inv_jacobian^2)
249
max_diffusivity2 = Base.max(max_diffusivity2,
250
diffusivity2_transformed * inv_jacobian^2)
251
max_diffusivity3 = Base.max(max_diffusivity3,
252
diffusivity3_transformed * inv_jacobian^2)
253
end
254
# Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate
255
# `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323
256
max_scaled_diffusivity = Base.max(max_scaled_diffusivity,
257
max_diffusivity1 + max_diffusivity2 +
258
max_diffusivity3)
259
end
260
261
return 4 / (nnodes(dg) * max_scaled_diffusivity)
262
end
263
264
function max_dt(u, t, mesh::P4estMeshParallel{3},
265
constant_speed::False, equations, dg::DG, cache)
266
# call the method accepting a general `mesh::P4estMesh{3}`
267
# TODO: MPI, we should improve this; maybe we should dispatch on `u`
268
# and create some MPI array type, overloading broadcasting and mapreduce etc.
269
# Then, this specific array type should also work well with DiffEq etc.
270
dt = invoke(max_dt,
271
Tuple{typeof(u), typeof(t), P4estMesh{3},
272
typeof(constant_speed), typeof(equations), typeof(dg),
273
typeof(cache)},
274
u, t, mesh, constant_speed, equations, dg, cache)
275
# Base.min instead of min needed, see comment in src/auxiliary/math.jl
276
dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
277
278
return dt
279
end
280
281
function max_dt(u, t, mesh::P4estMeshParallel{3},
282
constant_speed::True, equations, dg::DG, cache)
283
# call the method accepting a general `mesh::P4estMesh{3}`
284
# TODO: MPI, we should improve this; maybe we should dispatch on `u`
285
# and create some MPI array type, overloading broadcasting and mapreduce etc.
286
# Then, this specific array type should also work well with DiffEq etc.
287
dt = invoke(max_dt,
288
Tuple{typeof(u), typeof(t), P4estMesh{3},
289
typeof(constant_speed), typeof(equations), typeof(dg),
290
typeof(cache)},
291
u, t, mesh, constant_speed, equations, dg, cache)
292
# Base.min instead of min needed, see comment in src/auxiliary/math.jl
293
dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
294
295
return dt
296
end
297
298
function max_dt(u, t, mesh::T8codeMeshParallel{3},
299
constant_speed::False, equations, dg::DG, cache)
300
# call the method accepting a general `mesh::T8codeMesh{3}`
301
# TODO: MPI, we should improve this; maybe we should dispatch on `u`
302
# and create some MPI array type, overloading broadcasting and mapreduce etc.
303
# Then, this specific array type should also work well with DiffEq etc.
304
dt = invoke(max_dt,
305
Tuple{typeof(u), typeof(t), T8codeMesh{3},
306
typeof(constant_speed), typeof(equations), typeof(dg),
307
typeof(cache)},
308
u, t, mesh, constant_speed, equations, dg, cache)
309
# Base.min instead of min needed, see comment in src/auxiliary/math.jl
310
dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
311
312
return dt
313
end
314
315
function max_dt(u, t, mesh::T8codeMeshParallel{3},
316
constant_speed::True, equations, dg::DG, cache)
317
# call the method accepting a general `mesh::T8codeMesh{3}`
318
# TODO: MPI, we should improve this; maybe we should dispatch on `u`
319
# and create some MPI array type, overloading broadcasting and mapreduce etc.
320
# Then, this specific array type should also work well with DiffEq etc.
321
dt = invoke(max_dt,
322
Tuple{typeof(u), typeof(t), T8codeMesh{3},
323
typeof(constant_speed), typeof(equations), typeof(dg),
324
typeof(cache)},
325
u, t, mesh, constant_speed, equations, dg, cache)
326
# Base.min instead of min needed, see comment in src/auxiliary/math.jl
327
dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
328
329
return dt
330
end
331
end # @muladd
332
333