Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
trixi-framework
GitHub Repository: trixi-framework/Trixi.jl
Path: blob/main/src/solvers/dgmulti/flux_differencing_compressible_euler.jl
5590 views
1
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
2
# Since these FMAs can increase the performance of many numerical algorithms,
3
# we need to opt-in explicitly.
4
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
5
@muladd begin
6
#! format: noindent
7
8
# TODO: Upstream, LoopVectorization
9
# At the time of writing, LoopVectorization.jl cannot handle this kind of
10
# loop optimally when passing our custom functions `cons2entropy` and
11
# `entropy2cons`. Thus, we need to insert the physics directly here to
12
# get a significant runtime performance improvement.
13
function cons2entropy!(entropy_var_values::StructArray,
14
u_values::StructArray,
15
equations::CompressibleEulerEquations2D)
16
# The following is semantically equivalent to
17
# @threaded for i in eachindex(u_values)
18
# entropy_var_values[i] = cons2entropy(u_values[i], equations)
19
# end
20
# but much more efficient due to explicit optimization via `@turbo` from
21
# LoopVectorization.jl.
22
@unpack gamma, inv_gamma_minus_one = equations
23
24
rho_values, rho_v1_values, rho_v2_values, rho_e_total_values = StructArrays.components(u_values)
25
w1_values, w2_values, w3_values, w4_values = StructArrays.components(entropy_var_values)
26
27
@turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,
28
rho_e_total_values,
29
w1_values, w2_values, w3_values, w4_values)
30
rho = rho_values[i]
31
rho_v1 = rho_v1_values[i]
32
rho_v2 = rho_v2_values[i]
33
rho_e_total = rho_e_total_values[i]
34
35
# The following is basically the same code as in `cons2entropy`
36
v1 = rho_v1 / rho
37
v2 = rho_v2 / rho
38
v_square = v1^2 + v2^2
39
p = (gamma - 1) * (rho_e_total - 0.5f0 * rho * v_square)
40
s = log(p) - gamma * log(rho)
41
rho_p = rho / p
42
43
w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square
44
w2_values[i] = rho_p * v1
45
w3_values[i] = rho_p * v2
46
w4_values[i] = -rho_p
47
end
48
end
49
50
function entropy2cons!(entropy_projected_u_values::StructArray,
51
projected_entropy_var_values::StructArray,
52
equations::CompressibleEulerEquations2D)
53
# The following is semantically equivalent to
54
# @threaded for i in eachindex(projected_entropy_var_values)
55
# entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations)
56
# end
57
# but much more efficient due to explicit optimization via `@turbo` from
58
# LoopVectorization.jl.
59
@unpack gamma, inv_gamma_minus_one = equations
60
gamma_minus_one = gamma - 1
61
62
rho_values, rho_v1_values, rho_v2_values, rho_e_total_values = StructArrays.components(entropy_projected_u_values)
63
w1_values, w2_values, w3_values, w4_values = StructArrays.components(projected_entropy_var_values)
64
65
@turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,
66
rho_e_total_values,
67
w1_values, w2_values, w3_values, w4_values)
68
69
# The following is basically the same code as in `entropy2cons`
70
# Convert to entropy `-rho * s` used by
71
# - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD
72
# [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1)
73
# instead of `-rho * s / (gamma - 1)`
74
w1 = gamma_minus_one * w1_values[i]
75
w2 = gamma_minus_one * w2_values[i]
76
w3 = gamma_minus_one * w3_values[i]
77
w4 = gamma_minus_one * w4_values[i]
78
79
# s = specific entropy, eq. (53)
80
s = gamma - w1 + (w2^2 + w3^2) / (2 * w4)
81
82
# eq. (52)
83
rho_iota = (gamma_minus_one / (-w4)^gamma)^(inv_gamma_minus_one) *
84
exp(-s * inv_gamma_minus_one)
85
86
# eq. (51)
87
rho_values[i] = -rho_iota * w4
88
rho_v1_values[i] = rho_iota * w2
89
rho_v2_values[i] = rho_iota * w3
90
rho_e_total_values[i] = rho_iota * (1 - (w2^2 + w3^2) / (2 * w4))
91
end
92
end
93
94
function cons2entropy!(entropy_var_values::StructArray,
95
u_values::StructArray,
96
equations::CompressibleEulerEquations3D)
97
# The following is semantically equivalent to
98
# @threaded for i in eachindex(u_values)
99
# entropy_var_values[i] = cons2entropy(u_values[i], equations)
100
# end
101
# but much more efficient due to explicit optimization via `@turbo` from
102
# LoopVectorization.jl.
103
@unpack gamma, inv_gamma_minus_one = equations
104
105
rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_total_values = StructArrays.components(u_values)
106
w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(entropy_var_values)
107
108
@turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,
109
rho_v3_values, rho_e_total_values,
110
w1_values, w2_values, w3_values, w4_values,
111
w5_values)
112
rho = rho_values[i]
113
rho_v1 = rho_v1_values[i]
114
rho_v2 = rho_v2_values[i]
115
rho_v3 = rho_v3_values[i]
116
rho_e_total = rho_e_total_values[i]
117
118
# The following is basically the same code as in `cons2entropy`
119
v1 = rho_v1 / rho
120
v2 = rho_v2 / rho
121
v3 = rho_v3 / rho
122
v_square = v1^2 + v2^2 + v3^2
123
p = (gamma - 1) * (rho_e_total - 0.5f0 * rho * v_square)
124
s = log(p) - gamma * log(rho)
125
rho_p = rho / p
126
127
w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square
128
w2_values[i] = rho_p * v1
129
w3_values[i] = rho_p * v2
130
w4_values[i] = rho_p * v3
131
w5_values[i] = -rho_p
132
end
133
end
134
135
function entropy2cons!(entropy_projected_u_values::StructArray,
136
projected_entropy_var_values::StructArray,
137
equations::CompressibleEulerEquations3D)
138
# The following is semantically equivalent to
139
# @threaded for i in eachindex(projected_entropy_var_values)
140
# entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations)
141
# end
142
# but much more efficient due to explicit optimization via `@turbo` from
143
# LoopVectorization.jl.
144
@unpack gamma, inv_gamma_minus_one = equations
145
gamma_minus_one = gamma - 1
146
147
rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_total_values = StructArrays.components(entropy_projected_u_values)
148
w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(projected_entropy_var_values)
149
150
@turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,
151
rho_v3_values, rho_e_total_values,
152
w1_values, w2_values, w3_values, w4_values,
153
w5_values)
154
155
# The following is basically the same code as in `entropy2cons`
156
# Convert to entropy `-rho * s` used by
157
# - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD
158
# [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1)
159
# instead of `-rho * s / (gamma - 1)`
160
w1 = gamma_minus_one * w1_values[i]
161
w2 = gamma_minus_one * w2_values[i]
162
w3 = gamma_minus_one * w3_values[i]
163
w4 = gamma_minus_one * w4_values[i]
164
w5 = gamma_minus_one * w5_values[i]
165
166
# s = specific entropy, eq. (53)
167
s = gamma - w1 + (w2^2 + w3^2 + w4^2) / (2 * w5)
168
169
# eq. (52)
170
rho_iota = (gamma_minus_one / (-w5)^gamma)^(inv_gamma_minus_one) *
171
exp(-s * inv_gamma_minus_one)
172
173
# eq. (51)
174
rho_values[i] = -rho_iota * w5
175
rho_v1_values[i] = rho_iota * w2
176
rho_v2_values[i] = rho_iota * w3
177
rho_v3_values[i] = rho_iota * w4
178
rho_e_total_values[i] = rho_iota * (1 - (w2^2 + w3^2 + w4^2) / (2 * w5))
179
end
180
end
181
end # @muladd
182
183