CoCalc -- flux_differencing_compressible

GitHub Repository: trixi-framework/Trixi.jl
Path: blob/main/src/solvers/dgmulti/flux_differencing_compressible_euler.jl
⁵⁵⁹⁰ views
1
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
2
# Since these FMAs can increase the performance of many numerical algorithms,
3
# we need to opt-in explicitly.
4
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
5
@muladd begin
6
#! format: noindent
7

8
# TODO: Upstream, LoopVectorization
9
#       At the time of writing, LoopVectorization.jl cannot handle this kind of
10
#       loop optimally when passing our custom functions `cons2entropy` and
11
#       `entropy2cons`. Thus, we need to insert the physics directly here to
12
#       get a significant runtime performance improvement.
13
function cons2entropy!(entropy_var_values::StructArray,
14
                       u_values::StructArray,
15
                       equations::CompressibleEulerEquations2D)
16
    # The following is semantically equivalent to
17
    # @threaded for i in eachindex(u_values)
18
    #   entropy_var_values[i] = cons2entropy(u_values[i], equations)
19
    # end
20
    # but much more efficient due to explicit optimization via `@turbo` from
21
    # LoopVectorization.jl.
22
    @unpack gamma, inv_gamma_minus_one = equations
23

24
    rho_values, rho_v1_values, rho_v2_values, rho_e_total_values = StructArrays.components(u_values)
25
    w1_values, w2_values, w3_values, w4_values = StructArrays.components(entropy_var_values)
26

27
    @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,
28
                                          rho_e_total_values,
29
                                          w1_values, w2_values, w3_values, w4_values)
30
        rho = rho_values[i]
31
        rho_v1 = rho_v1_values[i]
32
        rho_v2 = rho_v2_values[i]
33
        rho_e_total = rho_e_total_values[i]
34

35
        # The following is basically the same code as in `cons2entropy`
36
        v1 = rho_v1 / rho
37
        v2 = rho_v2 / rho
38
        v_square = v1^2 + v2^2
39
        p = (gamma - 1) * (rho_e_total - 0.5f0 * rho * v_square)
40
        s = log(p) - gamma * log(rho)
41
        rho_p = rho / p
42

43
        w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square
44
        w2_values[i] = rho_p * v1
45
        w3_values[i] = rho_p * v2
46
        w4_values[i] = -rho_p
47
    end
48
end
49

50
function entropy2cons!(entropy_projected_u_values::StructArray,
51
                       projected_entropy_var_values::StructArray,
52
                       equations::CompressibleEulerEquations2D)
53
    # The following is semantically equivalent to
54
    # @threaded for i in eachindex(projected_entropy_var_values)
55
    #   entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations)
56
    # end
57
    # but much more efficient due to explicit optimization via `@turbo` from
58
    # LoopVectorization.jl.
59
    @unpack gamma, inv_gamma_minus_one = equations
60
    gamma_minus_one = gamma - 1
61

62
    rho_values, rho_v1_values, rho_v2_values, rho_e_total_values = StructArrays.components(entropy_projected_u_values)
63
    w1_values, w2_values, w3_values, w4_values = StructArrays.components(projected_entropy_var_values)
64

65
    @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,
66
                                          rho_e_total_values,
67
                                          w1_values, w2_values, w3_values, w4_values)
68

69
        # The following is basically the same code as in `entropy2cons`
70
        # Convert to entropy `-rho * s` used by
71
        # - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD
72
        #   [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1)
73
        # instead of `-rho * s / (gamma - 1)`
74
        w1 = gamma_minus_one * w1_values[i]
75
        w2 = gamma_minus_one * w2_values[i]
76
        w3 = gamma_minus_one * w3_values[i]
77
        w4 = gamma_minus_one * w4_values[i]
78

79
        # s = specific entropy, eq. (53)
80
        s = gamma - w1 + (w2^2 + w3^2) / (2 * w4)
81

82
        # eq. (52)
83
        rho_iota = (gamma_minus_one / (-w4)^gamma)^(inv_gamma_minus_one) *
84
                   exp(-s * inv_gamma_minus_one)
85

86
        # eq. (51)
87
        rho_values[i] = -rho_iota * w4
88
        rho_v1_values[i] = rho_iota * w2
89
        rho_v2_values[i] = rho_iota * w3
90
        rho_e_total_values[i] = rho_iota * (1 - (w2^2 + w3^2) / (2 * w4))
91
    end
92
end
93

94
function cons2entropy!(entropy_var_values::StructArray,
95
                       u_values::StructArray,
96
                       equations::CompressibleEulerEquations3D)
97
    # The following is semantically equivalent to
98
    # @threaded for i in eachindex(u_values)
99
    #   entropy_var_values[i] = cons2entropy(u_values[i], equations)
100
    # end
101
    # but much more efficient due to explicit optimization via `@turbo` from
102
    # LoopVectorization.jl.
103
    @unpack gamma, inv_gamma_minus_one = equations
104

105
    rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_total_values = StructArrays.components(u_values)
106
    w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(entropy_var_values)
107

108
    @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,
109
                                          rho_v3_values, rho_e_total_values,
110
                                          w1_values, w2_values, w3_values, w4_values,
111
                                          w5_values)
112
        rho = rho_values[i]
113
        rho_v1 = rho_v1_values[i]
114
        rho_v2 = rho_v2_values[i]
115
        rho_v3 = rho_v3_values[i]
116
        rho_e_total = rho_e_total_values[i]
117

118
        # The following is basically the same code as in `cons2entropy`
119
        v1 = rho_v1 / rho
120
        v2 = rho_v2 / rho
121
        v3 = rho_v3 / rho
122
        v_square = v1^2 + v2^2 + v3^2
123
        p = (gamma - 1) * (rho_e_total - 0.5f0 * rho * v_square)
124
        s = log(p) - gamma * log(rho)
125
        rho_p = rho / p
126

127
        w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square
128
        w2_values[i] = rho_p * v1
129
        w3_values[i] = rho_p * v2
130
        w4_values[i] = rho_p * v3
131
        w5_values[i] = -rho_p
132
    end
133
end
134

135
function entropy2cons!(entropy_projected_u_values::StructArray,
136
                       projected_entropy_var_values::StructArray,
137
                       equations::CompressibleEulerEquations3D)
138
    # The following is semantically equivalent to
139
    # @threaded for i in eachindex(projected_entropy_var_values)
140
    #   entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations)
141
    # end
142
    # but much more efficient due to explicit optimization via `@turbo` from
143
    # LoopVectorization.jl.
144
    @unpack gamma, inv_gamma_minus_one = equations
145
    gamma_minus_one = gamma - 1
146

147
    rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_total_values = StructArrays.components(entropy_projected_u_values)
148
    w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(projected_entropy_var_values)
149

150
    @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,
151
                                          rho_v3_values, rho_e_total_values,
152
                                          w1_values, w2_values, w3_values, w4_values,
153
                                          w5_values)
154

155
        # The following is basically the same code as in `entropy2cons`
156
        # Convert to entropy `-rho * s` used by
157
        # - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD
158
        #   [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1)
159
        # instead of `-rho * s / (gamma - 1)`
160
        w1 = gamma_minus_one * w1_values[i]
161
        w2 = gamma_minus_one * w2_values[i]
162
        w3 = gamma_minus_one * w3_values[i]
163
        w4 = gamma_minus_one * w4_values[i]
164
        w5 = gamma_minus_one * w5_values[i]
165

166
        # s = specific entropy, eq. (53)
167
        s = gamma - w1 + (w2^2 + w3^2 + w4^2) / (2 * w5)
168

169
        # eq. (52)
170
        rho_iota = (gamma_minus_one / (-w5)^gamma)^(inv_gamma_minus_one) *
171
                   exp(-s * inv_gamma_minus_one)
172

173
        # eq. (51)
174
        rho_values[i] = -rho_iota * w5
175
        rho_v1_values[i] = rho_iota * w2
176
        rho_v2_values[i] = rho_iota * w3
177
        rho_v3_values[i] = rho_iota * w4
178
        rho_e_total_values[i] = rho_iota * (1 - (w2^2 + w3^2 + w4^2) / (2 * w5))
179
    end
180
end
181
end # @muladd
182

183
Product

Resources

Company