Path: blob/main/src/solvers/dgmulti/flux_differencing_compressible_euler.jl
5590 views
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).1# Since these FMAs can increase the performance of many numerical algorithms,2# we need to opt-in explicitly.3# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.4@muladd begin5#! format: noindent67# TODO: Upstream, LoopVectorization8# At the time of writing, LoopVectorization.jl cannot handle this kind of9# loop optimally when passing our custom functions `cons2entropy` and10# `entropy2cons`. Thus, we need to insert the physics directly here to11# get a significant runtime performance improvement.12function cons2entropy!(entropy_var_values::StructArray,13u_values::StructArray,14equations::CompressibleEulerEquations2D)15# The following is semantically equivalent to16# @threaded for i in eachindex(u_values)17# entropy_var_values[i] = cons2entropy(u_values[i], equations)18# end19# but much more efficient due to explicit optimization via `@turbo` from20# LoopVectorization.jl.21@unpack gamma, inv_gamma_minus_one = equations2223rho_values, rho_v1_values, rho_v2_values, rho_e_total_values = StructArrays.components(u_values)24w1_values, w2_values, w3_values, w4_values = StructArrays.components(entropy_var_values)2526@turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,27rho_e_total_values,28w1_values, w2_values, w3_values, w4_values)29rho = rho_values[i]30rho_v1 = rho_v1_values[i]31rho_v2 = rho_v2_values[i]32rho_e_total = rho_e_total_values[i]3334# The following is basically the same code as in `cons2entropy`35v1 = rho_v1 / rho36v2 = rho_v2 / rho37v_square = v1^2 + v2^238p = (gamma - 1) * (rho_e_total - 0.5f0 * rho * v_square)39s = log(p) - gamma * log(rho)40rho_p = rho / p4142w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square43w2_values[i] = rho_p * v144w3_values[i] = rho_p * v245w4_values[i] = -rho_p46end47end4849function entropy2cons!(entropy_projected_u_values::StructArray,50projected_entropy_var_values::StructArray,51equations::CompressibleEulerEquations2D)52# The following is semantically equivalent to53# @threaded for i in eachindex(projected_entropy_var_values)54# entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations)55# end56# but much more efficient due to explicit optimization via `@turbo` from57# LoopVectorization.jl.58@unpack gamma, inv_gamma_minus_one = equations59gamma_minus_one = gamma - 16061rho_values, rho_v1_values, rho_v2_values, rho_e_total_values = StructArrays.components(entropy_projected_u_values)62w1_values, w2_values, w3_values, w4_values = StructArrays.components(projected_entropy_var_values)6364@turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,65rho_e_total_values,66w1_values, w2_values, w3_values, w4_values)6768# The following is basically the same code as in `entropy2cons`69# Convert to entropy `-rho * s` used by70# - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD71# [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1)72# instead of `-rho * s / (gamma - 1)`73w1 = gamma_minus_one * w1_values[i]74w2 = gamma_minus_one * w2_values[i]75w3 = gamma_minus_one * w3_values[i]76w4 = gamma_minus_one * w4_values[i]7778# s = specific entropy, eq. (53)79s = gamma - w1 + (w2^2 + w3^2) / (2 * w4)8081# eq. (52)82rho_iota = (gamma_minus_one / (-w4)^gamma)^(inv_gamma_minus_one) *83exp(-s * inv_gamma_minus_one)8485# eq. (51)86rho_values[i] = -rho_iota * w487rho_v1_values[i] = rho_iota * w288rho_v2_values[i] = rho_iota * w389rho_e_total_values[i] = rho_iota * (1 - (w2^2 + w3^2) / (2 * w4))90end91end9293function cons2entropy!(entropy_var_values::StructArray,94u_values::StructArray,95equations::CompressibleEulerEquations3D)96# The following is semantically equivalent to97# @threaded for i in eachindex(u_values)98# entropy_var_values[i] = cons2entropy(u_values[i], equations)99# end100# but much more efficient due to explicit optimization via `@turbo` from101# LoopVectorization.jl.102@unpack gamma, inv_gamma_minus_one = equations103104rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_total_values = StructArrays.components(u_values)105w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(entropy_var_values)106107@turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,108rho_v3_values, rho_e_total_values,109w1_values, w2_values, w3_values, w4_values,110w5_values)111rho = rho_values[i]112rho_v1 = rho_v1_values[i]113rho_v2 = rho_v2_values[i]114rho_v3 = rho_v3_values[i]115rho_e_total = rho_e_total_values[i]116117# The following is basically the same code as in `cons2entropy`118v1 = rho_v1 / rho119v2 = rho_v2 / rho120v3 = rho_v3 / rho121v_square = v1^2 + v2^2 + v3^2122p = (gamma - 1) * (rho_e_total - 0.5f0 * rho * v_square)123s = log(p) - gamma * log(rho)124rho_p = rho / p125126w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square127w2_values[i] = rho_p * v1128w3_values[i] = rho_p * v2129w4_values[i] = rho_p * v3130w5_values[i] = -rho_p131end132end133134function entropy2cons!(entropy_projected_u_values::StructArray,135projected_entropy_var_values::StructArray,136equations::CompressibleEulerEquations3D)137# The following is semantically equivalent to138# @threaded for i in eachindex(projected_entropy_var_values)139# entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations)140# end141# but much more efficient due to explicit optimization via `@turbo` from142# LoopVectorization.jl.143@unpack gamma, inv_gamma_minus_one = equations144gamma_minus_one = gamma - 1145146rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_total_values = StructArrays.components(entropy_projected_u_values)147w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(projected_entropy_var_values)148149@turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values,150rho_v3_values, rho_e_total_values,151w1_values, w2_values, w3_values, w4_values,152w5_values)153154# The following is basically the same code as in `entropy2cons`155# Convert to entropy `-rho * s` used by156# - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD157# [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1)158# instead of `-rho * s / (gamma - 1)`159w1 = gamma_minus_one * w1_values[i]160w2 = gamma_minus_one * w2_values[i]161w3 = gamma_minus_one * w3_values[i]162w4 = gamma_minus_one * w4_values[i]163w5 = gamma_minus_one * w5_values[i]164165# s = specific entropy, eq. (53)166s = gamma - w1 + (w2^2 + w3^2 + w4^2) / (2 * w5)167168# eq. (52)169rho_iota = (gamma_minus_one / (-w5)^gamma)^(inv_gamma_minus_one) *170exp(-s * inv_gamma_minus_one)171172# eq. (51)173rho_values[i] = -rho_iota * w5174rho_v1_values[i] = rho_iota * w2175rho_v2_values[i] = rho_iota * w3176rho_v3_values[i] = rho_iota * w4177rho_e_total_values[i] = rho_iota * (1 - (w2^2 + w3^2 + w4^2) / (2 * w5))178end179end180end # @muladd181182183