Path: blob/main/src/solvers/dgsem/interpolation.jl
5591 views
1# Naive implementations of multiply_dimensionwise used to demonstrate the functionality2# without performance optimizations and for testing correctness of the optimized versions3# implemented below.4function multiply_dimensionwise_naive(matrix::AbstractMatrix,5data_in::AbstractArray{<:Any, 2})6size_out = size(matrix, 1)7size_in = size(matrix, 2)8n_vars = size(data_in, 1)9data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out)1011for i in 1:size_out12for ii in 1:size_in13for v in 1:n_vars14data_out[v, i] += matrix[i, ii] * data_in[v, ii]15end16end17end1819return data_out20end2122function multiply_dimensionwise_naive(matrix::AbstractMatrix,23data_in::AbstractArray{<:Any, 3})24size_out = size(matrix, 1)25size_in = size(matrix, 2)26n_vars = size(data_in, 1)27data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out,28size_out)2930for j in 1:size_out, i in 1:size_out31for jj in 1:size_in, ii in 1:size_in32for v in 1:n_vars33data_out[v, i, j] += matrix[i, ii] * matrix[j, jj] * data_in[v, ii, jj]34end35end36end3738return data_out39end4041function multiply_dimensionwise_naive(matrix::AbstractMatrix,42data_in::AbstractArray{<:Any, 4})43size_out = size(matrix, 1)44size_in = size(matrix, 2)45n_vars = size(data_in, 1)46data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out,47size_out, size_out)4849for k in 1:size_out, j in 1:size_out, i in 1:size_out50for kk in 1:size_in, jj in 1:size_in, ii in 1:size_in51for v in 1:n_vars52data_out[v, i, j, k] += matrix[i, ii] * matrix[j, jj] * matrix[k, kk] *53data_in[v, ii, jj, kk]54end55end56end5758return data_out59end6061"""62multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, NDIMS+1})6364Multiply the array `data_in` by `matrix` in each coordinate direction, where `data_in`65is assumed to have the first coordinate for the number of variables and the remaining coordinates66are multiplied by `matrix`.67"""68function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 2})69# 1D70# optimized version of multiply_dimensionwise_naive71size_out = size(matrix, 1)72n_vars = size(data_in, 1)73data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out)7475multiply_dimensionwise!(data_out, matrix, data_in)7677return data_out78end7980function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 3})81# 2D82# optimized version of multiply_dimensionwise_naive83size_out = size(matrix, 1)84n_vars = size(data_in, 1)85data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out,86size_out)8788multiply_dimensionwise!(data_out, matrix, data_in)8990return data_out91end9293function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 4})94# 3D95# optimized version of multiply_dimensionwise_naive96size_out = size(matrix, 1)97n_vars = size(data_in, 1)98data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out,99size_out, size_out)100101multiply_dimensionwise!(data_out, matrix, data_in)102103return data_out104end105106# In the following, there are several optimized in-place versions of multiply_dimensionwise.107# These may make use of advanced optimization features such as the macro `@tullio` from Tullio.jl,108# which basically uses an Einstein summation convention syntax.109# Another possibility is `@turbo` from LoopVectorization.jl. The runtime performance could be even110# optimized further by using `@turbo inline=true for` instead of `@turbo for`, but that comes at the111# cost of increased latency, at least on some systems...112113# 1D version114function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 2}, matrix::AbstractMatrix,115data_in::AbstractArray{<:Any, 2})116# @tullio threads=false data_out[v, i] = matrix[i, ii] * data_in[v, ii]117@turbo for i in axes(data_out, 2), v in axes(data_out, 1)118res = zero(eltype(data_out))119for ii in axes(matrix, 2)120res += matrix[i, ii] * data_in[v, ii]121end122data_out[v, i] = res123end124125return nothing126end127128# 1D version for scalars129# Instead of having a leading dimension of size 1 in `data_out, data_in`, this leading dimension130# of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`.131function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 1},132matrix::AbstractMatrix,133data_in::AbstractArray{<:Any, 1})134# @tullio threads=false data_out[i] = matrix[i, ii] * data_in[ii]135@turbo for i in axes(data_out, 1)136res = zero(eltype(data_out))137for ii in axes(matrix, 2)138res += matrix[i, ii] * data_in[ii]139end140data_out[i] = res141end142143return nothing144end145146# 1D version, apply matrixJ to data_inJ147function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 2}, matrix1::AbstractMatrix,148data_in1::AbstractArray{<:Any, 2}, matrix2::AbstractMatrix,149data_in2::AbstractArray{<:Any, 2})150# @tullio threads=false data_out[v, i] = matrix1[i, ii] * data_in1[v, ii] + matrix2[i, ii] * data_in2[v, ii]151# TODO: LoopVectorization upgrade152# We would like to use `@turbo` for the outermost loop possibly fuse both inner153# loops, but that does currently not work because of limitations of154# LoopVectorizationjl. However, Chris Elrod is planning to address this in155# the future, cf. https://github.com/JuliaSIMD/LoopVectorization.jl/issues/230#issuecomment-810632972156@turbo for i in axes(data_out, 2), v in axes(data_out, 1)157res = zero(eltype(data_out))158for ii in axes(matrix1, 2)159res += matrix1[i, ii] * data_in1[v, ii]160end161data_out[v, i] = res162end163@turbo for i in axes(data_out, 2), v in axes(data_out, 1)164res = zero(eltype(data_out))165for ii in axes(matrix2, 2)166res += matrix2[i, ii] * data_in2[v, ii]167end168data_out[v, i] += res169end170171return nothing172end173174# 2D version175function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3}, matrix::AbstractMatrix,176data_in::AbstractArray{<:Any, 3},177tmp1 = zeros(eltype(data_out), size(data_out, 1),178size(matrix, 1), size(matrix, 2)))179180# Interpolate in x-direction181# @tullio threads=false tmp1[v, i, j] = matrix[i, ii] * data_in[v, ii, j]182@turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1)183res = zero(eltype(tmp1))184for ii in axes(matrix, 2)185res += matrix[i, ii] * data_in[v, ii, j]186end187tmp1[v, i, j] = res188end189190# Interpolate in y-direction191# @tullio threads=false data_out[v, i, j] = matrix[j, jj] * tmp1[v, i, jj]192@turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)193res = zero(eltype(data_out))194for jj in axes(matrix, 2)195res += matrix[j, jj] * tmp1[v, i, jj]196end197data_out[v, i, j] = res198end199200return nothing201end202203# 2D version for scalars204# Instead of having a leading dimension of size 1 in `data_out, data_in`, this leading dimension205# of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`.206function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 2},207matrix::AbstractMatrix,208data_in::AbstractArray{<:Any, 2},209tmp1 = zeros(eltype(data_out), size(matrix, 1),210size(matrix, 2)))211212# Interpolate in x-direction213# @tullio threads=false tmp1[i, j] = matrix[i, ii] * data_in[ii, j]214@turbo for j in axes(tmp1, 2), i in axes(tmp1, 1)215res = zero(eltype(tmp1))216for ii in axes(matrix, 2)217res += matrix[i, ii] * data_in[ii, j]218end219tmp1[i, j] = res220end221222# Interpolate in y-direction223# @tullio threads=false data_out[i, j] = matrix[j, jj] * tmp1[i, jj]224@turbo for j in axes(data_out, 2), i in axes(data_out, 1)225res = zero(eltype(data_out))226for jj in axes(matrix, 2)227res += matrix[j, jj] * tmp1[i, jj]228end229data_out[i, j] = res230end231232return nothing233end234235# 2D version, apply matrixJ to dimension J of data_in236function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3},237matrix1::AbstractMatrix, matrix2::AbstractMatrix,238data_in::AbstractArray{<:Any, 3},239tmp1 = zeros(eltype(data_out), size(data_out, 1),240size(matrix1, 1), size(matrix1, 2)))241242# Interpolate in x-direction243# @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j]244@turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1)245res = zero(eltype(tmp1))246for ii in axes(matrix1, 2)247res += matrix1[i, ii] * data_in[v, ii, j]248end249tmp1[v, i, j] = res250end251252# Interpolate in y-direction253# @tullio threads=false data_out[v, i, j] = matrix2[j, jj] * tmp1[v, i, jj]254@turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)255res = zero(eltype(data_out))256for jj in axes(matrix2, 2)257res += matrix2[j, jj] * tmp1[v, i, jj]258end259data_out[v, i, j] = res260end261262return nothing263end264265# 2D version, apply matrixJ to dimension J of data_in and add the result to data_out266function add_multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3},267matrix1::AbstractMatrix, matrix2::AbstractMatrix,268data_in::AbstractArray{<:Any, 3},269tmp1 = zeros(eltype(data_out), size(data_out, 1),270size(matrix1, 1), size(matrix1, 2)))271272# Interpolate in x-direction273# @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j]274@turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1)275res = zero(eltype(tmp1))276for ii in axes(matrix1, 2)277res += matrix1[i, ii] * data_in[v, ii, j]278end279tmp1[v, i, j] = res280end281282# Interpolate in y-direction283# @tullio threads=false data_out[v, i, j] += matrix2[j, jj] * tmp1[v, i, jj]284@turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)285res = zero(eltype(data_out))286for jj in axes(matrix2, 2)287res += matrix2[j, jj] * tmp1[v, i, jj]288end289data_out[v, i, j] += res290end291292return nothing293end294295# 3D version296function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4}, matrix::AbstractMatrix,297data_in::AbstractArray{<:Any, 4},298tmp1 = zeros(eltype(data_out), size(data_out, 1),299size(matrix, 1), size(matrix, 2),300size(matrix, 2)),301tmp2 = zeros(eltype(data_out), size(data_out, 1),302size(matrix, 1), size(matrix, 1),303size(matrix, 2)))304305# Interpolate in x-direction306# @tullio threads=false tmp1[v, i, j, k] = matrix[i, ii] * data_in[v, ii, j, k]307@turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2),308v in axes(tmp1, 1)309310res = zero(eltype(tmp1))311for ii in axes(matrix, 2)312res += matrix[i, ii] * data_in[v, ii, j, k]313end314tmp1[v, i, j, k] = res315end316317# Interpolate in y-direction318# @tullio threads=false tmp2[v, i, j, k] = matrix[j, jj] * tmp1[v, i, jj, k]319@turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2),320v in axes(tmp2, 1)321322res = zero(eltype(tmp2))323for jj in axes(matrix, 2)324res += matrix[j, jj] * tmp1[v, i, jj, k]325end326tmp2[v, i, j, k] = res327end328329# Interpolate in z-direction330# @tullio threads=false data_out[v, i, j, k] = matrix[k, kk] * tmp2[v, i, j, kk]331@turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2),332v in axes(data_out, 1)333334res = zero(eltype(data_out))335for kk in axes(matrix, 2)336res += matrix[k, kk] * tmp2[v, i, j, kk]337end338data_out[v, i, j, k] = res339end340341return nothing342end343344# 3D version for scalars345# Instead of having a leading dimension of size 1 in `data_out, data_in`, this leading dimension346# of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`.347function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 3},348matrix::AbstractMatrix,349data_in::AbstractArray{<:Any, 3},350tmp1 = zeros(eltype(data_out), size(matrix, 1),351size(matrix, 2), size(matrix, 2)),352tmp2 = zeros(eltype(data_out), size(matrix, 1),353size(matrix, 1), size(matrix, 2)))354355# Interpolate in x-direction356# @tullio threads=false tmp1[i, j, k] = matrix[i, ii] * data_in[ii, j, k]357@turbo for k in axes(tmp1, 3), j in axes(tmp1, 2), i in axes(tmp1, 1)358res = zero(eltype(tmp1))359for ii in axes(matrix, 2)360res += matrix[i, ii] * data_in[ii, j, k]361end362tmp1[i, j, k] = res363end364365# Interpolate in y-direction366# @tullio threads=false tmp2[i, j, k] = matrix[j, jj] * tmp1[i, jj, k]367@turbo for k in axes(tmp2, 3), j in axes(tmp2, 2), i in axes(tmp2, 1)368res = zero(eltype(tmp2))369for jj in axes(matrix, 2)370res += matrix[j, jj] * tmp1[i, jj, k]371end372tmp2[i, j, k] = res373end374375# Interpolate in z-direction376# @tullio threads=false data_out[i, j, k] = matrix[k, kk] * tmp2[i, j, kk]377@turbo for k in axes(data_out, 3), j in axes(data_out, 2), i in axes(data_out, 1)378res = zero(eltype(data_out))379for kk in axes(matrix, 2)380res += matrix[k, kk] * tmp2[i, j, kk]381end382data_out[i, j, k] = res383end384385return nothing386end387388# 3D version, apply matrixJ to dimension J of data_in389function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4},390matrix1::AbstractMatrix, matrix2::AbstractMatrix,391matrix3::AbstractMatrix,392data_in::AbstractArray{<:Any, 4},393tmp1 = zeros(eltype(data_out), size(data_out, 1),394size(matrix1, 1), size(matrix1, 2),395size(matrix1, 2)),396tmp2 = zeros(eltype(data_out), size(data_out, 1),397size(matrix1, 1), size(matrix1, 1),398size(matrix1, 2)))399400# Interpolate in x-direction401# @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k]402@turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2),403v in axes(tmp1, 1)404405res = zero(eltype(tmp1))406for ii in axes(matrix1, 2)407res += matrix1[i, ii] * data_in[v, ii, j, k]408end409tmp1[v, i, j, k] = res410end411412# Interpolate in y-direction413# @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k]414@turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2),415v in axes(tmp2, 1)416417res = zero(eltype(tmp1))418for jj in axes(matrix2, 2)419res += matrix2[j, jj] * tmp1[v, i, jj, k]420end421tmp2[v, i, j, k] = res422end423424# Interpolate in z-direction425# @tullio threads=false data_out[v, i, j, k] = matrix3[k, kk] * tmp2[v, i, j, kk]426@turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2),427v in axes(data_out, 1)428429res = zero(eltype(data_out))430for kk in axes(matrix3, 2)431res += matrix3[k, kk] * tmp2[v, i, j, kk]432end433data_out[v, i, j, k] = res434end435436return nothing437end438439# 3D version, apply matrixJ to dimension J of data_in and add the result to data_out440function add_multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4},441matrix1::AbstractMatrix, matrix2::AbstractMatrix,442matrix3::AbstractMatrix,443data_in::AbstractArray{<:Any, 4},444tmp1 = zeros(eltype(data_out), size(data_out, 1),445size(matrix1, 1), size(matrix1, 2),446size(matrix1, 2)),447tmp2 = zeros(eltype(data_out), size(data_out, 1),448size(matrix1, 1), size(matrix1, 1),449size(matrix1, 2)))450451# Interpolate in x-direction452# @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k]453@turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2),454v in axes(tmp1, 1)455456res = zero(eltype(tmp1))457for ii in axes(matrix1, 2)458res += matrix1[i, ii] * data_in[v, ii, j, k]459end460tmp1[v, i, j, k] = res461end462463# Interpolate in y-direction464# @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k]465@turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2),466v in axes(tmp2, 1)467468res = zero(eltype(tmp1))469for jj in axes(matrix2, 2)470res += matrix2[j, jj] * tmp1[v, i, jj, k]471end472tmp2[v, i, j, k] = res473end474475# Interpolate in z-direction476# @tullio threads=false data_out[v, i, j, k] += matrix3[k, kk] * tmp2[v, i, j, kk]477@turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2),478v in axes(data_out, 1)479480res = zero(eltype(data_out))481for kk in axes(matrix3, 2)482res += matrix3[k, kk] * tmp2[v, i, j, kk]483end484data_out[v, i, j, k] += res485end486487return nothing488end489490491