Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
trixi-framework
GitHub Repository: trixi-framework/Trixi.jl
Path: blob/main/src/solvers/dgsem/interpolation.jl
5591 views
1
2
# Naive implementations of multiply_dimensionwise used to demonstrate the functionality
3
# without performance optimizations and for testing correctness of the optimized versions
4
# implemented below.
5
function multiply_dimensionwise_naive(matrix::AbstractMatrix,
6
data_in::AbstractArray{<:Any, 2})
7
size_out = size(matrix, 1)
8
size_in = size(matrix, 2)
9
n_vars = size(data_in, 1)
10
data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out)
11
12
for i in 1:size_out
13
for ii in 1:size_in
14
for v in 1:n_vars
15
data_out[v, i] += matrix[i, ii] * data_in[v, ii]
16
end
17
end
18
end
19
20
return data_out
21
end
22
23
function multiply_dimensionwise_naive(matrix::AbstractMatrix,
24
data_in::AbstractArray{<:Any, 3})
25
size_out = size(matrix, 1)
26
size_in = size(matrix, 2)
27
n_vars = size(data_in, 1)
28
data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out,
29
size_out)
30
31
for j in 1:size_out, i in 1:size_out
32
for jj in 1:size_in, ii in 1:size_in
33
for v in 1:n_vars
34
data_out[v, i, j] += matrix[i, ii] * matrix[j, jj] * data_in[v, ii, jj]
35
end
36
end
37
end
38
39
return data_out
40
end
41
42
function multiply_dimensionwise_naive(matrix::AbstractMatrix,
43
data_in::AbstractArray{<:Any, 4})
44
size_out = size(matrix, 1)
45
size_in = size(matrix, 2)
46
n_vars = size(data_in, 1)
47
data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out,
48
size_out, size_out)
49
50
for k in 1:size_out, j in 1:size_out, i in 1:size_out
51
for kk in 1:size_in, jj in 1:size_in, ii in 1:size_in
52
for v in 1:n_vars
53
data_out[v, i, j, k] += matrix[i, ii] * matrix[j, jj] * matrix[k, kk] *
54
data_in[v, ii, jj, kk]
55
end
56
end
57
end
58
59
return data_out
60
end
61
62
"""
63
multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, NDIMS+1})
64
65
Multiply the array `data_in` by `matrix` in each coordinate direction, where `data_in`
66
is assumed to have the first coordinate for the number of variables and the remaining coordinates
67
are multiplied by `matrix`.
68
"""
69
function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 2})
70
# 1D
71
# optimized version of multiply_dimensionwise_naive
72
size_out = size(matrix, 1)
73
n_vars = size(data_in, 1)
74
data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out)
75
76
multiply_dimensionwise!(data_out, matrix, data_in)
77
78
return data_out
79
end
80
81
function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 3})
82
# 2D
83
# optimized version of multiply_dimensionwise_naive
84
size_out = size(matrix, 1)
85
n_vars = size(data_in, 1)
86
data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out,
87
size_out)
88
89
multiply_dimensionwise!(data_out, matrix, data_in)
90
91
return data_out
92
end
93
94
function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 4})
95
# 3D
96
# optimized version of multiply_dimensionwise_naive
97
size_out = size(matrix, 1)
98
n_vars = size(data_in, 1)
99
data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out,
100
size_out, size_out)
101
102
multiply_dimensionwise!(data_out, matrix, data_in)
103
104
return data_out
105
end
106
107
# In the following, there are several optimized in-place versions of multiply_dimensionwise.
108
# These may make use of advanced optimization features such as the macro `@tullio` from Tullio.jl,
109
# which basically uses an Einstein summation convention syntax.
110
# Another possibility is `@turbo` from LoopVectorization.jl. The runtime performance could be even
111
# optimized further by using `@turbo inline=true for` instead of `@turbo for`, but that comes at the
112
# cost of increased latency, at least on some systems...
113
114
# 1D version
115
function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 2}, matrix::AbstractMatrix,
116
data_in::AbstractArray{<:Any, 2})
117
# @tullio threads=false data_out[v, i] = matrix[i, ii] * data_in[v, ii]
118
@turbo for i in axes(data_out, 2), v in axes(data_out, 1)
119
res = zero(eltype(data_out))
120
for ii in axes(matrix, 2)
121
res += matrix[i, ii] * data_in[v, ii]
122
end
123
data_out[v, i] = res
124
end
125
126
return nothing
127
end
128
129
# 1D version for scalars
130
# Instead of having a leading dimension of size 1 in `data_out, data_in`, this leading dimension
131
# of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`.
132
function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 1},
133
matrix::AbstractMatrix,
134
data_in::AbstractArray{<:Any, 1})
135
# @tullio threads=false data_out[i] = matrix[i, ii] * data_in[ii]
136
@turbo for i in axes(data_out, 1)
137
res = zero(eltype(data_out))
138
for ii in axes(matrix, 2)
139
res += matrix[i, ii] * data_in[ii]
140
end
141
data_out[i] = res
142
end
143
144
return nothing
145
end
146
147
# 1D version, apply matrixJ to data_inJ
148
function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 2}, matrix1::AbstractMatrix,
149
data_in1::AbstractArray{<:Any, 2}, matrix2::AbstractMatrix,
150
data_in2::AbstractArray{<:Any, 2})
151
# @tullio threads=false data_out[v, i] = matrix1[i, ii] * data_in1[v, ii] + matrix2[i, ii] * data_in2[v, ii]
152
# TODO: LoopVectorization upgrade
153
# We would like to use `@turbo` for the outermost loop possibly fuse both inner
154
# loops, but that does currently not work because of limitations of
155
# LoopVectorizationjl. However, Chris Elrod is planning to address this in
156
# the future, cf. https://github.com/JuliaSIMD/LoopVectorization.jl/issues/230#issuecomment-810632972
157
@turbo for i in axes(data_out, 2), v in axes(data_out, 1)
158
res = zero(eltype(data_out))
159
for ii in axes(matrix1, 2)
160
res += matrix1[i, ii] * data_in1[v, ii]
161
end
162
data_out[v, i] = res
163
end
164
@turbo for i in axes(data_out, 2), v in axes(data_out, 1)
165
res = zero(eltype(data_out))
166
for ii in axes(matrix2, 2)
167
res += matrix2[i, ii] * data_in2[v, ii]
168
end
169
data_out[v, i] += res
170
end
171
172
return nothing
173
end
174
175
# 2D version
176
function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3}, matrix::AbstractMatrix,
177
data_in::AbstractArray{<:Any, 3},
178
tmp1 = zeros(eltype(data_out), size(data_out, 1),
179
size(matrix, 1), size(matrix, 2)))
180
181
# Interpolate in x-direction
182
# @tullio threads=false tmp1[v, i, j] = matrix[i, ii] * data_in[v, ii, j]
183
@turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1)
184
res = zero(eltype(tmp1))
185
for ii in axes(matrix, 2)
186
res += matrix[i, ii] * data_in[v, ii, j]
187
end
188
tmp1[v, i, j] = res
189
end
190
191
# Interpolate in y-direction
192
# @tullio threads=false data_out[v, i, j] = matrix[j, jj] * tmp1[v, i, jj]
193
@turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)
194
res = zero(eltype(data_out))
195
for jj in axes(matrix, 2)
196
res += matrix[j, jj] * tmp1[v, i, jj]
197
end
198
data_out[v, i, j] = res
199
end
200
201
return nothing
202
end
203
204
# 2D version for scalars
205
# Instead of having a leading dimension of size 1 in `data_out, data_in`, this leading dimension
206
# of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`.
207
function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 2},
208
matrix::AbstractMatrix,
209
data_in::AbstractArray{<:Any, 2},
210
tmp1 = zeros(eltype(data_out), size(matrix, 1),
211
size(matrix, 2)))
212
213
# Interpolate in x-direction
214
# @tullio threads=false tmp1[i, j] = matrix[i, ii] * data_in[ii, j]
215
@turbo for j in axes(tmp1, 2), i in axes(tmp1, 1)
216
res = zero(eltype(tmp1))
217
for ii in axes(matrix, 2)
218
res += matrix[i, ii] * data_in[ii, j]
219
end
220
tmp1[i, j] = res
221
end
222
223
# Interpolate in y-direction
224
# @tullio threads=false data_out[i, j] = matrix[j, jj] * tmp1[i, jj]
225
@turbo for j in axes(data_out, 2), i in axes(data_out, 1)
226
res = zero(eltype(data_out))
227
for jj in axes(matrix, 2)
228
res += matrix[j, jj] * tmp1[i, jj]
229
end
230
data_out[i, j] = res
231
end
232
233
return nothing
234
end
235
236
# 2D version, apply matrixJ to dimension J of data_in
237
function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3},
238
matrix1::AbstractMatrix, matrix2::AbstractMatrix,
239
data_in::AbstractArray{<:Any, 3},
240
tmp1 = zeros(eltype(data_out), size(data_out, 1),
241
size(matrix1, 1), size(matrix1, 2)))
242
243
# Interpolate in x-direction
244
# @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j]
245
@turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1)
246
res = zero(eltype(tmp1))
247
for ii in axes(matrix1, 2)
248
res += matrix1[i, ii] * data_in[v, ii, j]
249
end
250
tmp1[v, i, j] = res
251
end
252
253
# Interpolate in y-direction
254
# @tullio threads=false data_out[v, i, j] = matrix2[j, jj] * tmp1[v, i, jj]
255
@turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)
256
res = zero(eltype(data_out))
257
for jj in axes(matrix2, 2)
258
res += matrix2[j, jj] * tmp1[v, i, jj]
259
end
260
data_out[v, i, j] = res
261
end
262
263
return nothing
264
end
265
266
# 2D version, apply matrixJ to dimension J of data_in and add the result to data_out
267
function add_multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3},
268
matrix1::AbstractMatrix, matrix2::AbstractMatrix,
269
data_in::AbstractArray{<:Any, 3},
270
tmp1 = zeros(eltype(data_out), size(data_out, 1),
271
size(matrix1, 1), size(matrix1, 2)))
272
273
# Interpolate in x-direction
274
# @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j]
275
@turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1)
276
res = zero(eltype(tmp1))
277
for ii in axes(matrix1, 2)
278
res += matrix1[i, ii] * data_in[v, ii, j]
279
end
280
tmp1[v, i, j] = res
281
end
282
283
# Interpolate in y-direction
284
# @tullio threads=false data_out[v, i, j] += matrix2[j, jj] * tmp1[v, i, jj]
285
@turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)
286
res = zero(eltype(data_out))
287
for jj in axes(matrix2, 2)
288
res += matrix2[j, jj] * tmp1[v, i, jj]
289
end
290
data_out[v, i, j] += res
291
end
292
293
return nothing
294
end
295
296
# 3D version
297
function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4}, matrix::AbstractMatrix,
298
data_in::AbstractArray{<:Any, 4},
299
tmp1 = zeros(eltype(data_out), size(data_out, 1),
300
size(matrix, 1), size(matrix, 2),
301
size(matrix, 2)),
302
tmp2 = zeros(eltype(data_out), size(data_out, 1),
303
size(matrix, 1), size(matrix, 1),
304
size(matrix, 2)))
305
306
# Interpolate in x-direction
307
# @tullio threads=false tmp1[v, i, j, k] = matrix[i, ii] * data_in[v, ii, j, k]
308
@turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2),
309
v in axes(tmp1, 1)
310
311
res = zero(eltype(tmp1))
312
for ii in axes(matrix, 2)
313
res += matrix[i, ii] * data_in[v, ii, j, k]
314
end
315
tmp1[v, i, j, k] = res
316
end
317
318
# Interpolate in y-direction
319
# @tullio threads=false tmp2[v, i, j, k] = matrix[j, jj] * tmp1[v, i, jj, k]
320
@turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2),
321
v in axes(tmp2, 1)
322
323
res = zero(eltype(tmp2))
324
for jj in axes(matrix, 2)
325
res += matrix[j, jj] * tmp1[v, i, jj, k]
326
end
327
tmp2[v, i, j, k] = res
328
end
329
330
# Interpolate in z-direction
331
# @tullio threads=false data_out[v, i, j, k] = matrix[k, kk] * tmp2[v, i, j, kk]
332
@turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2),
333
v in axes(data_out, 1)
334
335
res = zero(eltype(data_out))
336
for kk in axes(matrix, 2)
337
res += matrix[k, kk] * tmp2[v, i, j, kk]
338
end
339
data_out[v, i, j, k] = res
340
end
341
342
return nothing
343
end
344
345
# 3D version for scalars
346
# Instead of having a leading dimension of size 1 in `data_out, data_in`, this leading dimension
347
# of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`.
348
function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 3},
349
matrix::AbstractMatrix,
350
data_in::AbstractArray{<:Any, 3},
351
tmp1 = zeros(eltype(data_out), size(matrix, 1),
352
size(matrix, 2), size(matrix, 2)),
353
tmp2 = zeros(eltype(data_out), size(matrix, 1),
354
size(matrix, 1), size(matrix, 2)))
355
356
# Interpolate in x-direction
357
# @tullio threads=false tmp1[i, j, k] = matrix[i, ii] * data_in[ii, j, k]
358
@turbo for k in axes(tmp1, 3), j in axes(tmp1, 2), i in axes(tmp1, 1)
359
res = zero(eltype(tmp1))
360
for ii in axes(matrix, 2)
361
res += matrix[i, ii] * data_in[ii, j, k]
362
end
363
tmp1[i, j, k] = res
364
end
365
366
# Interpolate in y-direction
367
# @tullio threads=false tmp2[i, j, k] = matrix[j, jj] * tmp1[i, jj, k]
368
@turbo for k in axes(tmp2, 3), j in axes(tmp2, 2), i in axes(tmp2, 1)
369
res = zero(eltype(tmp2))
370
for jj in axes(matrix, 2)
371
res += matrix[j, jj] * tmp1[i, jj, k]
372
end
373
tmp2[i, j, k] = res
374
end
375
376
# Interpolate in z-direction
377
# @tullio threads=false data_out[i, j, k] = matrix[k, kk] * tmp2[i, j, kk]
378
@turbo for k in axes(data_out, 3), j in axes(data_out, 2), i in axes(data_out, 1)
379
res = zero(eltype(data_out))
380
for kk in axes(matrix, 2)
381
res += matrix[k, kk] * tmp2[i, j, kk]
382
end
383
data_out[i, j, k] = res
384
end
385
386
return nothing
387
end
388
389
# 3D version, apply matrixJ to dimension J of data_in
390
function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4},
391
matrix1::AbstractMatrix, matrix2::AbstractMatrix,
392
matrix3::AbstractMatrix,
393
data_in::AbstractArray{<:Any, 4},
394
tmp1 = zeros(eltype(data_out), size(data_out, 1),
395
size(matrix1, 1), size(matrix1, 2),
396
size(matrix1, 2)),
397
tmp2 = zeros(eltype(data_out), size(data_out, 1),
398
size(matrix1, 1), size(matrix1, 1),
399
size(matrix1, 2)))
400
401
# Interpolate in x-direction
402
# @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k]
403
@turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2),
404
v in axes(tmp1, 1)
405
406
res = zero(eltype(tmp1))
407
for ii in axes(matrix1, 2)
408
res += matrix1[i, ii] * data_in[v, ii, j, k]
409
end
410
tmp1[v, i, j, k] = res
411
end
412
413
# Interpolate in y-direction
414
# @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k]
415
@turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2),
416
v in axes(tmp2, 1)
417
418
res = zero(eltype(tmp1))
419
for jj in axes(matrix2, 2)
420
res += matrix2[j, jj] * tmp1[v, i, jj, k]
421
end
422
tmp2[v, i, j, k] = res
423
end
424
425
# Interpolate in z-direction
426
# @tullio threads=false data_out[v, i, j, k] = matrix3[k, kk] * tmp2[v, i, j, kk]
427
@turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2),
428
v in axes(data_out, 1)
429
430
res = zero(eltype(data_out))
431
for kk in axes(matrix3, 2)
432
res += matrix3[k, kk] * tmp2[v, i, j, kk]
433
end
434
data_out[v, i, j, k] = res
435
end
436
437
return nothing
438
end
439
440
# 3D version, apply matrixJ to dimension J of data_in and add the result to data_out
441
function add_multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4},
442
matrix1::AbstractMatrix, matrix2::AbstractMatrix,
443
matrix3::AbstractMatrix,
444
data_in::AbstractArray{<:Any, 4},
445
tmp1 = zeros(eltype(data_out), size(data_out, 1),
446
size(matrix1, 1), size(matrix1, 2),
447
size(matrix1, 2)),
448
tmp2 = zeros(eltype(data_out), size(data_out, 1),
449
size(matrix1, 1), size(matrix1, 1),
450
size(matrix1, 2)))
451
452
# Interpolate in x-direction
453
# @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k]
454
@turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2),
455
v in axes(tmp1, 1)
456
457
res = zero(eltype(tmp1))
458
for ii in axes(matrix1, 2)
459
res += matrix1[i, ii] * data_in[v, ii, j, k]
460
end
461
tmp1[v, i, j, k] = res
462
end
463
464
# Interpolate in y-direction
465
# @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k]
466
@turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2),
467
v in axes(tmp2, 1)
468
469
res = zero(eltype(tmp1))
470
for jj in axes(matrix2, 2)
471
res += matrix2[j, jj] * tmp1[v, i, jj, k]
472
end
473
tmp2[v, i, j, k] = res
474
end
475
476
# Interpolate in z-direction
477
# @tullio threads=false data_out[v, i, j, k] += matrix3[k, kk] * tmp2[v, i, j, kk]
478
@turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2),
479
v in axes(data_out, 1)
480
481
res = zero(eltype(data_out))
482
for kk in axes(matrix3, 2)
483
res += matrix3[k, kk] * tmp2[v, i, j, kk]
484
end
485
data_out[v, i, j, k] += res
486
end
487
488
return nothing
489
end
490
491