Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
trixi-framework
GitHub Repository: trixi-framework/Trixi.jl
Path: blob/main/src/solvers/dgsem_p4est/dg_3d_parallel.jl
5616 views
1
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
2
# Since these FMAs can increase the performance of many numerical algorithms,
3
# we need to opt-in explicitly.
4
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
5
@muladd begin
6
#! format: noindent
7
8
function rhs!(du, u, t,
              mesh::Union{P4estMeshParallel{3}, T8codeMeshParallel{3}}, equations,
              boundary_conditions, source_terms::Source,
              dg::DG, cache) where {Source}
    # One right-hand-side evaluation on parallel 3D p4est/t8code meshes.
    # Communication is overlapped with computation: receives are posted first,
    # MPI face data are sent as soon as they are available, and the
    # MPI-dependent flux computations run after the purely local ones.
    backend = trixi_backend(u)

    # Post non-blocking receives before doing any local work
    @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache)

    # Make the local solution available on MPI interfaces and mortars ...
    @trixi_timeit timer() "prolong2mpiinterfaces" begin
        prolong2mpiinterfaces!(cache, u, mesh, equations, dg.surface_integral, dg)
    end
    @trixi_timeit timer() "prolong2mpimortars" begin
        prolong2mpimortars!(cache, u, mesh, equations, dg.mortar, dg)
    end

    # ... and start shipping it to the neighboring ranks
    @trixi_timeit timer() "start MPI send" begin
        start_mpi_send!(cache.mpi_cache, mesh, equations, dg, cache)
    end

    # Zero out du before accumulating any contributions
    @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache)

    # Purely local work: volume terms and rank-internal surface coupling
    @trixi_timeit timer() "volume integral" begin
        calc_volume_integral!(backend, du, u, mesh,
                              have_nonconservative_terms(equations), equations,
                              dg.volume_integral, dg, cache)
    end

    @trixi_timeit timer() "prolong2interfaces" begin
        prolong2interfaces!(backend, cache, u, mesh, equations, dg)
    end

    @trixi_timeit timer() "interface flux" begin
        calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh,
                             have_nonconservative_terms(equations), equations,
                             dg.surface_integral, dg, cache)
    end

    @trixi_timeit timer() "prolong2boundaries" begin
        prolong2boundaries!(cache, u, mesh, equations, dg)
    end

    @trixi_timeit timer() "boundary flux" begin
        calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
                            dg.surface_integral, dg)
    end

    @trixi_timeit timer() "prolong2mortars" begin
        prolong2mortars!(cache, u, mesh, equations, dg.mortar, dg)
    end

    @trixi_timeit timer() "mortar flux" begin
        calc_mortar_flux!(cache.elements.surface_flux_values, mesh,
                          have_nonconservative_terms(equations), equations,
                          dg.mortar, dg.surface_integral, dg, cache)
    end

    # Wait for the neighbor data, then compute the MPI-coupled surface terms
    @trixi_timeit timer() "finish MPI receive" begin
        finish_mpi_receive!(cache.mpi_cache, mesh, equations, dg, cache)
    end

    @trixi_timeit timer() "MPI interface flux" begin
        calc_mpi_interface_flux!(cache.elements.surface_flux_values, mesh,
                                 have_nonconservative_terms(equations), equations,
                                 dg.surface_integral, dg, cache)
    end

    @trixi_timeit timer() "MPI mortar flux" begin
        calc_mpi_mortar_flux!(cache.elements.surface_flux_values, mesh,
                              have_nonconservative_terms(equations), equations,
                              dg.mortar, dg.surface_integral, dg, cache)
    end

    # Accumulate all surface contributions into du
    @trixi_timeit timer() "surface integral" begin
        calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral,
                               dg, cache)
    end

    # Transform from the reference element to the physical element
    @trixi_timeit timer() "Jacobian" begin
        apply_jacobian!(backend, du, mesh, equations, dg, cache)
    end

    @trixi_timeit timer() "source terms" begin
        calc_sources!(du, u, t, source_terms, equations, dg, cache)
    end

    # Make sure all outgoing messages have left before returning
    @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache)

    return nothing
end
118
119
function prolong2mpiinterfaces!(cache, u,
                                mesh::Union{P4estMeshParallel{3},
                                            T8codeMeshParallel{3}},
                                equations, surface_integral, dg::DG)
    # Copy the locally available solution onto the MPI interface container.
    # Interfaces are "aligned at the primary element", i.e., the face indices
    # of the primary side always run forwards; the local element's volume
    # indices are obtained via "delayed indexing" with a start value plus
    # per-face-direction step sizes.
    @unpack mpi_interfaces = cache
    index_range = eachnode(dg)

    @threaded for interface in eachmpiinterface(dg, cache)
        side = mpi_interfaces.local_sides[interface]
        element = mpi_interfaces.local_neighbor_ids[interface]
        indices = mpi_interfaces.node_indices[interface]

        # Start values and step sizes translating face nodes (i, j) into
        # volume indices (i_vol, j_vol, k_vol) on the local element
        i_vol_start, i_vol_step_i, i_vol_step_j = index_to_start_step_3d(indices[1],
                                                                         index_range)
        j_vol_start, j_vol_step_i, j_vol_step_j = index_to_start_step_3d(indices[2],
                                                                         index_range)
        k_vol_start, k_vol_step_i, k_vol_step_j = index_to_start_step_3d(indices[3],
                                                                         index_range)

        i_vol = i_vol_start
        j_vol = j_vol_start
        k_vol = k_vol_start
        for j in eachnode(dg)
            for i in eachnode(dg)
                for v in eachvariable(equations)
                    mpi_interfaces.u[side, v, i, j, interface] = u[v, i_vol, j_vol,
                                                                   k_vol, element]
                end
                i_vol += i_vol_step_i
                j_vol += j_vol_step_i
                k_vol += k_vol_step_i
            end
            i_vol += i_vol_step_j
            j_vol += j_vol_step_j
            k_vol += k_vol_step_j
        end
    end

    return nothing
end
166
167
function calc_mpi_interface_flux!(surface_flux_values,
                                  mesh::Union{P4estMeshParallel{3},
                                              T8codeMeshParallel{3}},
                                  have_nonconservative_terms,
                                  equations, surface_integral, dg::DG, cache)
    # Compute surface fluxes on all MPI interfaces and store them for the
    # locally available neighbor element of each interface.
    @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces
    @unpack contravariant_vectors = cache.elements
    index_range = eachnode(dg)

    @threaded for interface in eachmpiinterface(dg, cache)
        # Element id, face direction, and side (1 = primary, 2 = secondary)
        # of the locally stored neighbor of this interface
        element = local_neighbor_ids[interface]
        indices = node_indices[interface]
        direction = indices2direction(indices)
        side = local_sides[interface]

        # Volume indices on the local element, used solely to look up the
        # normal direction from the contravariant vectors
        i_vol_start, i_vol_step_i, i_vol_step_j = index_to_start_step_3d(indices[1],
                                                                         index_range)
        j_vol_start, j_vol_step_i, j_vol_step_j = index_to_start_step_3d(indices[2],
                                                                         index_range)
        k_vol_start, k_vol_step_i, k_vol_step_j = index_to_start_step_3d(indices[3],
                                                                         index_range)

        i_vol = i_vol_start
        j_vol = j_vol_start
        k_vol = k_vol_start

        # The surface flux storage must be indexed in alignment with the
        # local element's own face indexing, which may differ from the
        # interface-aligned indexing
        surf_indices = surface_indices(indices)
        i_surf_start, i_surf_step_i, i_surf_step_j = index_to_start_step_3d(surf_indices[1],
                                                                            index_range)
        j_surf_start, j_surf_step_i, j_surf_step_j = index_to_start_step_3d(surf_indices[2],
                                                                            index_range)
        i_surf = i_surf_start
        j_surf = j_surf_start

        for j in eachnode(dg)
            for i in eachnode(dg)
                # Contravariant vectors at faces in negative coordinate
                # direction point inwards; `get_normal_direction` accounts
                # for that.
                normal_direction = get_normal_direction(direction,
                                                        contravariant_vectors,
                                                        i_vol, j_vol, k_vol,
                                                        element)

                calc_mpi_interface_flux!(surface_flux_values, mesh,
                                         have_nonconservative_terms, equations,
                                         surface_integral, dg, cache,
                                         interface, normal_direction,
                                         i, j, side,
                                         i_surf, j_surf, direction,
                                         element)

                # Advance the volume indices (normal lookup) ...
                i_vol += i_vol_step_i
                j_vol += j_vol_step_i
                k_vol += k_vol_step_i
                # ... and the surface-storage indices along the face
                i_surf += i_surf_step_i
                j_surf += j_surf_step_i
            end
            i_vol += i_vol_step_j
            j_vol += j_vol_step_j
            k_vol += k_vol_step_j
            i_surf += i_surf_step_j
            j_surf += j_surf_step_j
        end
    end

    return nothing
end
243
244
# Inlined, per-node interface flux kernel for conservation laws
# (no non-conservative terms).
@inline function calc_mpi_interface_flux!(surface_flux_values,
                                          mesh::Union{P4estMeshParallel{3},
                                                      T8codeMeshParallel{3}},
                                          have_nonconservative_terms::False, equations,
                                          surface_integral, dg::DG, cache,
                                          interface_index, normal_direction,
                                          interface_i_node_index,
                                          interface_j_node_index, local_side,
                                          surface_i_node_index, surface_j_node_index,
                                          local_direction_index, local_element_index)
    @unpack u = cache.mpi_interfaces
    @unpack surface_flux = surface_integral

    u_ll, u_rr = get_surface_node_vars(u, equations, dg,
                                       interface_i_node_index, interface_j_node_index,
                                       interface_index)

    # The stored normal points outward for the primary side (local_side == 1).
    # On the secondary side, evaluate with the flipped normal and negate the
    # result so the flux is outward-pointing for the local element.
    flux = if local_side == 1
        surface_flux(u_ll, u_rr, normal_direction, equations)
    else # local_side == 2
        -surface_flux(u_ll, u_rr, -normal_direction, equations)
    end

    for v in eachvariable(equations)
        surface_flux_values[v, surface_i_node_index, surface_j_node_index,
                            local_direction_index, local_element_index] = flux[v]
    end

    return nothing
end
275
276
# Inlined, per-node interface flux kernel for equations with
# non-conservative terms.
@inline function calc_mpi_interface_flux!(surface_flux_values,
                                          mesh::Union{P4estMeshParallel{3},
                                                      T8codeMeshParallel{3}},
                                          have_nonconservative_terms::True, equations,
                                          surface_integral, dg::DG, cache,
                                          interface_index, normal_direction,
                                          interface_i_node_index,
                                          interface_j_node_index, local_side,
                                          surface_i_node_index, surface_j_node_index,
                                          local_direction_index, local_element_index)
    @unpack u = cache.mpi_interfaces
    surface_flux, nonconservative_flux = surface_integral.surface_flux

    u_ll, u_rr = get_surface_node_vars(u, equations, dg,
                                       interface_i_node_index, interface_j_node_index,
                                       interface_index)

    # On the secondary side (local_side == 2) the normal is flipped, the
    # argument order of the non-conservative flux is swapped, and both
    # results are negated so they are outward-pointing for the local element.
    if local_side == 1
        cons_flux = surface_flux(u_ll, u_rr, normal_direction, equations)
        noncons_flux = nonconservative_flux(u_ll, u_rr, normal_direction, equations)
    else # local_side == 2
        cons_flux = -surface_flux(u_ll, u_rr, -normal_direction, equations)
        noncons_flux = -nonconservative_flux(u_rr, u_ll, -normal_direction, equations)
    end

    # The non-conservative contribution enters with a factor of 1/2
    for v in eachvariable(equations)
        surface_flux_values[v, surface_i_node_index, surface_j_node_index,
                            local_direction_index, local_element_index] = cons_flux[v] +
                                                                          0.5f0 *
                                                                          noncons_flux[v]
    end

    return nothing
end
311
312
# Copy/interpolate the locally available solution onto the MPI mortar
# container. For each locally stored neighbor: small elements (positions 1-4)
# are copied node-by-node; the large element (position 5) is first gathered in
# face orientation into a buffer and then L2-projected onto the four small
# face locations via the mortar forward operators.
function prolong2mpimortars!(cache, u,
                             mesh::Union{P4estMeshParallel{3}, T8codeMeshParallel{3}},
                             equations,
                             mortar_l2::LobattoLegendreMortarL2,
                             dg::DGSEM)
    @unpack node_indices = cache.mpi_mortars
    index_range = eachnode(dg)

    @threaded for mortar in eachmpimortar(dg, cache)
        local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar]
        local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar]

        # Get start value and step size for indices on both sides to get the correct face
        # and orientation
        small_indices = node_indices[1, mortar]
        i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1],
                                                                               index_range)
        j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2],
                                                                               index_range)
        k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3],
                                                                               index_range)

        large_indices = node_indices[2, mortar]
        i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_indices[1],
                                                                               index_range)
        j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_indices[2],
                                                                               index_range)
        k_large_start, k_large_step_i, k_large_step_j = index_to_start_step_3d(large_indices[3],
                                                                               index_range)

        for (element, position) in zip(local_neighbor_ids, local_neighbor_positions)
            if position == 5 # -> large element
                # Buffer to copy solution values of the large element in the correct orientation
                # before interpolating
                # NOTE(review): thread-local buffers indexed by `Threads.threadid()`
                # assume no task migration inside `@threaded` — presumably
                # guaranteed by Trixi's `@threaded` implementation; confirm.
                u_buffer = cache.u_threaded[Threads.threadid()]
                # temporary buffer for projections
                fstar_tmp = cache.fstar_tmp_threaded[Threads.threadid()]

                # Gather the large element's face values in face orientation
                i_large = i_large_start
                j_large = j_large_start
                k_large = k_large_start
                for j in eachnode(dg)
                    for i in eachnode(dg)
                        for v in eachvariable(equations)
                            u_buffer[v, i, j] = u[v, i_large, j_large, k_large, element]
                        end

                        i_large += i_large_step_i
                        j_large += j_large_step_i
                        k_large += k_large_step_i
                    end
                    i_large += i_large_step_j
                    j_large += j_large_step_j
                    k_large += k_large_step_j
                end

                # Interpolate large element face data from buffer to small face locations.
                # Positions 1-4 use the (lower/upper) x (lower/upper) tensor
                # products of the forward projection operators.
                multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 1, :, :,
                                             mortar),
                                        mortar_l2.forward_lower,
                                        mortar_l2.forward_lower,
                                        u_buffer,
                                        fstar_tmp)
                multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 2, :, :,
                                             mortar),
                                        mortar_l2.forward_upper,
                                        mortar_l2.forward_lower,
                                        u_buffer,
                                        fstar_tmp)
                multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 3, :, :,
                                             mortar),
                                        mortar_l2.forward_lower,
                                        mortar_l2.forward_upper,
                                        u_buffer,
                                        fstar_tmp)
                multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 4, :, :,
                                             mortar),
                                        mortar_l2.forward_upper,
                                        mortar_l2.forward_upper,
                                        u_buffer,
                                        fstar_tmp)
            else # position in (1, 2, 3, 4) -> small element
                # Copy solution data from the small elements
                i_small = i_small_start
                j_small = j_small_start
                k_small = k_small_start
                for j in eachnode(dg)
                    for i in eachnode(dg)
                        for v in eachvariable(equations)
                            cache.mpi_mortars.u[1, v, position, i, j, mortar] = u[v,
                                                                                  i_small,
                                                                                  j_small,
                                                                                  k_small,
                                                                                  element]
                        end
                        i_small += i_small_step_i
                        j_small += j_small_step_i
                        k_small += k_small_step_i
                    end
                    i_small += i_small_step_j
                    j_small += j_small_step_j
                    k_small += k_small_step_j
                end
            end
        end
    end

    return nothing
end
421
422
# Compute surface fluxes on all MPI mortars and distribute them to the
# locally stored neighbor elements.
#
# Fix: removed dead index bookkeeping. The previous version unpacked
# `contravariant_vectors`/`node_indices` and maintained `i_small`/`j_small`/
# `k_small` via `index_to_start_step_3d`, but none of these values were ever
# read — the normal direction is taken from the precomputed values stored in
# `cache.mpi_mortars` (see `get_normal_direction(cache.mpi_mortars, ...)`),
# unlike the rank-local mortar kernel that looks it up from the
# contravariant vectors. (The `*_step_j` increments even sat outside the
# node loops, further evidence the bookkeeping was vestigial.)
function calc_mpi_mortar_flux!(surface_flux_values,
                               mesh::Union{P4estMeshParallel{3}, T8codeMeshParallel{3}},
                               have_nonconservative_terms, equations,
                               mortar_l2::LobattoLegendreMortarL2,
                               surface_integral, dg::DG, cache)
    @unpack fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded = cache

    @threaded for mortar in eachmpimortar(dg, cache)
        # Choose thread-specific pre-allocated containers
        fstar_primary = fstar_primary_threaded[Threads.threadid()]
        fstar_secondary = fstar_secondary_threaded[Threads.threadid()]
        fstar_tmp = fstar_tmp_threaded[Threads.threadid()]

        # Compute the two-point fluxes node-by-node on each of the four
        # small-element face positions
        for position in 1:4
            for j in eachnode(dg)
                for i in eachnode(dg)
                    # Get the normal direction on the small element
                    # (precomputed and stored in the MPI mortar container).
                    normal_direction = get_normal_direction(cache.mpi_mortars, i, j,
                                                            position, mortar)

                    calc_mpi_mortar_flux!(fstar_primary, fstar_secondary, mesh,
                                          have_nonconservative_terms, equations,
                                          surface_integral, dg, cache,
                                          mortar, position, normal_direction,
                                          i, j)
                end
            end
        end

        # Buffer to interpolate flux values of the large element to before
        # copying in the correct orientation
        u_buffer = cache.u_threaded[Threads.threadid()]

        mpi_mortar_fluxes_to_elements!(surface_flux_values,
                                       mesh, equations, mortar_l2, dg, cache,
                                       mortar, fstar_primary, fstar_secondary, u_buffer,
                                       fstar_tmp)
    end

    return nothing
end
486
487
# Inlined, per-node mortar flux kernel on small elements for conservation
# laws (no non-conservative terms).
@inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary,
                                       mesh::Union{P4estMeshParallel{3},
                                                   T8codeMeshParallel{3}},
                                       have_nonconservative_terms::False, equations,
                                       surface_integral, dg::DG, cache,
                                       mortar_index, position_index, normal_direction,
                                       i_node_index, j_node_index)
    @unpack u = cache.mpi_mortars
    @unpack surface_flux = surface_integral

    u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index,
                                       i_node_index, j_node_index, mortar_index)

    flux = surface_flux(u_ll, u_rr, normal_direction, equations)

    # Without non-conservative terms the primary and secondary flux buffers
    # receive the identical value
    for fstar in (fstar_primary, fstar_secondary)
        set_node_vars!(fstar, flux, equations, dg,
                       i_node_index, j_node_index, position_index)
    end

    return nothing
end
511
512
# Inlined, per-node mortar flux kernel on small elements for equations with
# non-conservative terms.
@inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary,
                                       mesh::Union{P4estMeshParallel{3},
                                                   T8codeMeshParallel{3}},
                                       have_nonconservative_terms::True, equations,
                                       surface_integral, dg::DG, cache,
                                       mortar_index, position_index, normal_direction,
                                       i_node_index, j_node_index)
    @unpack u = cache.mpi_mortars
    surface_flux, nonconservative_flux = surface_integral.surface_flux

    u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index,
                                       j_node_index, mortar_index)

    cons_flux = surface_flux(u_ll, u_rr, normal_direction, equations)
    # The primary and secondary non-conservative contributions differ only
    # in the order of the solution arguments
    noncons_primary = nonconservative_flux(u_ll, u_rr, normal_direction, equations)
    noncons_secondary = nonconservative_flux(u_rr, u_ll, normal_direction,
                                             equations)

    # Non-conservative contributions enter with a factor of 1/2
    for v in eachvariable(equations)
        fstar_primary[v, i_node_index, j_node_index, position_index] = cons_flux[v] +
                                                                       0.5f0 *
                                                                       noncons_primary[v]
        fstar_secondary[v, i_node_index, j_node_index, position_index] = cons_flux[v] +
                                                                         0.5f0 *
                                                                         noncons_secondary[v]
    end

    return nothing
end
542
543
# Distribute the mortar fluxes stored in `fstar_primary`/`fstar_secondary`
# to the locally available neighbor elements of `mortar`: small elements
# (positions 1-4) receive the primary fluxes directly; the large element
# (position 5) receives the secondary fluxes projected back via the reverse
# mortar operators, sign-flipped and scaled.
@inline function mpi_mortar_fluxes_to_elements!(surface_flux_values,
                                                mesh::Union{P4estMeshParallel{3},
                                                            T8codeMeshParallel{3}},
                                                equations,
                                                mortar_l2::LobattoLegendreMortarL2,
                                                dg::DGSEM, cache, mortar, fstar_primary,
                                                fstar_secondary,
                                                u_buffer, fstar_tmp)
    @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars
    index_range = eachnode(dg)

    small_indices = node_indices[1, mortar]
    small_direction = indices2direction(small_indices)
    large_indices = node_indices[2, mortar]
    large_direction = indices2direction(large_indices)
    large_surface_indices = surface_indices(large_indices)

    # Start values and step sizes for writing into the large element's
    # surface storage (its face indexing may be flipped w.r.t. the mortar)
    i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_surface_indices[1],
                                                                           index_range)
    j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_surface_indices[2],
                                                                           index_range)

    for (element, position) in zip(local_neighbor_ids[mortar],
                                   local_neighbor_positions[mortar])
        if position == 5 # -> large element
            # Project small fluxes to large element.
            # Position 1 initializes the buffer; positions 2-4 accumulate.
            multiply_dimensionwise!(u_buffer,
                                    mortar_l2.reverse_lower, mortar_l2.reverse_lower,
                                    view(fstar_secondary, .., 1),
                                    fstar_tmp)
            add_multiply_dimensionwise!(u_buffer,
                                        mortar_l2.reverse_upper,
                                        mortar_l2.reverse_lower,
                                        view(fstar_secondary, .., 2),
                                        fstar_tmp)
            add_multiply_dimensionwise!(u_buffer,
                                        mortar_l2.reverse_lower,
                                        mortar_l2.reverse_upper,
                                        view(fstar_secondary, .., 3),
                                        fstar_tmp)
            add_multiply_dimensionwise!(u_buffer,
                                        mortar_l2.reverse_upper,
                                        mortar_l2.reverse_upper,
                                        view(fstar_secondary, .., 4),
                                        fstar_tmp)
            # The flux is calculated in the outward direction of the small elements,
            # so the sign must be switched to get the flux in outward direction
            # of the large element.
            # The contravariant vectors of the large element (and therefore the normal
            # vectors of the large element as well) are four times as large as the
            # contravariant vectors of the small elements. Therefore, the flux needs
            # to be scaled by a factor of 4 to obtain the flux of the large element.
            u_buffer .*= -4
            # Copy interpolated flux values from buffer to large element face in the
            # correct orientation.
            # Note that the index of the small sides will always run forward but
            # the index of the large side might need to run backwards for flipped sides.
            i_large = i_large_start
            j_large = j_large_start
            for j in eachnode(dg)
                for i in eachnode(dg)
                    for v in eachvariable(equations)
                        surface_flux_values[v, i_large, j_large, large_direction, element] = u_buffer[v,
                                                                                                      i,
                                                                                                      j]
                    end
                    i_large += i_large_step_i
                    j_large += j_large_step_i
                end
                i_large += i_large_step_j
                j_large += j_large_step_j
            end
        else # position in (1, 2, 3, 4) -> small element
            # Copy solution small to small (small faces are mortar-aligned,
            # so no index translation is needed)
            for j in eachnode(dg)
                for i in eachnode(dg)
                    for v in eachvariable(equations)
                        surface_flux_values[v, i, j, small_direction, element] = fstar_primary[v,
                                                                                               i,
                                                                                               j,
                                                                                               position]
                    end
                end
            end
        end
    end

    return nothing
end
632
end # muladd
633
634