Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/meshoptimizer/vcacheoptimizer.cpp
9903 views
1
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
2
#include "meshoptimizer.h"
3
4
#include <assert.h>
5
#include <string.h>
6
7
// This work is based on:
8
// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006
9
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
10
namespace meshopt
11
{
12
13
const size_t kCacheSizeMax = 16;
14
const size_t kValenceMax = 8;
15
16
struct VertexScoreTable
17
{
18
float cache[1 + kCacheSizeMax];
19
float live[1 + kValenceMax];
20
};
21
22
// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD
23
static const VertexScoreTable kVertexScoreTable = {
24
{0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f},
25
{0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f},
26
};
27
28
// Tuned to minimize the encoded index buffer size
29
static const VertexScoreTable kVertexScoreTableStrip = {
30
{0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f},
31
{0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f},
32
};
33
34
struct TriangleAdjacency
35
{
36
unsigned int* counts;
37
unsigned int* offsets;
38
unsigned int* data;
39
};
40
41
static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
42
{
43
size_t face_count = index_count / 3;
44
45
// allocate arrays
46
adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
47
adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
48
adjacency.data = allocator.allocate<unsigned int>(index_count);
49
50
// fill triangle counts
51
memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
52
53
for (size_t i = 0; i < index_count; ++i)
54
{
55
assert(indices[i] < vertex_count);
56
57
adjacency.counts[indices[i]]++;
58
}
59
60
// fill offset table
61
unsigned int offset = 0;
62
63
for (size_t i = 0; i < vertex_count; ++i)
64
{
65
adjacency.offsets[i] = offset;
66
offset += adjacency.counts[i];
67
}
68
69
assert(offset == index_count);
70
71
// fill triangle data
72
for (size_t i = 0; i < face_count; ++i)
73
{
74
unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
75
76
adjacency.data[adjacency.offsets[a]++] = unsigned(i);
77
adjacency.data[adjacency.offsets[b]++] = unsigned(i);
78
adjacency.data[adjacency.offsets[c]++] = unsigned(i);
79
}
80
81
// fix offsets that have been disturbed by the previous pass
82
for (size_t i = 0; i < vertex_count; ++i)
83
{
84
assert(adjacency.offsets[i] >= adjacency.counts[i]);
85
86
adjacency.offsets[i] -= adjacency.counts[i];
87
}
88
}
89
90
static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count)
91
{
92
// check dead-end stack
93
while (dead_end_top)
94
{
95
unsigned int vertex = dead_end[--dead_end_top];
96
97
if (live_triangles[vertex] > 0)
98
return vertex;
99
}
100
101
// input order
102
while (input_cursor < vertex_count)
103
{
104
if (live_triangles[input_cursor] > 0)
105
return input_cursor;
106
107
++input_cursor;
108
}
109
110
return ~0u;
111
}
112
113
static unsigned int getNextVertexNeighbor(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
114
{
115
unsigned int best_candidate = ~0u;
116
int best_priority = -1;
117
118
for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate)
119
{
120
unsigned int vertex = *next_candidate;
121
122
// otherwise we don't need to process it
123
if (live_triangles[vertex] > 0)
124
{
125
int priority = 0;
126
127
// will it be in cache after fanning?
128
if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)
129
{
130
priority = timestamp - cache_timestamps[vertex]; // position in cache
131
}
132
133
if (priority > best_priority)
134
{
135
best_candidate = vertex;
136
best_priority = priority;
137
}
138
}
139
}
140
141
return best_candidate;
142
}
143
144
static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles)
145
{
146
assert(cache_position >= -1 && cache_position < int(kCacheSizeMax));
147
148
unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax;
149
150
return table->cache[1 + cache_position] + table->live[live_triangles_clamped];
151
}
152
153
static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)
154
{
155
// input order
156
while (input_cursor < face_count)
157
{
158
if (!emitted_flags[input_cursor])
159
return input_cursor;
160
161
++input_cursor;
162
}
163
164
return ~0u;
165
}
166
167
} // namespace meshopt
168
169
void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)
170
{
171
using namespace meshopt;
172
173
assert(index_count % 3 == 0);
174
175
meshopt_Allocator allocator;
176
177
// guard for empty meshes
178
if (index_count == 0 || vertex_count == 0)
179
return;
180
181
// support in-place optimization
182
if (destination == indices)
183
{
184
unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
185
memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
186
indices = indices_copy;
187
}
188
189
unsigned int cache_size = 16;
190
assert(cache_size <= kCacheSizeMax);
191
192
size_t face_count = index_count / 3;
193
194
// build adjacency information
195
TriangleAdjacency adjacency = {};
196
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
197
198
// live triangle counts; note, we alias adjacency.counts as we remove triangles after emitting them so the counts always match
199
unsigned int* live_triangles = adjacency.counts;
200
201
// emitted flags
202
unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
203
memset(emitted_flags, 0, face_count);
204
205
// compute initial vertex scores
206
float* vertex_scores = allocator.allocate<float>(vertex_count);
207
208
for (size_t i = 0; i < vertex_count; ++i)
209
vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);
210
211
// compute triangle scores
212
float* triangle_scores = allocator.allocate<float>(face_count);
213
214
for (size_t i = 0; i < face_count; ++i)
215
{
216
unsigned int a = indices[i * 3 + 0];
217
unsigned int b = indices[i * 3 + 1];
218
unsigned int c = indices[i * 3 + 2];
219
220
triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
221
}
222
223
unsigned int cache_holder[2 * (kCacheSizeMax + 4)];
224
unsigned int* cache = cache_holder;
225
unsigned int* cache_new = cache_holder + kCacheSizeMax + 4;
226
size_t cache_count = 0;
227
228
unsigned int current_triangle = 0;
229
unsigned int input_cursor = 1;
230
231
unsigned int output_triangle = 0;
232
233
while (current_triangle != ~0u)
234
{
235
assert(output_triangle < face_count);
236
237
unsigned int a = indices[current_triangle * 3 + 0];
238
unsigned int b = indices[current_triangle * 3 + 1];
239
unsigned int c = indices[current_triangle * 3 + 2];
240
241
// output indices
242
destination[output_triangle * 3 + 0] = a;
243
destination[output_triangle * 3 + 1] = b;
244
destination[output_triangle * 3 + 2] = c;
245
output_triangle++;
246
247
// update emitted flags
248
emitted_flags[current_triangle] = true;
249
triangle_scores[current_triangle] = 0;
250
251
// new triangle
252
size_t cache_write = 0;
253
cache_new[cache_write++] = a;
254
cache_new[cache_write++] = b;
255
cache_new[cache_write++] = c;
256
257
// old triangles
258
for (size_t i = 0; i < cache_count; ++i)
259
{
260
unsigned int index = cache[i];
261
262
cache_new[cache_write] = index;
263
cache_write += (index != a) & (index != b) & (index != c);
264
}
265
266
unsigned int* cache_temp = cache;
267
cache = cache_new, cache_new = cache_temp;
268
cache_count = cache_write > cache_size ? cache_size : cache_write;
269
270
// remove emitted triangle from adjacency data
271
// this makes sure that we spend less time traversing these lists on subsequent iterations
272
// live triangle counts are updated as a byproduct of these adjustments
273
for (size_t k = 0; k < 3; ++k)
274
{
275
unsigned int index = indices[current_triangle * 3 + k];
276
277
unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index];
278
size_t neighbors_size = adjacency.counts[index];
279
280
for (size_t i = 0; i < neighbors_size; ++i)
281
{
282
unsigned int tri = neighbors[i];
283
284
if (tri == current_triangle)
285
{
286
neighbors[i] = neighbors[neighbors_size - 1];
287
adjacency.counts[index]--;
288
break;
289
}
290
}
291
}
292
293
unsigned int best_triangle = ~0u;
294
float best_score = 0;
295
296
// update cache positions, vertex scores and triangle scores, and find next best triangle
297
for (size_t i = 0; i < cache_write; ++i)
298
{
299
unsigned int index = cache[i];
300
301
// no need to update scores if we are never going to use this vertex
302
if (adjacency.counts[index] == 0)
303
continue;
304
305
int cache_position = i >= cache_size ? -1 : int(i);
306
307
// update vertex score
308
float score = vertexScore(table, cache_position, live_triangles[index]);
309
float score_diff = score - vertex_scores[index];
310
311
vertex_scores[index] = score;
312
313
// update scores of vertex triangles
314
const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[index];
315
const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[index];
316
317
for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)
318
{
319
unsigned int tri = *it;
320
assert(!emitted_flags[tri]);
321
322
float tri_score = triangle_scores[tri] + score_diff;
323
assert(tri_score > 0);
324
325
best_triangle = best_score < tri_score ? tri : best_triangle;
326
best_score = best_score < tri_score ? tri_score : best_score;
327
328
triangle_scores[tri] = tri_score;
329
}
330
}
331
332
// step through input triangles in order if we hit a dead-end
333
current_triangle = best_triangle;
334
335
if (current_triangle == ~0u)
336
{
337
current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
338
}
339
}
340
341
assert(input_cursor == face_count);
342
assert(output_triangle == face_count);
343
}
344
345
void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
346
{
347
meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);
348
}
349
350
void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
351
{
352
meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);
353
}
354
355
void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
356
{
357
using namespace meshopt;
358
359
assert(index_count % 3 == 0);
360
assert(cache_size >= 3);
361
362
meshopt_Allocator allocator;
363
364
// guard for empty meshes
365
if (index_count == 0 || vertex_count == 0)
366
return;
367
368
// support in-place optimization
369
if (destination == indices)
370
{
371
unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
372
memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
373
indices = indices_copy;
374
}
375
376
size_t face_count = index_count / 3;
377
378
// build adjacency information
379
TriangleAdjacency adjacency = {};
380
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
381
382
// live triangle counts
383
unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
384
memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
385
386
// cache time stamps
387
unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
388
memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
389
390
// dead-end stack
391
unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
392
unsigned int dead_end_top = 0;
393
394
// emitted flags
395
unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
396
memset(emitted_flags, 0, face_count);
397
398
unsigned int current_vertex = 0;
399
400
unsigned int timestamp = cache_size + 1;
401
unsigned int input_cursor = 1; // vertex to restart from in case of dead-end
402
403
unsigned int output_triangle = 0;
404
405
while (current_vertex != ~0u)
406
{
407
const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;
408
409
// emit all vertex neighbors
410
const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
411
const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[current_vertex];
412
413
for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)
414
{
415
unsigned int triangle = *it;
416
417
if (!emitted_flags[triangle])
418
{
419
unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
420
421
// output indices
422
destination[output_triangle * 3 + 0] = a;
423
destination[output_triangle * 3 + 1] = b;
424
destination[output_triangle * 3 + 2] = c;
425
output_triangle++;
426
427
// update dead-end stack
428
dead_end[dead_end_top + 0] = a;
429
dead_end[dead_end_top + 1] = b;
430
dead_end[dead_end_top + 2] = c;
431
dead_end_top += 3;
432
433
// update live triangle counts
434
live_triangles[a]--;
435
live_triangles[b]--;
436
live_triangles[c]--;
437
438
// update cache info
439
// if vertex is not in cache, put it in cache
440
if (timestamp - cache_timestamps[a] > cache_size)
441
cache_timestamps[a] = timestamp++;
442
443
if (timestamp - cache_timestamps[b] > cache_size)
444
cache_timestamps[b] = timestamp++;
445
446
if (timestamp - cache_timestamps[c] > cache_size)
447
cache_timestamps[c] = timestamp++;
448
449
// update emitted flags
450
emitted_flags[triangle] = true;
451
}
452
}
453
454
// next candidates are the ones we pushed to dead-end stack just now
455
const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;
456
457
// get next vertex
458
current_vertex = getNextVertexNeighbor(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);
459
460
if (current_vertex == ~0u)
461
{
462
current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
463
}
464
}
465
466
assert(output_triangle == face_count);
467
}
468
469