Path: blob/master/thirdparty/meshoptimizer/vcacheoptimizer.cpp
9903 views
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details1#include "meshoptimizer.h"23#include <assert.h>4#include <string.h>56// This work is based on:7// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 20068// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 20079namespace meshopt10{1112const size_t kCacheSizeMax = 16;13const size_t kValenceMax = 8;1415struct VertexScoreTable16{17float cache[1 + kCacheSizeMax];18float live[1 + kValenceMax];19};2021// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD22static const VertexScoreTable kVertexScoreTable = {23{0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f},24{0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f},25};2627// Tuned to minimize the encoded index buffer size28static const VertexScoreTable kVertexScoreTableStrip = {29{0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f},30{0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f},31};3233struct TriangleAdjacency34{35unsigned int* counts;36unsigned int* offsets;37unsigned int* data;38};3940static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)41{42size_t face_count = index_count / 3;4344// allocate arrays45adjacency.counts = allocator.allocate<unsigned int>(vertex_count);46adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);47adjacency.data = allocator.allocate<unsigned int>(index_count);4849// fill triangle counts50memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));5152for (size_t i = 0; i < index_count; ++i)53{54assert(indices[i] < vertex_count);5556adjacency.counts[indices[i]]++;57}5859// fill offset table60unsigned int offset = 0;6162for (size_t i = 0; i < vertex_count; ++i)63{64adjacency.offsets[i] = offset;65offset += adjacency.counts[i];66}6768assert(offset == index_count);6970// fill triangle data71for (size_t i = 0; i < face_count; ++i)72{73unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];7475adjacency.data[adjacency.offsets[a]++] = unsigned(i);76adjacency.data[adjacency.offsets[b]++] = unsigned(i);77adjacency.data[adjacency.offsets[c]++] = unsigned(i);78}7980// fix offsets that have been disturbed by the previous pass81for (size_t i = 0; i < vertex_count; ++i)82{83assert(adjacency.offsets[i] >= adjacency.counts[i]);8485adjacency.offsets[i] -= adjacency.counts[i];86}87}8889static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count)90{91// check dead-end stack92while (dead_end_top)93{94unsigned int vertex = dead_end[--dead_end_top];9596if (live_triangles[vertex] > 0)97return vertex;98}99100// input order101while (input_cursor < vertex_count)102{103if (live_triangles[input_cursor] > 0)104return input_cursor;105106++input_cursor;107}108109return ~0u;110}111112static unsigned int getNextVertexNeighbor(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)113{114unsigned int best_candidate = ~0u;115int best_priority = -1;116117for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate)118{119unsigned int vertex = *next_candidate;120121// otherwise we don't need to process it122if (live_triangles[vertex] > 0)123{124int priority = 0;125126// will it be in cache after fanning?127if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)128{129priority = timestamp - cache_timestamps[vertex]; // position in cache130}131132if (priority > best_priority)133{134best_candidate = vertex;135best_priority = priority;136}137}138}139140return best_candidate;141}142143static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles)144{145assert(cache_position >= -1 && cache_position < int(kCacheSizeMax));146147unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax;148149return table->cache[1 + cache_position] + table->live[live_triangles_clamped];150}151152static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)153{154// input order155while (input_cursor < face_count)156{157if (!emitted_flags[input_cursor])158return input_cursor;159160++input_cursor;161}162163return ~0u;164}165166} // namespace meshopt167168void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)169{170using namespace meshopt;171172assert(index_count % 3 == 0);173174meshopt_Allocator allocator;175176// guard for empty meshes177if (index_count == 0 || vertex_count == 0)178return;179180// support in-place optimization181if (destination == indices)182{183unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);184memcpy(indices_copy, indices, index_count * sizeof(unsigned int));185indices = indices_copy;186}187188unsigned int cache_size = 16;189assert(cache_size <= kCacheSizeMax);190191size_t face_count = index_count / 3;192193// build adjacency information194TriangleAdjacency adjacency = {};195buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);196197// live triangle counts; note, we alias adjacency.counts as we remove triangles after emitting them so the counts always match198unsigned int* live_triangles = adjacency.counts;199200// emitted flags201unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);202memset(emitted_flags, 0, face_count);203204// compute initial vertex scores205float* vertex_scores = allocator.allocate<float>(vertex_count);206207for (size_t i = 0; i < vertex_count; ++i)208vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);209210// compute triangle scores211float* triangle_scores = allocator.allocate<float>(face_count);212213for (size_t i = 0; i < face_count; ++i)214{215unsigned int a = indices[i * 3 + 0];216unsigned int b = indices[i * 3 + 1];217unsigned int c = indices[i * 3 + 2];218219triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];220}221222unsigned int cache_holder[2 * (kCacheSizeMax + 4)];223unsigned int* cache = cache_holder;224unsigned int* cache_new = cache_holder + kCacheSizeMax + 4;225size_t cache_count = 0;226227unsigned int current_triangle = 0;228unsigned int input_cursor = 1;229230unsigned int output_triangle = 0;231232while (current_triangle != ~0u)233{234assert(output_triangle < face_count);235236unsigned int a = indices[current_triangle * 3 + 0];237unsigned int b = indices[current_triangle * 3 + 1];238unsigned int c = indices[current_triangle * 3 + 2];239240// output indices241destination[output_triangle * 3 + 0] = a;242destination[output_triangle * 3 + 1] = b;243destination[output_triangle * 3 + 2] = c;244output_triangle++;245246// update emitted flags247emitted_flags[current_triangle] = true;248triangle_scores[current_triangle] = 0;249250// new triangle251size_t cache_write = 0;252cache_new[cache_write++] = a;253cache_new[cache_write++] = b;254cache_new[cache_write++] = c;255256// old triangles257for (size_t i = 0; i < cache_count; ++i)258{259unsigned int index = cache[i];260261cache_new[cache_write] = index;262cache_write += (index != a) & (index != b) & (index != c);263}264265unsigned int* cache_temp = cache;266cache = cache_new, cache_new = cache_temp;267cache_count = cache_write > cache_size ? cache_size : cache_write;268269// remove emitted triangle from adjacency data270// this makes sure that we spend less time traversing these lists on subsequent iterations271// live triangle counts are updated as a byproduct of these adjustments272for (size_t k = 0; k < 3; ++k)273{274unsigned int index = indices[current_triangle * 3 + k];275276unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index];277size_t neighbors_size = adjacency.counts[index];278279for (size_t i = 0; i < neighbors_size; ++i)280{281unsigned int tri = neighbors[i];282283if (tri == current_triangle)284{285neighbors[i] = neighbors[neighbors_size - 1];286adjacency.counts[index]--;287break;288}289}290}291292unsigned int best_triangle = ~0u;293float best_score = 0;294295// update cache positions, vertex scores and triangle scores, and find next best triangle296for (size_t i = 0; i < cache_write; ++i)297{298unsigned int index = cache[i];299300// no need to update scores if we are never going to use this vertex301if (adjacency.counts[index] == 0)302continue;303304int cache_position = i >= cache_size ? -1 : int(i);305306// update vertex score307float score = vertexScore(table, cache_position, live_triangles[index]);308float score_diff = score - vertex_scores[index];309310vertex_scores[index] = score;311312// update scores of vertex triangles313const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[index];314const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[index];315316for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)317{318unsigned int tri = *it;319assert(!emitted_flags[tri]);320321float tri_score = triangle_scores[tri] + score_diff;322assert(tri_score > 0);323324best_triangle = best_score < tri_score ? tri : best_triangle;325best_score = best_score < tri_score ? tri_score : best_score;326327triangle_scores[tri] = tri_score;328}329}330331// step through input triangles in order if we hit a dead-end332current_triangle = best_triangle;333334if (current_triangle == ~0u)335{336current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);337}338}339340assert(input_cursor == face_count);341assert(output_triangle == face_count);342}343344void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)345{346meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);347}348349void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)350{351meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);352}353354void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)355{356using namespace meshopt;357358assert(index_count % 3 == 0);359assert(cache_size >= 3);360361meshopt_Allocator allocator;362363// guard for empty meshes364if (index_count == 0 || vertex_count == 0)365return;366367// support in-place optimization368if (destination == indices)369{370unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);371memcpy(indices_copy, indices, index_count * sizeof(unsigned int));372indices = indices_copy;373}374375size_t face_count = index_count / 3;376377// build adjacency information378TriangleAdjacency adjacency = {};379buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);380381// live triangle counts382unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);383memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));384385// cache time stamps386unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);387memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));388389// dead-end stack390unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);391unsigned int dead_end_top = 0;392393// emitted flags394unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);395memset(emitted_flags, 0, face_count);396397unsigned int current_vertex = 0;398399unsigned int timestamp = cache_size + 1;400unsigned int input_cursor = 1; // vertex to restart from in case of dead-end401402unsigned int output_triangle = 0;403404while (current_vertex != ~0u)405{406const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;407408// emit all vertex neighbors409const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];410const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[current_vertex];411412for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)413{414unsigned int triangle = *it;415416if (!emitted_flags[triangle])417{418unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];419420// output indices421destination[output_triangle * 3 + 0] = a;422destination[output_triangle * 3 + 1] = b;423destination[output_triangle * 3 + 2] = c;424output_triangle++;425426// update dead-end stack427dead_end[dead_end_top + 0] = a;428dead_end[dead_end_top + 1] = b;429dead_end[dead_end_top + 2] = c;430dead_end_top += 3;431432// update live triangle counts433live_triangles[a]--;434live_triangles[b]--;435live_triangles[c]--;436437// update cache info438// if vertex is not in cache, put it in cache439if (timestamp - cache_timestamps[a] > cache_size)440cache_timestamps[a] = timestamp++;441442if (timestamp - cache_timestamps[b] > cache_size)443cache_timestamps[b] = timestamp++;444445if (timestamp - cache_timestamps[c] > cache_size)446cache_timestamps[c] = timestamp++;447448// update emitted flags449emitted_flags[triangle] = true;450}451}452453// next candidates are the ones we pushed to dead-end stack just now454const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;455456// get next vertex457current_vertex = getNextVertexNeighbor(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);458459if (current_vertex == ~0u)460{461current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);462}463}464465assert(output_triangle == face_count);466}467468469