Path: blob/master/thirdparty/meshoptimizer/meshletutils.cpp
59209 views
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details1#include "meshoptimizer.h"23#include <assert.h>4#include <float.h>5#include <math.h>6#include <string.h>78// This work is based on:9// Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 201610// Jack Ritter. An Efficient Bounding Sphere. 199011// Thomas Larsson. Fast and Tight Fitting Bounding Spheres. 200812namespace meshopt13{1415// This must be <= 256 since meshlet indices are stored as bytes16const size_t kMeshletMaxVertices = 256;1718// A reasonable limit is around 2*max_vertices or less19const size_t kMeshletMaxTriangles = 512;2021static void computeBoundingSphere(float result[4], const float* points, size_t count, size_t points_stride, const float* radii, size_t radii_stride, size_t axis_count, const unsigned int* indices = NULL)22{23static const float axes[7][3] = {24// X, Y, Z25{1, 0, 0},26{0, 1, 0},27{0, 0, 1},2829// XYZ, -XYZ, X-YZ, XY-Z; normalized to unit length30{0.57735026f, 0.57735026f, 0.57735026f},31{-0.57735026f, 0.57735026f, 0.57735026f},32{0.57735026f, -0.57735026f, 0.57735026f},33{0.57735026f, 0.57735026f, -0.57735026f},34};3536assert(count > 0);37assert(axis_count <= sizeof(axes) / sizeof(axes[0]));3839size_t points_stride_float = points_stride / sizeof(float);40size_t radii_stride_float = radii_stride / sizeof(float);4142// find extremum points along all axes; for each axis we get a pair of points with min/max coordinates43unsigned int pmin[7], pmax[7];44float tmin[7], tmax[7];4546for (size_t axis = 0; axis < axis_count; ++axis)47{48pmin[axis] = pmax[axis] = 0;49tmin[axis] = FLT_MAX;50tmax[axis] = -FLT_MAX;51}5253for (size_t i = 0; i < count; ++i)54{55unsigned int v = indices ? indices[i] : unsigned(i);56const float* p = points + v * points_stride_float;57float r = radii[v * radii_stride_float];5859for (size_t axis = 0; axis < axis_count; ++axis)60{61const float* ax = axes[axis];6263float tp = ax[0] * p[0] + ax[1] * p[1] + ax[2] * p[2];64float tpmin = tp - r, tpmax = tp + r;6566pmin[axis] = (tpmin < tmin[axis]) ? v : pmin[axis];67pmax[axis] = (tpmax > tmax[axis]) ? v : pmax[axis];68tmin[axis] = (tpmin < tmin[axis]) ? tpmin : tmin[axis];69tmax[axis] = (tpmax > tmax[axis]) ? tpmax : tmax[axis];70}71}7273// find the pair of points with largest distance74size_t paxis = 0;75float paxisdr = 0;7677for (size_t axis = 0; axis < axis_count; ++axis)78{79const float* p1 = points + pmin[axis] * points_stride_float;80const float* p2 = points + pmax[axis] * points_stride_float;81float r1 = radii[pmin[axis] * radii_stride_float];82float r2 = radii[pmax[axis] * radii_stride_float];8384float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]);85float dr = sqrtf(d2) + r1 + r2;8687if (dr > paxisdr)88{89paxisdr = dr;90paxis = axis;91}92}9394// use the longest segment as the initial sphere diameter95const float* p1 = points + pmin[paxis] * points_stride_float;96const float* p2 = points + pmax[paxis] * points_stride_float;97float r1 = radii[pmin[paxis] * radii_stride_float];98float r2 = radii[pmax[paxis] * radii_stride_float];99100float paxisd = sqrtf((p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]));101float paxisk = paxisd > 0 ? (paxisd + r2 - r1) / (2 * paxisd) : 0.f;102103float center[3] = {p1[0] + (p2[0] - p1[0]) * paxisk, p1[1] + (p2[1] - p1[1]) * paxisk, p1[2] + (p2[2] - p1[2]) * paxisk};104float radius = paxisdr / 2;105106// iteratively adjust the sphere up until all points fit107for (size_t i = 0; i < count; ++i)108{109unsigned int v = indices ? indices[i] : unsigned(i);110const float* p = points + v * points_stride_float;111float r = radii[v * radii_stride_float];112113float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);114float d = sqrtf(d2);115116if (d + r > radius)117{118float k = d > 0 ? (d + r - radius) / (2 * d) : 0.f;119120center[0] += k * (p[0] - center[0]);121center[1] += k * (p[1] - center[1]);122center[2] += k * (p[2] - center[2]);123radius = (radius + d + r) / 2;124}125}126127result[0] = center[0];128result[1] = center[1];129result[2] = center[2];130result[3] = radius;131}132133static meshopt_Bounds computeClusterBounds(const unsigned int* indices, size_t index_count, const unsigned int* corners, size_t corner_count, const float* vertex_positions, size_t vertex_positions_stride)134{135size_t vertex_stride_float = vertex_positions_stride / sizeof(float);136137// compute triangle normals (.w completes plane equation)138float normals[kMeshletMaxTriangles][4];139size_t triangles = 0;140141for (size_t i = 0; i < index_count; i += 3)142{143unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];144145const float* p0 = vertex_positions + vertex_stride_float * a;146const float* p1 = vertex_positions + vertex_stride_float * b;147const float* p2 = vertex_positions + vertex_stride_float * c;148149float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};150float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};151152float normalx = p10[1] * p20[2] - p10[2] * p20[1];153float normaly = p10[2] * p20[0] - p10[0] * p20[2];154float normalz = p10[0] * p20[1] - p10[1] * p20[0];155156float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);157158// no need to include degenerate triangles - they will be invisible anyway159if (area == 0.f)160continue;161162normalx /= area;163normaly /= area;164normalz /= area;165166// record triangle normals; normal and corner 0 define a plane equation167normals[triangles][0] = normalx;168normals[triangles][1] = normaly;169normals[triangles][2] = normalz;170normals[triangles][3] = -(normalx * p0[0] + normaly * p0[1] + normalz * p0[2]);171triangles++;172}173174meshopt_Bounds bounds = {};175176// degenerate cluster, no valid triangles => trivial reject (cone data is 0)177if (triangles == 0)178return bounds;179180const float rzero = 0.f;181182// compute cluster bounding sphere; we'll use the center to determine normal cone apex as well183float psphere[4] = {};184computeBoundingSphere(psphere, vertex_positions, corner_count, vertex_positions_stride, &rzero, 0, 7, corners);185186float center[3] = {psphere[0], psphere[1], psphere[2]};187188// treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis189float nsphere[4] = {};190computeBoundingSphere(nsphere, normals[0], triangles, sizeof(float) * 4, &rzero, 0, 3);191192float axis[3] = {nsphere[0], nsphere[1], nsphere[2]};193float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]);194float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength;195196axis[0] *= invaxislength;197axis[1] *= invaxislength;198axis[2] *= invaxislength;199200// compute a tight cone around all normals, mindp = cos(angle/2)201float mindp = 1.f;202203for (size_t i = 0; i < triangles; ++i)204{205float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2];206207mindp = (dp < mindp) ? dp : mindp;208}209210// fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones211bounds.center[0] = center[0];212bounds.center[1] = center[1];213bounds.center[2] = center[2];214bounds.radius = psphere[3];215216// degenerate cluster, normal cone is larger than a hemisphere => trivial accept217// note that if mindp is positive but close to 0, the triangle intersection code below gets less stable218// we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful219if (mindp <= 0.1f)220{221bounds.cone_cutoff = 1;222bounds.cone_cutoff_s8 = 127;223return bounds;224}225226float maxt = 0;227228// we need to find the point on center-t*axis ray that lies in negative half-space of all triangles229for (size_t i = 0; i < triangles; ++i)230{231// dot(center-t*axis-corner, trinormal) = 0232// dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0233float dc = center[0] * normals[i][0] + center[1] * normals[i][1] + center[2] * normals[i][2] + normals[i][3];234float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2];235236// dn should be larger than mindp cutoff above237assert(dn > 0.f);238float t = dc / dn;239240maxt = (t > maxt) ? t : maxt;241}242243// cone apex should be in the negative half-space of all cluster triangles by construction244bounds.cone_apex[0] = center[0] - axis[0] * maxt;245bounds.cone_apex[1] = center[1] - axis[1] * maxt;246bounds.cone_apex[2] = center[2] - axis[2] * maxt;247248// note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis249bounds.cone_axis[0] = axis[0];250bounds.cone_axis[1] = axis[1];251bounds.cone_axis[2] = axis[2];252253// cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone254// which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a))255bounds.cone_cutoff = sqrtf(1 - mindp * mindp);256257// quantize axis & cutoff to 8-bit SNORM format258bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8));259bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8));260bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8));261262// for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error263float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]);264float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]);265float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]);266267// note that we need to round this up instead of rounding to nearest, hence +1268int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1);269270bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 127 : (signed char)(cone_cutoff_s8);271272return bounds;273}274275} // namespace meshopt276277meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)278{279using namespace meshopt;280281assert(index_count % 3 == 0);282assert(index_count / 3 <= kMeshletMaxTriangles);283assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);284assert(vertex_positions_stride % sizeof(float) == 0);285286(void)vertex_count;287288unsigned int cache[512];289memset(cache, -1, sizeof(cache));290291unsigned int corners[kMeshletMaxTriangles * 3 + 1]; // +1 for branchless slot292size_t corner_count = 0;293294for (size_t i = 0; i < index_count; ++i)295{296unsigned int v = indices[i];297assert(v < vertex_count);298299unsigned int& c = cache[v & (sizeof(cache) / sizeof(cache[0]) - 1)];300301// branchless append if vertex isn't in cache302corners[corner_count] = v;303corner_count += (c != v);304c = v;305}306307return computeClusterBounds(indices, index_count, corners, corner_count, vertex_positions, vertex_positions_stride);308}309310meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)311{312using namespace meshopt;313314assert(triangle_count <= kMeshletMaxTriangles);315assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);316assert(vertex_positions_stride % sizeof(float) == 0);317318(void)vertex_count;319320unsigned int indices[kMeshletMaxTriangles * 3];321size_t corner_count = 0;322323for (size_t i = 0; i < triangle_count * 3; ++i)324{325unsigned char t = meshlet_triangles[i];326unsigned int index = meshlet_vertices[t];327assert(index < vertex_count);328329indices[i] = index;330331// meshlet_vertices[] slice should only contain vertices used by triangle indices, which is the case for any well formed meshlet332corner_count = t >= corner_count ? t + 1 : corner_count;333}334335return computeClusterBounds(indices, triangle_count * 3, meshlet_vertices, corner_count, vertex_positions, vertex_positions_stride);336}337338meshopt_Bounds meshopt_computeSphereBounds(const float* positions, size_t count, size_t positions_stride, const float* radii, size_t radii_stride)339{340using namespace meshopt;341342assert(positions_stride >= 12 && positions_stride <= 256);343assert(positions_stride % sizeof(float) == 0);344assert((radii_stride >= 4 && radii_stride <= 256) || radii == NULL);345assert(radii_stride % sizeof(float) == 0);346347meshopt_Bounds bounds = {};348349if (count == 0)350return bounds;351352const float rzero = 0.f;353354float psphere[4] = {};355computeBoundingSphere(psphere, positions, count, positions_stride, radii ? radii : &rzero, radii ? radii_stride : 0, 7);356357bounds.center[0] = psphere[0];358bounds.center[1] = psphere[1];359bounds.center[2] = psphere[2];360bounds.radius = psphere[3];361362return bounds;363}364365void meshopt_optimizeMeshletLevel(unsigned int* meshlet_vertices, size_t vertex_count, unsigned char* meshlet_triangles, size_t triangle_count, int level)366{367using namespace meshopt;368369assert(triangle_count <= kMeshletMaxTriangles);370assert(vertex_count <= kMeshletMaxVertices);371assert(level >= 0 && level <= 9);372373unsigned char* indices = meshlet_triangles;374unsigned int* vertices = meshlet_vertices;375376// cache tracks vertex timestamps (corresponding to triangle index! all 3 vertices are added at the same time and never removed)377unsigned char cache[kMeshletMaxVertices];378memset(cache, 0, vertex_count);379380// note that we start from a value that means all vertices aren't in cache381unsigned char cache_last = 128;382const unsigned char cache_cutoff = 3; // 3 triangles = ~5..9 vertices depending on reuse383384// vertex valence is used to prioritize triangles for level>0385// note: we use 8-bit counters for performance; for outlier vertices the valence is incorrect but that just affects the heuristic386unsigned char valence[kMeshletMaxVertices];387memset(valence, 0, vertex_count);388389for (size_t i = 0; i < triangle_count; ++i)390{391unsigned char a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];392assert(a < vertex_count && b < vertex_count && c < vertex_count);393394valence[a]++;395valence[b]++;396valence[c]++;397}398399for (size_t i = 0; i < triangle_count; ++i)400{401int next = -1;402int next_score = -1;403int edges = 0;404405for (size_t j = i; j < triangle_count; ++j)406{407unsigned char a = indices[j * 3 + 0], b = indices[j * 3 + 1], c = indices[j * 3 + 2];408assert(a < vertex_count && b < vertex_count && c < vertex_count);409410// compute cache distance using unsigned 8-bit subtraction, so cache timestamp overflow is handled gracefully411unsigned char ad = (unsigned char)(cache_last - cache[a]);412unsigned char bd = (unsigned char)(cache_last - cache[b]);413unsigned char cd = (unsigned char)(cache_last - cache[c]);414415int match = (ad < cache_cutoff) + (bd < cache_cutoff) + (cd < cache_cutoff);416417if (level)418{419// prefer low minimum valence420int vmin = valence[a] < valence[b] ? valence[a] : valence[b];421vmin = valence[c] < vmin ? valence[c] : vmin;422423// prefer vertices with smaller cache distance and valence to improve traversal locality424int score = match * 1024 + (1023 - ad - bd - cd);425score = score * 256 + (255 - vmin);426427next = (score > next_score) ? int(j) : next;428next_score = (score > next_score) ? score : next_score;429430// terminate after finding enough edge matches431if (match >= 2 && ++edges >= level)432break;433}434else435{436int score = match;437438next = (score > next_score) ? int(j) : next;439next_score = (score > next_score) ? score : next_score;440441// settle for a first edge match, which makes the function ~linear in practice442if (match >= 2)443break;444}445}446447assert(next >= 0);448449unsigned char a = indices[next * 3 + 0], b = indices[next * 3 + 1], c = indices[next * 3 + 2];450451// shift triangles before the next one forward so that we always keep an ordered partition452// note: this could have swapped triangles [i] and [next] but that distorts the order and may skew the output sequence453memmove(indices + (i + 1) * 3, indices + i * 3, (next - i) * 3 * sizeof(unsigned char));454455indices[i * 3 + 0] = a;456indices[i * 3 + 1] = b;457indices[i * 3 + 2] = c;458459// cache timestamp is the same between all vertices of each triangle to reduce overflow460cache_last++;461cache[a] = cache_last;462cache[b] = cache_last;463cache[c] = cache_last;464465// update vertex valences for scoring heuristic466valence[a]--;467valence[b]--;468valence[c]--;469}470471// rotate triangles to maximize compressibility; only done at level >= 1 for compatibility472if (level >= 1)473{474memset(cache, 0, vertex_count);475476for (size_t i = 0; i < triangle_count; ++i)477{478unsigned char a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];479480// if only the middle vertex has been used, rotate triangle to ensure new vertices are always sequential481if (!cache[a] && cache[b] && !cache[c])482{483// abc -> bca484unsigned char t = a;485a = b, b = c, c = t;486}487else if (!cache[a] && !cache[b] && !cache[c])488{489// out of three edges, the edge ab can not be reused by subsequent triangles in some encodings490// if subsequent triangles don't share edges ca or bc, we can rotate the triangle to fix this491bool needab = false, needbc = false, needca = false;492493for (size_t j = i + 1; j < triangle_count && j <= i + 3; ++j)494{495unsigned char oa = indices[j * 3 + 0], ob = indices[j * 3 + 1], oc = indices[j * 3 + 2];496497// note: edge comparisons are reversed as reused edges are flipped498needab |= (oa == b && ob == a) || (ob == b && oc == a) || (oc == b && oa == a);499needbc |= (oa == c && ob == b) || (ob == c && oc == b) || (oc == c && oa == b);500needca |= (oa == a && ob == c) || (ob == a && oc == c) || (oc == a && oa == c);501}502503if (needab && !needbc)504{505// abc -> bca506unsigned char t = a;507a = b, b = c, c = t;508}509else if (needab && !needca)510{511// abc -> cab512unsigned char t = c;513c = b, b = a, a = t;514}515}516517indices[i * 3 + 0] = a, indices[i * 3 + 1] = b, indices[i * 3 + 2] = c;518519cache[a] = cache[b] = cache[c] = 1;520}521}522523// reorder meshlet vertices for access locality assuming index buffer is scanned sequentially524unsigned int order[kMeshletMaxVertices];525526short remap[kMeshletMaxVertices];527memset(remap, -1, vertex_count * sizeof(short));528529size_t vertex_offset = 0;530531for (size_t i = 0; i < triangle_count * 3; ++i)532{533short& r = remap[indices[i]];534535if (r < 0)536{537r = short(vertex_offset);538order[vertex_offset] = vertices[indices[i]];539vertex_offset++;540}541542indices[i] = (unsigned char)r;543}544545assert(vertex_offset <= vertex_count);546memcpy(vertices, order, vertex_offset * sizeof(unsigned int));547}548549void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count)550{551meshopt_optimizeMeshletLevel(meshlet_vertices, vertex_count, meshlet_triangles, triangle_count, 0);552}553554size_t meshopt_extractMeshletIndices(unsigned int* vertices, unsigned char* triangles, const unsigned int* indices, size_t index_count)555{556using namespace meshopt;557558assert(index_count % 3 == 0);559assert(index_count / 3 <= kMeshletMaxTriangles);560561size_t unique = 0;562563// direct mapped cache for fast lookups based on low index bits; inspired by vk_lod_clusters from NVIDIA564short cache[1024];565memset(cache, -1, sizeof(cache));566567for (size_t i = 0; i < index_count; ++i)568{569unsigned int v = indices[i];570unsigned int key = v & (sizeof(cache) / sizeof(cache[0]) - 1);571short c = cache[key];572573// fast path: vertex has been seen before574if (c >= 0 && vertices[c] == v)575{576triangles[i] = (unsigned char)c;577continue;578}579580// fast path: vertex has never been seen before581if (c < 0)582{583assert(unique < kMeshletMaxVertices);584cache[key] = short(unique);585triangles[i] = (unsigned char)unique;586vertices[unique++] = v;587continue;588}589590// slow path: collision with a different vertex, so we need to look through all vertices591int pos = -1;592for (size_t j = 0; j < unique; ++j)593if (vertices[j] == v)594{595pos = int(j);596break;597}598599if (pos < 0)600{601assert(unique < kMeshletMaxVertices);602pos = int(unique);603vertices[unique++] = v;604}605606cache[key] = short(pos);607triangles[i] = (unsigned char)pos;608}609610assert(unique <= kMeshletMaxVertices);611return unique;612}613614615