Path: blob/master/thirdparty/meshoptimizer/partition.cpp
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <math.h>
#include <string.h>

// This work is based on:
// Takio Kurita. An efficient agglomerative clustering algorithm using a heap. 1991
namespace meshopt
{

// To avoid excessive recursion for malformed inputs, we switch to bisection after some depth
const int kMergeDepthCutoff = 40;

struct ClusterAdjacency
{
	unsigned int* offsets;
	unsigned int* clusters;
	unsigned int* shared;
};

static void filterClusterIndices(unsigned int* data, unsigned int* offsets, const unsigned int* cluster_indices, const unsigned int* cluster_index_counts, size_t cluster_count, unsigned char* used, size_t vertex_count, size_t total_index_count)
{
	(void)vertex_count;
	(void)total_index_count;

	size_t cluster_start = 0;
	size_t cluster_write = 0;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		offsets[i] = unsigned(cluster_write);

		// copy cluster indices, skipping duplicates
		for (size_t j = 0; j < cluster_index_counts[i]; ++j)
		{
			unsigned int v = cluster_indices[cluster_start + j];
			assert(v < vertex_count);

			data[cluster_write] = v;
			cluster_write += 1 - used[v];
			used[v] = 1;
		}

		// reset used flags for the next cluster
		for (size_t j = offsets[i]; j < cluster_write; ++j)
			used[data[j]] = 0;

		cluster_start += cluster_index_counts[i];
	}

	assert(cluster_start == total_index_count);
	assert(cluster_write <= total_index_count);
	offsets[cluster_count] = unsigned(cluster_write);
}
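
// Illustration (not part of the library): for two clusters with indices
// {0, 1, 2, 2, 3} and {2, 3, 4}, filterClusterIndices produces data = {0, 1, 2, 3, 2, 3, 4}
// and offsets = {0, 4, 7}: duplicates are removed within each cluster, but vertices
// shared between clusters are kept so that buildClusterAdjacency below can count them.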

static float computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, float* out_center)
{
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	float center[3] = {0, 0, 0};

	// approximate center of the cluster by averaging all vertex positions
	for (size_t j = 0; j < index_count; ++j)
	{
		const float* p = vertex_positions + indices[j] * vertex_stride_float;

		center[0] += p[0];
		center[1] += p[1];
		center[2] += p[2];
	}

	// note: technically clusters can't be empty per meshopt_partitionClusters, but we guard against division by zero in case that changes
	if (index_count)
	{
		center[0] /= float(index_count);
		center[1] /= float(index_count);
		center[2] /= float(index_count);
	}

	// compute radius of the bounding sphere for each cluster
	float radiussq = 0;

	for (size_t j = 0; j < index_count; ++j)
	{
		const float* p = vertex_positions + indices[j] * vertex_stride_float;

		float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);

		radiussq = radiussq < d2 ? d2 : radiussq;
	}

	memcpy(out_center, center, sizeof(center));
	return sqrtf(radiussq);
}

static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, size_t vertex_count, meshopt_Allocator& allocator)
{
	unsigned int* ref_offsets = allocator.allocate<unsigned int>(vertex_count + 1);

	// compute number of clusters referenced by each vertex
	memset(ref_offsets, 0, vertex_count * sizeof(unsigned int));

	for (size_t i = 0; i < cluster_count; ++i)
	{
		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
			ref_offsets[cluster_indices[j]]++;
	}

	// compute (worst-case) number of adjacent clusters for each cluster
	size_t total_adjacency = 0;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		size_t count = 0;

		// worst case is every vertex has a disjoint cluster list
		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
			count += ref_offsets[cluster_indices[j]] - 1;

		// ... but only every other cluster can be adjacent in the end
		total_adjacency += count < cluster_count - 1 ? count : cluster_count - 1;
	}

	// we can now allocate adjacency buffers
	adjacency.offsets = allocator.allocate<unsigned int>(cluster_count + 1);
	adjacency.clusters = allocator.allocate<unsigned int>(total_adjacency);
	adjacency.shared = allocator.allocate<unsigned int>(total_adjacency);

	// convert ref counts to offsets
	size_t total_refs = 0;

	for (size_t i = 0; i < vertex_count; ++i)
	{
		size_t count = ref_offsets[i];
		ref_offsets[i] = unsigned(total_refs);
		total_refs += count;
	}

	unsigned int* ref_data = allocator.allocate<unsigned int>(total_refs);

	// fill cluster refs for each vertex
	for (size_t i = 0; i < cluster_count; ++i)
	{
		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
			ref_data[ref_offsets[cluster_indices[j]]++] = unsigned(i);
	}

	// after the previous pass, ref_offsets contains the end of the data for each vertex; shift it forward to get the start
	memmove(ref_offsets + 1, ref_offsets, vertex_count * sizeof(unsigned int));
	ref_offsets[0] = 0;

	// fill cluster adjacency for each cluster...
	adjacency.offsets[0] = 0;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		unsigned int* adj = adjacency.clusters + adjacency.offsets[i];
		unsigned int* shd = adjacency.shared + adjacency.offsets[i];
		size_t count = 0;

		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
		{
			unsigned int v = cluster_indices[j];

			// merge the entire cluster list of each vertex into the current list
			for (size_t k = ref_offsets[v]; k < ref_offsets[v + 1]; ++k)
			{
				unsigned int c = ref_data[k];
				assert(c < cluster_count);

				if (c == unsigned(i))
					continue;

				// if the cluster is already in the list, increment the shared count
				bool found = false;
				for (size_t l = 0; l < count; ++l)
					if (adj[l] == c)
					{
						found = true;
						shd[l]++;
						break;
					}

				// ... or append a new cluster
				if (!found)
				{
					adj[count] = c;
					shd[count] = 1;
					count++;
				}
			}
		}

		// mark the end of the adjacency list; the next cluster will start there as well
		adjacency.offsets[i + 1] = adjacency.offsets[i] + unsigned(count);
	}

	assert(adjacency.offsets[cluster_count] <= total_adjacency);

	// ref_offsets can't be deallocated as it was allocated before adjacency
	allocator.deallocate(ref_data);
}
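
// Illustration (not part of the library): continuing the example above, clusters
// {0, 1, 2, 3} and {2, 3, 4} share vertices 2 and 3, so buildClusterAdjacency yields
// adjacency.offsets = {0, 1, 2}, adjacency.clusters = {1, 0} and adjacency.shared = {2, 2}:
// each cluster has one neighbor with an edge weight of 2 shared vertices.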

struct ClusterGroup
{
	int group;
	int next;
	unsigned int size; // 0 unless root
	unsigned int vertices;

	float center[3];
	float radius;
};

struct GroupOrder
{
	unsigned int id;
	int order;
};

static void heapPush(GroupOrder* heap, size_t size, GroupOrder item)
{
	// insert a new element at the end (breaks heap invariant)
	heap[size++] = item;

	// bubble up the new element to its correct position
	size_t i = size - 1;
	while (i > 0 && heap[i].order < heap[(i - 1) / 2].order)
	{
		size_t p = (i - 1) / 2;

		GroupOrder temp = heap[i];
		heap[i] = heap[p];
		heap[p] = temp;
		i = p;
	}
}

static GroupOrder heapPop(GroupOrder* heap, size_t size)
{
	assert(size > 0);
	GroupOrder top = heap[0];

	// move the last element to the top (breaks heap invariant)
	heap[0] = heap[--size];

	// bubble down the new top element to its correct position
	size_t i = 0;
	while (i * 2 + 1 < size)
	{
		// find the smallest child
		size_t j = i * 2 + 1;
		j += (j + 1 < size && heap[j + 1].order < heap[j].order);

		// if the parent is already smaller than both children, we're done
		if (heap[j].order >= heap[i].order)
			break;

		// otherwise, swap the parent and child and continue
		GroupOrder temp = heap[i];
		heap[i] = heap[j];
		heap[j] = temp;
		i = j;
	}

	return top;
}

static unsigned int countShared(const ClusterGroup* groups, int group1, int group2, const ClusterAdjacency& adjacency)
{
	unsigned int total = 0;

	for (int i1 = group1; i1 >= 0; i1 = groups[i1].next)
		for (int i2 = group2; i2 >= 0; i2 = groups[i2].next)
		{
			for (unsigned int adj = adjacency.offsets[i1]; adj < adjacency.offsets[i1 + 1]; ++adj)
				if (adjacency.clusters[adj] == unsigned(i2))
				{
					total += adjacency.shared[adj];
					break;
				}
		}

	return total;
}

static void mergeBounds(ClusterGroup& target, const ClusterGroup& source)
{
	float r1 = target.radius, r2 = source.radius;
	float dx = source.center[0] - target.center[0], dy = source.center[1] - target.center[1], dz = source.center[2] - target.center[2];
	float d = sqrtf(dx * dx + dy * dy + dz * dz);

	// if the source sphere contains the target sphere, replace the target outright
	if (d + r1 < r2)
	{
		target.center[0] = source.center[0];
		target.center[1] = source.center[1];
		target.center[2] = source.center[2];
		target.radius = source.radius;
		return;
	}

	// otherwise, expand the target unless it already contains the source
	if (d + r2 > r1)
	{
		float k = d > 0 ? (d + r2 - r1) / (2 * d) : 0.f;

		target.center[0] += dx * k;
		target.center[1] += dy * k;
		target.center[2] += dz * k;
		target.radius = (d + r2 + r1) / 2;
	}
}
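
// Illustration (not part of the library): merging a source sphere at (4,0,0) with
// radius 1 into a target sphere at (0,0,0) with radius 1 gives d = 4, k = (4 + 1 - 1) / 8 = 0.5,
// so the target becomes a sphere at (2,0,0) with radius (4 + 1 + 1) / 2 = 3, the smallest
// sphere enclosing both inputs.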

static float boundsScore(const ClusterGroup& target, const ClusterGroup& source)
{
	float r1 = target.radius, r2 = source.radius;
	float dx = source.center[0] - target.center[0], dy = source.center[1] - target.center[1], dz = source.center[2] - target.center[2];
	float d = sqrtf(dx * dx + dy * dy + dz * dz);

	// mr is the radius the merged sphere would have (see mergeBounds)
	float mr = d + r1 < r2 ? r2 : (d + r2 < r1 ? r1 : (d + r2 + r1) / 2);

	// a merge that doesn't grow the target scores 1; the more the radius grows, the lower the score
	return mr > 0 ? r1 / mr : 0.f;
}

static int pickGroupToMerge(const ClusterGroup* groups, int id, const ClusterAdjacency& adjacency, size_t max_partition_size, bool use_bounds)
{
	assert(groups[id].size > 0);

	float group_rsqrt = 1.f / sqrtf(float(int(groups[id].vertices)));

	int best_group = -1;
	float best_score = 0;

	for (int ci = id; ci >= 0; ci = groups[ci].next)
	{
		for (unsigned int adj = adjacency.offsets[ci]; adj != adjacency.offsets[ci + 1]; ++adj)
		{
			int other = groups[adjacency.clusters[adj]].group;
			if (other < 0)
				continue;

			assert(groups[other].size > 0);
			if (groups[id].size + groups[other].size > max_partition_size)
				continue;

			unsigned int shared = countShared(groups, id, other, adjacency);
			float other_rsqrt = 1.f / sqrtf(float(int(groups[other].vertices)));

			// normalize shared count by the expected boundary of each group (+ keeps scoring symmetric)
			float score = float(int(shared)) * (group_rsqrt + other_rsqrt);

			// incorporate spatial score to favor merging nearby groups
			if (use_bounds)
				score *= 1.f + 0.4f * boundsScore(groups[id], groups[other]);

			if (score > best_score)
			{
				best_group = other;
				best_score = score;
			}
		}
	}

	return best_group;
}

static void mergeLeaf(ClusterGroup* groups, unsigned int* order, size_t count, size_t target_partition_size, size_t max_partition_size)
{
	for (size_t i = 0; i < count; ++i)
	{
		unsigned int id = order[i];
		if (groups[id].size == 0 || groups[id].size >= target_partition_size)
			continue;

		float best_score = -1.f;
		int best_group = -1;

		for (size_t j = 0; j < count; ++j)
		{
			unsigned int other = order[j];
			if (id == other || groups[other].size == 0)
				continue;

			if (groups[id].size + groups[other].size > max_partition_size)
				continue;

			// favor merging nearby groups
			float score = boundsScore(groups[id], groups[other]);

			if (score > best_score)
			{
				best_score = score;
				best_group = other;
			}
		}

		// merge id *into* best_group; that way, we may merge more groups into the same best_group, maximizing the chance of reaching the target
		if (best_group != -1)
		{
			// combine groups by linking them together
			unsigned int tail = best_group;
			while (groups[tail].next >= 0)
				tail = groups[tail].next;

			groups[tail].next = id;

			// update group sizes; note: we omit the vertices update for simplicity as it's not used for the spatial merge
			groups[best_group].size += groups[id].size;
			groups[id].size = 0;

			// merge bounding spheres
			mergeBounds(groups[best_group], groups[id]);
			groups[id].radius = 0.f;
		}
	}
}

static size_t mergePartition(unsigned int* order, size_t count, const ClusterGroup* groups, int axis, float pivot)
{
	size_t m = 0;

	// invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot
	for (size_t i = 0; i < count; ++i)
	{
		float v = groups[order[i]].center[axis];

		// swap(m, i) unconditionally
		unsigned int t = order[m];
		order[m] = order[i];
		order[i] = t;

		// when v >= pivot, we swap i with m without advancing it, preserving the invariant
		m += v < pivot;
	}

	return m;
}

static void mergeSpatial(ClusterGroup* groups, unsigned int* order, size_t count, size_t target_partition_size, size_t max_partition_size, size_t leaf_size, int depth)
{
	size_t total = 0;
	for (size_t i = 0; i < count; ++i)
		total += groups[order[i]].size;

	if (total <= max_partition_size || count <= leaf_size)
		return mergeLeaf(groups, order, count, target_partition_size, max_partition_size);

	float mean[3] = {};
	float vars[3] = {};
	float runc = 1, runs = 1;

	// gather statistics on the points in the subtree using Welford's algorithm
	for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc)
	{
		const float* point = groups[order[i]].center;

		for (int k = 0; k < 3; ++k)
		{
			float delta = point[k] - mean[k];
			mean[k] += delta * runs;
			vars[k] += delta * (point[k] - mean[k]);
		}
	}

	// the split axis is the one where the variance is largest
	int axis = (vars[0] >= vars[1] && vars[0] >= vars[2]) ? 0 : (vars[1] >= vars[2] ? 1 : 2);

	float split = mean[axis];
	size_t middle = mergePartition(order, count, groups, axis, split);

	// enforce balance for degenerate partitions
	// this also ensures recursion depth is bounded on pathological inputs
	if (middle <= leaf_size / 2 || count - middle <= leaf_size / 2 || depth >= kMergeDepthCutoff)
		middle = count / 2;

	// recursion depth is logarithmic and bounded due to the max depth check above
	mergeSpatial(groups, order, middle, target_partition_size, max_partition_size, leaf_size, depth + 1);
	mergeSpatial(groups, order + middle, count - middle, target_partition_size, max_partition_size, leaf_size, depth + 1);
}

} // namespace meshopt
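
// The entry point below ties the helpers together: per-cluster indices are
// deduplicated, cluster adjacency is built with shared-vertex edge weights, groups
// are merged greedily smallest-first using the heap (following Kurita 1991), and,
// when vertex positions are available, a final spatial pass merges leftover small groups.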

size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size)
{
	using namespace meshopt;

	assert((vertex_positions == NULL || vertex_positions_stride >= 12) && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);
	assert(target_partition_size > 0);

	// allow groups to grow up to ~4/3 of the target size before they stop merging
	size_t max_partition_size = target_partition_size + target_partition_size / 3;

	meshopt_Allocator allocator;

	unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
	memset(used, 0, vertex_count);

	unsigned int* cluster_newindices = allocator.allocate<unsigned int>(total_index_count);
	unsigned int* cluster_offsets = allocator.allocate<unsigned int>(cluster_count + 1);

	// make a new cluster index list that filters out duplicate indices
	filterClusterIndices(cluster_newindices, cluster_offsets, cluster_indices, cluster_index_counts, cluster_count, used, vertex_count, total_index_count);
	cluster_indices = cluster_newindices;

	// build cluster adjacency along with edge weights (shared vertex count)
	ClusterAdjacency adjacency = {};
	buildClusterAdjacency(adjacency, cluster_indices, cluster_offsets, cluster_count, vertex_count, allocator);

	ClusterGroup* groups = allocator.allocate<ClusterGroup>(cluster_count);
	memset(groups, 0, sizeof(ClusterGroup) * cluster_count);

	GroupOrder* order = allocator.allocate<GroupOrder>(cluster_count);
	size_t pending = 0;

	// create a singleton group for each cluster and order them by priority
	for (size_t i = 0; i < cluster_count; ++i)
	{
		groups[i].group = int(i);
		groups[i].next = -1;
		groups[i].size = 1;
		groups[i].vertices = cluster_offsets[i + 1] - cluster_offsets[i];
		assert(groups[i].vertices > 0);

		// compute bounding sphere for each cluster if positions are provided
		if (vertex_positions)
			groups[i].radius = computeClusterBounds(cluster_indices + cluster_offsets[i], cluster_offsets[i + 1] - cluster_offsets[i], vertex_positions, vertex_positions_stride, groups[i].center);

		GroupOrder item = {};
		item.id = unsigned(i);
		item.order = groups[i].vertices;

		heapPush(order, pending++, item);
	}
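
	// note: each iteration below either emits the popped group as-is (no reinsertion) or
	// links two live groups together and reinserts the merged one, so the number of live
	// groups decreases with every merge and the loop terminates.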

	// iteratively merge the smallest group with the best group
	while (pending)
	{
		GroupOrder top = heapPop(order, pending--);

		// this group was merged into another group earlier
		if (groups[top.id].size == 0)
			continue;

		// disassociate clusters from the group to prevent them from being merged again; we will re-associate them if the group is reinserted
		for (int i = top.id; i >= 0; i = groups[i].next)
		{
			assert(groups[i].group == int(top.id));
			groups[i].group = -1;
		}

		// the group is large enough, emit as is
		if (groups[top.id].size >= target_partition_size)
			continue;

		int best_group = pickGroupToMerge(groups, top.id, adjacency, max_partition_size, /* use_bounds= */ vertex_positions);

		// we can't grow the group any more, emit as is
		if (best_group == -1)
			continue;

		// compute shared vertices to adjust the total vertices estimate after merging
		unsigned int shared = countShared(groups, top.id, best_group, adjacency);

		// combine groups by linking them together
		unsigned int tail = top.id;
		while (groups[tail].next >= 0)
			tail = groups[tail].next;

		groups[tail].next = best_group;

		// update group sizes; note: the vertex update is an O(1) approximation which avoids recomputing the true size
		groups[top.id].size += groups[best_group].size;
		groups[top.id].vertices += groups[best_group].vertices;
		groups[top.id].vertices = (groups[top.id].vertices > shared) ? groups[top.id].vertices - shared : 1;

		groups[best_group].size = 0;
		groups[best_group].vertices = 0;

		// merge bounding spheres if bounds are available
		if (vertex_positions)
		{
			mergeBounds(groups[top.id], groups[best_group]);
			groups[best_group].radius = 0;
		}

		// re-associate all clusters back to the merged group
		for (int i = top.id; i >= 0; i = groups[i].next)
			groups[i].group = int(top.id);

		top.order = groups[top.id].vertices;
		heapPush(order, pending++, top);
	}

	// if vertex positions are provided, we do a final pass to see if we can merge small groups based on spatial locality alone
	if (vertex_positions)
	{
		unsigned int* merge_order = reinterpret_cast<unsigned int*>(order);
		size_t merge_offset = 0;

		for (size_t i = 0; i < cluster_count; ++i)
			if (groups[i].size)
				merge_order[merge_offset++] = unsigned(i);

		mergeSpatial(groups, merge_order, merge_offset, target_partition_size, max_partition_size, /* leaf_size= */ 8, 0);
	}

	// output each remaining group
	size_t next_group = 0;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		if (groups[i].size == 0)
			continue;

		for (int j = int(i); j >= 0; j = groups[j].next)
			destination[j] = unsigned(next_group);

		next_group++;
	}

	assert(next_group <= cluster_count);
	return next_group;
}
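
// Usage sketch (illustrative, not part of the library): partitioning clusters
// produced by a clusterizer into groups of roughly 8 clusters each. The container-based
// helper below and its input layout (a flat index array plus per-cluster index counts)
// are hypothetical; only the meshopt_partitionClusters call reflects the API above.
#if 0
#include "meshoptimizer.h"

#include <vector>

std::vector<unsigned int> partitionExample(const std::vector<unsigned int>& cluster_indices, const std::vector<unsigned int>& cluster_index_counts, const std::vector<float>& vertex_positions_xyz)
{
	size_t cluster_count = cluster_index_counts.size();

	// receives one partition id per cluster, in [0, group_count)
	std::vector<unsigned int> cluster_group(cluster_count);

	size_t group_count = meshopt_partitionClusters(
	    cluster_group.data(),
	    cluster_indices.data(), cluster_indices.size(),
	    cluster_index_counts.data(), cluster_count,
	    vertex_positions_xyz.data(), vertex_positions_xyz.size() / 3, sizeof(float) * 3,
	    /* target_partition_size= */ 8);

	// groups may exceed the target by up to a third (see max_partition_size above);
	// passing NULL positions is also supported and disables the spatial heuristics
	(void)group_count;
	return cluster_group;
}
#endif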