CoCalc -- simplifier.cpp

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/meshoptimizer/simplifier.cpp
⁹⁸⁹⁶ views
1
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
2
#include "meshoptimizer.h"
3

4
#include <assert.h>
5
#include <float.h>
6
#include <math.h>
7
#include <string.h>
8

9
#ifndef TRACE
10
#define TRACE 0
11
#endif
12

13
#if TRACE
14
#include <stdio.h>
15
#endif
16

17
#if TRACE
18
#define TRACESTATS(i) stats[i]++;
19
#else
20
#define TRACESTATS(i) (void)0
21
#endif
22

23
// This work is based on:
24
// Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
25
// Michael Garland. Quadric-based polygonal surface simplification. 1999
26
// Peter Lindstrom. Out-of-Core Simplification of Large Polygonal Models. 2000
27
// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003
28
// Peter Van Sandt, Yannis Chronis, Jignesh M. Patel. Efficiently Searching In-Memory Sorted Arrays: Revenge of the Interpolation Search? 2019
29
// Hugues Hoppe. New Quadric Metric for Simplifying Meshes with Appearance Attributes. 1999
30
namespace meshopt
31
{
32

33
struct EdgeAdjacency
34
{
35
	struct Edge
36
	{
37
		unsigned int next;
38
		unsigned int prev;
39
	};
40

41
	unsigned int* offsets;
42
	Edge* data;
43
};
44

45
static void prepareEdgeAdjacency(EdgeAdjacency& adjacency, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
46
{
47
	adjacency.offsets = allocator.allocate<unsigned int>(vertex_count + 1);
48
	adjacency.data = allocator.allocate<EdgeAdjacency::Edge>(index_count);
49
}
50

51
static void updateEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* remap)
52
{
53
	size_t face_count = index_count / 3;
54
	unsigned int* offsets = adjacency.offsets + 1;
55
	EdgeAdjacency::Edge* data = adjacency.data;
56

57
	// fill edge counts
58
	memset(offsets, 0, vertex_count * sizeof(unsigned int));
59

60
	for (size_t i = 0; i < index_count; ++i)
61
	{
62
		unsigned int v = remap ? remap[indices[i]] : indices[i];
63
		assert(v < vertex_count);
64

65
		offsets[v]++;
66
	}
67

68
	// fill offset table
69
	unsigned int offset = 0;
70

71
	for (size_t i = 0; i < vertex_count; ++i)
72
	{
73
		unsigned int count = offsets[i];
74
		offsets[i] = offset;
75
		offset += count;
76
	}
77

78
	assert(offset == index_count);
79

80
	// fill edge data
81
	for (size_t i = 0; i < face_count; ++i)
82
	{
83
		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
84

85
		if (remap)
86
		{
87
			a = remap[a];
88
			b = remap[b];
89
			c = remap[c];
90
		}
91

92
		data[offsets[a]].next = b;
93
		data[offsets[a]].prev = c;
94
		offsets[a]++;
95

96
		data[offsets[b]].next = c;
97
		data[offsets[b]].prev = a;
98
		offsets[b]++;
99

100
		data[offsets[c]].next = a;
101
		data[offsets[c]].prev = b;
102
		offsets[c]++;
103
	}
104

105
	// finalize offsets
106
	adjacency.offsets[0] = 0;
107
	assert(adjacency.offsets[vertex_count] == index_count);
108
}
109

110
struct PositionHasher
111
{
112
	const float* vertex_positions;
113
	size_t vertex_stride_float;
114
	const unsigned int* sparse_remap;
115

116
	size_t hash(unsigned int index) const
117
	{
118
		unsigned int ri = sparse_remap ? sparse_remap[index] : index;
119
		const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + ri * vertex_stride_float);
120

121
		unsigned int x = key[0], y = key[1], z = key[2];
122

123
		// replace negative zero with zero
124
		x = (x == 0x80000000) ? 0 : x;
125
		y = (y == 0x80000000) ? 0 : y;
126
		z = (z == 0x80000000) ? 0 : z;
127

128
		// scramble bits to make sure that integer coordinates have entropy in lower bits
129
		x ^= x >> 17;
130
		y ^= y >> 17;
131
		z ^= z >> 17;
132

133
		// Optimized Spatial Hashing for Collision Detection of Deformable Objects
134
		return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791);
135
	}
136

137
	bool equal(unsigned int lhs, unsigned int rhs) const
138
	{
139
		unsigned int li = sparse_remap ? sparse_remap[lhs] : lhs;
140
		unsigned int ri = sparse_remap ? sparse_remap[rhs] : rhs;
141

142
		const float* lv = vertex_positions + li * vertex_stride_float;
143
		const float* rv = vertex_positions + ri * vertex_stride_float;
144

145
		return lv[0] == rv[0] && lv[1] == rv[1] && lv[2] == rv[2];
146
	}
147
};
148

149
struct RemapHasher
150
{
151
	unsigned int* remap;
152

153
	size_t hash(unsigned int id) const
154
	{
155
		return id * 0x5bd1e995;
156
	}
157

158
	bool equal(unsigned int lhs, unsigned int rhs) const
159
	{
160
		return remap[lhs] == rhs;
161
	}
162
};
163

164
static size_t hashBuckets2(size_t count)
165
{
166
	size_t buckets = 1;
167
	while (buckets < count + count / 4)
168
		buckets *= 2;
169

170
	return buckets;
171
}
172

173
template <typename T, typename Hash>
174
static T* hashLookup2(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
175
{
176
	assert(buckets > 0);
177
	assert((buckets & (buckets - 1)) == 0);
178

179
	size_t hashmod = buckets - 1;
180
	size_t bucket = hash.hash(key) & hashmod;
181

182
	for (size_t probe = 0; probe <= hashmod; ++probe)
183
	{
184
		T& item = table[bucket];
185

186
		if (item == empty)
187
			return &item;
188

189
		if (hash.equal(item, key))
190
			return &item;
191

192
		// hash collision, quadratic probing
193
		bucket = (bucket + probe + 1) & hashmod;
194
	}
195

196
	assert(false && "Hash table is full"); // unreachable
197
	return NULL;
198
}
199

200
static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const unsigned int* sparse_remap, meshopt_Allocator& allocator)
201
{
202
	PositionHasher hasher = {vertex_positions_data, vertex_positions_stride / sizeof(float), sparse_remap};
203

204
	size_t table_size = hashBuckets2(vertex_count);
205
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
206
	memset(table, -1, table_size * sizeof(unsigned int));
207

208
	// build forward remap: for each vertex, which other (canonical) vertex does it map to?
209
	// we use position equivalence for this, and remap vertices to other existing vertices
210
	for (size_t i = 0; i < vertex_count; ++i)
211
	{
212
		unsigned int index = unsigned(i);
213
		unsigned int* entry = hashLookup2(table, table_size, hasher, index, ~0u);
214

215
		if (*entry == ~0u)
216
			*entry = index;
217

218
		remap[index] = *entry;
219
	}
220

221
	allocator.deallocate(table);
222

223
	if (!wedge)
224
		return;
225

226
	// build wedge table: for each vertex, which other vertex is the next wedge that also maps to the same vertex?
227
	// entries in table form a (cyclic) wedge loop per vertex; for manifold vertices, wedge[i] == remap[i] == i
228
	for (size_t i = 0; i < vertex_count; ++i)
229
		wedge[i] = unsigned(i);
230

231
	for (size_t i = 0; i < vertex_count; ++i)
232
		if (remap[i] != i)
233
		{
234
			unsigned int r = remap[i];
235

236
			wedge[i] = wedge[r];
237
			wedge[r] = unsigned(i);
238
		}
239
}
240

241
static unsigned int* buildSparseRemap(unsigned int* indices, size_t index_count, size_t vertex_count, size_t* out_vertex_count, meshopt_Allocator& allocator)
242
{
243
	// use a bit set to compute the precise number of unique vertices
244
	unsigned char* filter = allocator.allocate<unsigned char>((vertex_count + 7) / 8);
245
	memset(filter, 0, (vertex_count + 7) / 8);
246

247
	size_t unique = 0;
248
	for (size_t i = 0; i < index_count; ++i)
249
	{
250
		unsigned int index = indices[i];
251
		assert(index < vertex_count);
252

253
		unique += (filter[index / 8] & (1 << (index % 8))) == 0;
254
		filter[index / 8] |= 1 << (index % 8);
255
	}
256

257
	unsigned int* remap = allocator.allocate<unsigned int>(unique);
258
	size_t offset = 0;
259

260
	// temporary map dense => sparse; we allocate it last so that we can deallocate it
261
	size_t revremap_size = hashBuckets2(unique);
262
	unsigned int* revremap = allocator.allocate<unsigned int>(revremap_size);
263
	memset(revremap, -1, revremap_size * sizeof(unsigned int));
264

265
	// fill remap, using revremap as a helper, and rewrite indices in the same pass
266
	RemapHasher hasher = {remap};
267

268
	for (size_t i = 0; i < index_count; ++i)
269
	{
270
		unsigned int index = indices[i];
271

272
		unsigned int* entry = hashLookup2(revremap, revremap_size, hasher, index, ~0u);
273

274
		if (*entry == ~0u)
275
		{
276
			remap[offset] = index;
277
			*entry = unsigned(offset);
278
			offset++;
279
		}
280

281
		indices[i] = *entry;
282
	}
283

284
	allocator.deallocate(revremap);
285

286
	assert(offset == unique);
287
	*out_vertex_count = unique;
288

289
	return remap;
290
}
291

292
enum VertexKind
293
{
294
	Kind_Manifold, // not on an attribute seam, not on any boundary
295
	Kind_Border,   // not on an attribute seam, has exactly two open edges
296
	Kind_Seam,     // on an attribute seam with exactly two attribute seam edges
297
	Kind_Complex,  // none of the above; these vertices can move as long as all wedges move to the target vertex
298
	Kind_Locked,   // none of the above; these vertices can't move
299

300
	Kind_Count
301
};
302

303
// manifold vertices can collapse onto anything
304
// border/seam vertices can collapse onto border/seam respectively, or locked
305
// complex vertices can collapse onto complex/locked
306
// a rule of thumb is that collapsing kind A into kind B preserves the kind B in the target vertex
307
// for example, while we could collapse Complex into Manifold, this would mean the target vertex isn't Manifold anymore
308
const unsigned char kCanCollapse[Kind_Count][Kind_Count] = {
309
    {1, 1, 1, 1, 1},
310
    {0, 1, 0, 0, 1},
311
    {0, 0, 1, 0, 1},
312
    {0, 0, 0, 1, 1},
313
    {0, 0, 0, 0, 0},
314
};
315

316
// if a vertex is manifold or seam, adjoining edges are guaranteed to have an opposite edge
317
// note that for seam edges, the opposite edge isn't present in the attribute-based topology
318
// but is present if you consider a position-only mesh variant
319
const unsigned char kHasOpposite[Kind_Count][Kind_Count] = {
320
    {1, 1, 1, 0, 1},
321
    {1, 0, 1, 0, 0},
322
    {1, 1, 1, 0, 1},
323
    {0, 0, 0, 0, 0},
324
    {1, 0, 1, 0, 0},
325
};
326

327
static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b)
328
{
329
	unsigned int count = adjacency.offsets[a + 1] - adjacency.offsets[a];
330
	const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[a];
331

332
	for (size_t i = 0; i < count; ++i)
333
		if (edges[i].next == b)
334
			return true;
335

336
	return false;
337
}
338

339
static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_lock, const unsigned int* sparse_remap, unsigned int options)
340
{
341
	memset(loop, -1, vertex_count * sizeof(unsigned int));
342
	memset(loopback, -1, vertex_count * sizeof(unsigned int));
343

344
	// incoming & outgoing open edges: ~0u if no open edges, i if there are more than 1
345
	// note that this is the same data as required in loop[] arrays; loop[] data is only valid for border/seam
346
	// but here it's okay to fill the data out for other types of vertices as well
347
	unsigned int* openinc = loopback;
348
	unsigned int* openout = loop;
349

350
	for (size_t i = 0; i < vertex_count; ++i)
351
	{
352
		unsigned int vertex = unsigned(i);
353

354
		unsigned int count = adjacency.offsets[vertex + 1] - adjacency.offsets[vertex];
355
		const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[vertex];
356

357
		for (size_t j = 0; j < count; ++j)
358
		{
359
			unsigned int target = edges[j].next;
360

361
			if (target == vertex)
362
			{
363
				// degenerate triangles have two distinct edges instead of three, and the self edge
364
				// is bi-directional by definition; this can break border/seam classification by "closing"
365
				// the open edge from another triangle and falsely marking the vertex as manifold
366
				// instead we mark the vertex as having >1 open edges which turns it into locked/complex
367
				openinc[vertex] = openout[vertex] = vertex;
368
			}
369
			else if (!hasEdge(adjacency, target, vertex))
370
			{
371
				openinc[target] = (openinc[target] == ~0u) ? vertex : target;
372
				openout[vertex] = (openout[vertex] == ~0u) ? target : vertex;
373
			}
374
		}
375
	}
376

377
#if TRACE
378
	size_t stats[4] = {};
379
#endif
380

381
	for (size_t i = 0; i < vertex_count; ++i)
382
	{
383
		if (remap[i] == i)
384
		{
385
			if (wedge[i] == i)
386
			{
387
				// no attribute seam, need to check if it's manifold
388
				unsigned int openi = openinc[i], openo = openout[i];
389

390
				// note: we classify any vertices with no open edges as manifold
391
				// this is technically incorrect - if 4 triangles share an edge, we'll classify vertices as manifold
392
				// it's unclear if this is a problem in practice
393
				if (openi == ~0u && openo == ~0u)
394
				{
395
					result[i] = Kind_Manifold;
396
				}
397
				else if (openi != i && openo != i)
398
				{
399
					result[i] = Kind_Border;
400
				}
401
				else
402
				{
403
					result[i] = Kind_Locked;
404
					TRACESTATS(0);
405
				}
406
			}
407
			else if (wedge[wedge[i]] == i)
408
			{
409
				// attribute seam; need to distinguish between Seam and Locked
410
				unsigned int w = wedge[i];
411
				unsigned int openiv = openinc[i], openov = openout[i];
412
				unsigned int openiw = openinc[w], openow = openout[w];
413

414
				// seam should have one open half-edge for each vertex, and the edges need to "connect" - point to the same vertex post-remap
415
				if (openiv != ~0u && openiv != i && openov != ~0u && openov != i &&
416
				    openiw != ~0u && openiw != w && openow != ~0u && openow != w)
417
				{
418
					if (remap[openiv] == remap[openow] && remap[openov] == remap[openiw] && remap[openiv] != remap[openov])
419
					{
420
						result[i] = Kind_Seam;
421
					}
422
					else
423
					{
424
						result[i] = Kind_Locked;
425
						TRACESTATS(1);
426
					}
427
				}
428
				else
429
				{
430
					result[i] = Kind_Locked;
431
					TRACESTATS(2);
432
				}
433
			}
434
			else
435
			{
436
				// more than one vertex maps to this one; we don't have classification available
437
				result[i] = Kind_Locked;
438
				TRACESTATS(3);
439
			}
440
		}
441
		else
442
		{
443
			assert(remap[i] < i);
444

445
			result[i] = result[remap[i]];
446
		}
447
	}
448

449
	if (vertex_lock)
450
	{
451
		// vertex_lock may lock any wedge, not just the primary vertex, so we need to lock the primary vertex and relock any wedges
452
		for (size_t i = 0; i < vertex_count; ++i)
453
		{
454
			unsigned int ri = sparse_remap ? sparse_remap[i] : unsigned(i);
455
			assert(vertex_lock[ri] <= 1); // values other than 0/1 are reserved for future use
456

457
			if (vertex_lock[ri])
458
				result[remap[i]] = Kind_Locked;
459
		}
460

461
		for (size_t i = 0; i < vertex_count; ++i)
462
			if (result[remap[i]] == Kind_Locked)
463
				result[i] = Kind_Locked;
464
	}
465

466
	if (options & meshopt_SimplifyLockBorder)
467
		for (size_t i = 0; i < vertex_count; ++i)
468
			if (result[i] == Kind_Border)
469
				result[i] = Kind_Locked;
470

471
#if TRACE
472
	printf("locked: many open edges %d, disconnected seam %d, many seam edges %d, many wedges %d\n",
473
	    int(stats[0]), int(stats[1]), int(stats[2]), int(stats[3]));
474
#endif
475
}
476

477
struct Vector3
478
{
479
	float x, y, z;
480
};
481

482
static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const unsigned int* sparse_remap = NULL)
483
{
484
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
485

486
	float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
487
	float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
488

489
	for (size_t i = 0; i < vertex_count; ++i)
490
	{
491
		unsigned int ri = sparse_remap ? sparse_remap[i] : unsigned(i);
492
		const float* v = vertex_positions_data + ri * vertex_stride_float;
493

494
		if (result)
495
		{
496
			result[i].x = v[0];
497
			result[i].y = v[1];
498
			result[i].z = v[2];
499
		}
500

501
		for (int j = 0; j < 3; ++j)
502
		{
503
			float vj = v[j];
504

505
			minv[j] = minv[j] > vj ? vj : minv[j];
506
			maxv[j] = maxv[j] < vj ? vj : maxv[j];
507
		}
508
	}
509

510
	float extent = 0.f;
511

512
	extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
513
	extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
514
	extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
515

516
	if (result)
517
	{
518
		float scale = extent == 0 ? 0.f : 1.f / extent;
519

520
		for (size_t i = 0; i < vertex_count; ++i)
521
		{
522
			result[i].x = (result[i].x - minv[0]) * scale;
523
			result[i].y = (result[i].y - minv[1]) * scale;
524
			result[i].z = (result[i].z - minv[2]) * scale;
525
		}
526
	}
527

528
	return extent;
529
}
530

531
static void rescaleAttributes(float* result, const float* vertex_attributes_data, size_t vertex_count, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned int* attribute_remap, const unsigned int* sparse_remap)
532
{
533
	size_t vertex_attributes_stride_float = vertex_attributes_stride / sizeof(float);
534

535
	for (size_t i = 0; i < vertex_count; ++i)
536
	{
537
		unsigned int ri = sparse_remap ? sparse_remap[i] : unsigned(i);
538

539
		for (size_t k = 0; k < attribute_count; ++k)
540
		{
541
			unsigned int rk = attribute_remap[k];
542
			float a = vertex_attributes_data[ri * vertex_attributes_stride_float + rk];
543

544
			result[i * attribute_count + k] = a * attribute_weights[rk];
545
		}
546
	}
547
}
548

549
static const size_t kMaxAttributes = 32;
550

551
struct Quadric
552
{
553
	// a00*x^2 + a11*y^2 + a22*z^2 + 2*(a10*xy + a20*xz + a21*yz) + b0*x + b1*y + b2*z + c
554
	float a00, a11, a22;
555
	float a10, a20, a21;
556
	float b0, b1, b2, c;
557
	float w;
558
};
559

560
struct QuadricGrad
561
{
562
	// gx*x + gy*y + gz*z + gw
563
	float gx, gy, gz, gw;
564
};
565

566
struct Reservoir
567
{
568
	float x, y, z;
569
	float r, g, b;
570
	float w;
571
};
572

573
struct Collapse
574
{
575
	unsigned int v0;
576
	unsigned int v1;
577

578
	union
579
	{
580
		unsigned int bidi;
581
		float error;
582
		unsigned int errorui;
583
	};
584
};
585

586
static float normalize(Vector3& v)
587
{
588
	float length = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
589

590
	if (length > 0)
591
	{
592
		v.x /= length;
593
		v.y /= length;
594
		v.z /= length;
595
	}
596

597
	return length;
598
}
599

600
static void quadricAdd(Quadric& Q, const Quadric& R)
601
{
602
	Q.a00 += R.a00;
603
	Q.a11 += R.a11;
604
	Q.a22 += R.a22;
605
	Q.a10 += R.a10;
606
	Q.a20 += R.a20;
607
	Q.a21 += R.a21;
608
	Q.b0 += R.b0;
609
	Q.b1 += R.b1;
610
	Q.b2 += R.b2;
611
	Q.c += R.c;
612
	Q.w += R.w;
613
}
614

615
static void quadricAdd(QuadricGrad* G, const QuadricGrad* R, size_t attribute_count)
616
{
617
	for (size_t k = 0; k < attribute_count; ++k)
618
	{
619
		G[k].gx += R[k].gx;
620
		G[k].gy += R[k].gy;
621
		G[k].gz += R[k].gz;
622
		G[k].gw += R[k].gw;
623
	}
624
}
625

626
static float quadricEval(const Quadric& Q, const Vector3& v)
627
{
628
	float rx = Q.b0;
629
	float ry = Q.b1;
630
	float rz = Q.b2;
631

632
	rx += Q.a10 * v.y;
633
	ry += Q.a21 * v.z;
634
	rz += Q.a20 * v.x;
635

636
	rx *= 2;
637
	ry *= 2;
638
	rz *= 2;
639

640
	rx += Q.a00 * v.x;
641
	ry += Q.a11 * v.y;
642
	rz += Q.a22 * v.z;
643

644
	float r = Q.c;
645
	r += rx * v.x;
646
	r += ry * v.y;
647
	r += rz * v.z;
648

649
	return r;
650
}
651

652
static float quadricError(const Quadric& Q, const Vector3& v)
653
{
654
	float r = quadricEval(Q, v);
655
	float s = Q.w == 0.f ? 0.f : 1.f / Q.w;
656

657
	return fabsf(r) * s;
658
}
659

660
static float quadricError(const Quadric& Q, const QuadricGrad* G, size_t attribute_count, const Vector3& v, const float* va)
661
{
662
	float r = quadricEval(Q, v);
663

664
	// see quadricFromAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr
665
	for (size_t k = 0; k < attribute_count; ++k)
666
	{
667
		float a = va[k];
668
		float g = v.x * G[k].gx + v.y * G[k].gy + v.z * G[k].gz + G[k].gw;
669

670
		r += a * (a * Q.w - 2 * g);
671
	}
672

673
	// note: unlike position error, we do not normalize by Q.w to retain edge scaling as described in quadricFromAttributes
674
	return fabsf(r);
675
}
676

677
static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w)
678
{
679
	float aw = a * w;
680
	float bw = b * w;
681
	float cw = c * w;
682
	float dw = d * w;
683

684
	Q.a00 = a * aw;
685
	Q.a11 = b * bw;
686
	Q.a22 = c * cw;
687
	Q.a10 = a * bw;
688
	Q.a20 = a * cw;
689
	Q.a21 = b * cw;
690
	Q.b0 = a * dw;
691
	Q.b1 = b * dw;
692
	Q.b2 = c * dw;
693
	Q.c = d * dw;
694
	Q.w = w;
695
}
696

697
static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight)
698
{
699
	Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
700
	Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
701

702
	// normal = cross(p1 - p0, p2 - p0)
703
	Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x};
704
	float area = normalize(normal);
705

706
	float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z;
707

708
	// we use sqrtf(area) so that the error is scaled linearly; this tends to improve silhouettes
709
	quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, sqrtf(area) * weight);
710
}
711

712
static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight)
713
{
714
	Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
715

716
	// edge length; keep squared length around for projection correction
717
	float lengthsq = p10.x * p10.x + p10.y * p10.y + p10.z * p10.z;
718
	float length = sqrtf(lengthsq);
719

720
	// p20p = length of projection of p2-p0 onto p1-p0; note that p10 is unnormalized so we need to correct it later
721
	Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
722
	float p20p = p20.x * p10.x + p20.y * p10.y + p20.z * p10.z;
723

724
	// perp = perpendicular vector from p2 to line segment p1-p0
725
	// note: since p10 is unnormalized we need to correct the projection; we scale p20 instead to take advantage of normalize below
726
	Vector3 perp = {p20.x * lengthsq - p10.x * p20p, p20.y * lengthsq - p10.y * p20p, p20.z * lengthsq - p10.z * p20p};
727
	normalize(perp);
728

729
	float distance = perp.x * p0.x + perp.y * p0.y + perp.z * p0.z;
730

731
	// note: the weight is scaled linearly with edge length; this has to match the triangle weight
732
	quadricFromPlane(Q, perp.x, perp.y, perp.z, -distance, length * weight);
733
}
734

735
static void quadricFromAttributes(Quadric& Q, QuadricGrad* G, const Vector3& p0, const Vector3& p1, const Vector3& p2, const float* va0, const float* va1, const float* va2, size_t attribute_count)
736
{
737
	// for each attribute we want to encode the following function into the quadric:
738
	// (eval(pos) - attr)^2
739
	// where eval(pos) interpolates attribute across the triangle like so:
740
	// eval(pos) = pos.x * gx + pos.y * gy + pos.z * gz + gw
741
	// where gx/gy/gz/gw are gradients
742
	Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
743
	Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
744

745
	// normal = cross(p1 - p0, p2 - p0)
746
	Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x};
747
	float area = sqrtf(normal.x * normal.x + normal.y * normal.y + normal.z * normal.z) * 0.5f;
748

749
	// quadric is weighted with the square of edge length (= area)
750
	// this equalizes the units with the positional error (which, after normalization, is a square of distance)
751
	// as a result, a change in weighted attribute of 1 along distance d is approximately equivalent to a change in position of d
752
	float w = area;
753

754
	// we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows:
755
	// v = (d11 * d20 - d01 * d21) / denom
756
	// w = (d00 * d21 - d01 * d20) / denom
757
	// u = 1 - v - w
758
	// here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj)
759
	// note: v2 and d20/d21 can not be evaluated here as v2 is effectively an unknown variable; we need these only as variables for derivation of gradients
760
	const Vector3& v0 = p10;
761
	const Vector3& v1 = p20;
762
	float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z;
763
	float d01 = v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
764
	float d11 = v1.x * v1.x + v1.y * v1.y + v1.z * v1.z;
765
	float denom = d00 * d11 - d01 * d01;
766
	float denomr = denom == 0 ? 0.f : 1.f / denom;
767

768
	// precompute gradient factors
769
	// these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out expressions that are shared between attributes
770
	float gx1 = (d11 * v0.x - d01 * v1.x) * denomr;
771
	float gx2 = (d00 * v1.x - d01 * v0.x) * denomr;
772
	float gy1 = (d11 * v0.y - d01 * v1.y) * denomr;
773
	float gy2 = (d00 * v1.y - d01 * v0.y) * denomr;
774
	float gz1 = (d11 * v0.z - d01 * v1.z) * denomr;
775
	float gz2 = (d00 * v1.z - d01 * v0.z) * denomr;
776

777
	memset(&Q, 0, sizeof(Quadric));
778

779
	Q.w = w;
780

781
	for (size_t k = 0; k < attribute_count; ++k)
782
	{
783
		float a0 = va0[k], a1 = va1[k], a2 = va2[k];
784

785
		// compute gradient of eval(pos) for x/y/z/w
786
		// the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w
787
		float gx = gx1 * (a1 - a0) + gx2 * (a2 - a0);
788
		float gy = gy1 * (a1 - a0) + gy2 * (a2 - a0);
789
		float gz = gz1 * (a1 - a0) + gz2 * (a2 - a0);
790
		float gw = a0 - p0.x * gx - p0.y * gy - p0.z * gz;
791

792
		// quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K
793
		// since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields
794
		// note: for simplicity we scale all factors by weight here instead of outside the loop
795
		Q.a00 += w * (gx * gx);
796
		Q.a11 += w * (gy * gy);
797
		Q.a22 += w * (gz * gz);
798

799
		Q.a10 += w * (gy * gx);
800
		Q.a20 += w * (gz * gx);
801
		Q.a21 += w * (gz * gy);
802

803
		Q.b0 += w * (gx * gw);
804
		Q.b1 += w * (gy * gw);
805
		Q.b2 += w * (gz * gw);
806

807
		Q.c += w * (gw * gw);
808

809
		// the only remaining sum components are ones that depend on attr; these will be addded during error evaluation, see quadricError
810
		G[k].gx = w * gx;
811
		G[k].gy = w * gy;
812
		G[k].gz = w * gz;
813
		G[k].gw = w * gw;
814
	}
815
}
816

817
static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
818
{
819
	for (size_t i = 0; i < index_count; i += 3)
820
	{
821
		unsigned int i0 = indices[i + 0];
822
		unsigned int i1 = indices[i + 1];
823
		unsigned int i2 = indices[i + 2];
824

825
		Quadric Q;
826
		quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
827

828
		quadricAdd(vertex_quadrics[remap[i0]], Q);
829
		quadricAdd(vertex_quadrics[remap[i1]], Q);
830
		quadricAdd(vertex_quadrics[remap[i2]], Q);
831
	}
832
}
833

834
static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback)
835
{
836
	for (size_t i = 0; i < index_count; i += 3)
837
	{
838
		static const int next[4] = {1, 2, 0, 1};
839

840
		for (int e = 0; e < 3; ++e)
841
		{
842
			unsigned int i0 = indices[i + e];
843
			unsigned int i1 = indices[i + next[e]];
844

845
			unsigned char k0 = vertex_kind[i0];
846
			unsigned char k1 = vertex_kind[i1];
847

848
			// check that either i0 or i1 are border/seam and are on the same edge loop
849
			// note that we need to add the error even for edged that connect e.g. border & locked
850
			// if we don't do that, the adjacent border->border edge won't have correct errors for corners
851
			if (k0 != Kind_Border && k0 != Kind_Seam && k1 != Kind_Border && k1 != Kind_Seam)
852
				continue;
853

854
			if ((k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1)
855
				continue;
856

857
			if ((k1 == Kind_Border || k1 == Kind_Seam) && loopback[i1] != i0)
858
				continue;
859

860
			// seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges
861
			if (kHasOpposite[k0][k1] && remap[i1] > remap[i0])
862
				continue;
863

864
			unsigned int i2 = indices[i + next[e + 1]];
865

866
			// we try hard to maintain border edge geometry; seam edges can move more freely
867
			// due to topological restrictions on collapses, seam quadrics slightly improves collapse structure but aren't critical
868
			const float kEdgeWeightSeam = 1.f;
869
			const float kEdgeWeightBorder = 10.f;
870

871
			float edgeWeight = (k0 == Kind_Border || k1 == Kind_Border) ? kEdgeWeightBorder : kEdgeWeightSeam;
872

873
			Quadric Q;
874
			quadricFromTriangleEdge(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], edgeWeight);
875

876
			quadricAdd(vertex_quadrics[remap[i0]], Q);
877
			quadricAdd(vertex_quadrics[remap[i1]], Q);
878
		}
879
	}
880
}
881

882
static void fillAttributeQuadrics(Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const float* vertex_attributes, size_t attribute_count)
883
{
884
	for (size_t i = 0; i < index_count; i += 3)
885
	{
886
		unsigned int i0 = indices[i + 0];
887
		unsigned int i1 = indices[i + 1];
888
		unsigned int i2 = indices[i + 2];
889

890
		Quadric QA;
891
		QuadricGrad G[kMaxAttributes];
892
		quadricFromAttributes(QA, G, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], &vertex_attributes[i0 * attribute_count], &vertex_attributes[i1 * attribute_count], &vertex_attributes[i2 * attribute_count], attribute_count);
893

894
		quadricAdd(attribute_quadrics[i0], QA);
895
		quadricAdd(attribute_quadrics[i1], QA);
896
		quadricAdd(attribute_quadrics[i2], QA);
897

898
		quadricAdd(&attribute_gradients[i0 * attribute_count], G, attribute_count);
899
		quadricAdd(&attribute_gradients[i1 * attribute_count], G, attribute_count);
900
		quadricAdd(&attribute_gradients[i2 * attribute_count], G, attribute_count);
901
	}
902
}
903

904
// does triangle ABC flip when C is replaced with D?
905
static bool hasTriangleFlip(const Vector3& a, const Vector3& b, const Vector3& c, const Vector3& d)
906
{
907
	Vector3 eb = {b.x - a.x, b.y - a.y, b.z - a.z};
908
	Vector3 ec = {c.x - a.x, c.y - a.y, c.z - a.z};
909
	Vector3 ed = {d.x - a.x, d.y - a.y, d.z - a.z};
910

911
	Vector3 nbc = {eb.y * ec.z - eb.z * ec.y, eb.z * ec.x - eb.x * ec.z, eb.x * ec.y - eb.y * ec.x};
912
	Vector3 nbd = {eb.y * ed.z - eb.z * ed.y, eb.z * ed.x - eb.x * ed.z, eb.x * ed.y - eb.y * ed.x};
913

914
	float ndp = nbc.x * nbd.x + nbc.y * nbd.y + nbc.z * nbd.z;
915
	float abc = nbc.x * nbc.x + nbc.y * nbc.y + nbc.z * nbc.z;
916
	float abd = nbd.x * nbd.x + nbd.y * nbd.y + nbd.z * nbd.z;
917

918
	// scale is cos(angle); somewhat arbitrarily set to ~75 degrees
919
	// note that the "pure" check is ndp <= 0 (90 degree cutoff) but that allows flipping through a series of close-to-90 collapses
920
	return ndp <= 0.25f * sqrtf(abc * abd);
921
}
922

923
static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, const unsigned int* collapse_remap, unsigned int i0, unsigned int i1)
924
{
925
	assert(collapse_remap[i0] == i0);
926
	assert(collapse_remap[i1] == i1);
927

928
	const Vector3& v0 = vertex_positions[i0];
929
	const Vector3& v1 = vertex_positions[i1];
930

931
	const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[i0]];
932
	size_t count = adjacency.offsets[i0 + 1] - adjacency.offsets[i0];
933

934
	for (size_t i = 0; i < count; ++i)
935
	{
936
		unsigned int a = collapse_remap[edges[i].next];
937
		unsigned int b = collapse_remap[edges[i].prev];
938

939
		// skip triangles that will get collapsed by i0->i1 collapse or already got collapsed previously
940
		if (a == i1 || b == i1 || a == b)
941
			continue;
942

943
		// early-out when at least one triangle flips due to a collapse
944
		if (hasTriangleFlip(vertex_positions[a], vertex_positions[b], v0, v1))
945
		{
946
#if TRACE >= 2
947
			printf("edge block %d -> %d: flip welded %d %d %d\n", i0, i1, a, i0, b);
948
#endif
949

950
			return true;
951
		}
952
	}
953

954
	return false;
955
}
956

957
static size_t boundEdgeCollapses(const EdgeAdjacency& adjacency, size_t vertex_count, size_t index_count, unsigned char* vertex_kind)
958
{
959
	size_t dual_count = 0;
960

961
	for (size_t i = 0; i < vertex_count; ++i)
962
	{
963
		unsigned char k = vertex_kind[i];
964
		unsigned int e = adjacency.offsets[i + 1] - adjacency.offsets[i];
965

966
		dual_count += (k == Kind_Manifold || k == Kind_Seam) ? e : 0;
967
	}
968

969
	assert(dual_count <= index_count);
970

971
	// pad capacity by 3 so that we can check for overflow once per triangle instead of once per edge
972
	return (index_count - dual_count / 2) + 3;
973
}
974

975
static size_t pickEdgeCollapses(Collapse* collapses, size_t collapse_capacity, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback)
976
{
977
	size_t collapse_count = 0;
978

979
	for (size_t i = 0; i < index_count; i += 3)
980
	{
981
		static const int next[3] = {1, 2, 0};
982

983
		// this should never happen as boundEdgeCollapses should give an upper bound for the collapse count, but in an unlikely event it does we can just drop extra collapses
984
		if (collapse_count + 3 > collapse_capacity)
985
			break;
986

987
		for (int e = 0; e < 3; ++e)
988
		{
989
			unsigned int i0 = indices[i + e];
990
			unsigned int i1 = indices[i + next[e]];
991

992
			// this can happen either when input has a zero-length edge, or when we perform collapses for complex
993
			// topology w/seams and collapse a manifold vertex that connects to both wedges onto one of them
994
			// we leave edges like this alone since they may be important for preserving mesh integrity
995
			if (remap[i0] == remap[i1])
996
				continue;
997

998
			unsigned char k0 = vertex_kind[i0];
999
			unsigned char k1 = vertex_kind[i1];
1000

1001
			// the edge has to be collapsible in at least one direction
1002
			if (!(kCanCollapse[k0][k1] | kCanCollapse[k1][k0]))
1003
				continue;
1004

1005
			// manifold and seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges
1006
			if (kHasOpposite[k0][k1] && remap[i1] > remap[i0])
1007
				continue;
1008

1009
			// two vertices are on a border or a seam, but there's no direct edge between them
1010
			// this indicates that they belong to two different edge loops and we should not collapse this edge
1011
			// loop[] tracks half edges so we only need to check i0->i1
1012
			if (k0 == k1 && (k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1)
1013
				continue;
1014

1015
			if (k0 == Kind_Locked || k1 == Kind_Locked)
1016
			{
1017
				// the same check as above, but for border/seam -> locked collapses
1018
				// loop[] and loopback[] track half edges so we only need to check one of them
1019
				if ((k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1)
1020
					continue;
1021
				if ((k1 == Kind_Border || k1 == Kind_Seam) && loopback[i1] != i0)
1022
					continue;
1023
			}
1024

1025
			// edge can be collapsed in either direction - we will pick the one with minimum error
1026
			// note: we evaluate error later during collapse ranking, here we just tag the edge as bidirectional
1027
			if (kCanCollapse[k0][k1] & kCanCollapse[k1][k0])
1028
			{
1029
				Collapse c = {i0, i1, {/* bidi= */ 1}};
1030
				collapses[collapse_count++] = c;
1031
			}
1032
			else
1033
			{
1034
				// edge can only be collapsed in one direction
1035
				unsigned int e0 = kCanCollapse[k0][k1] ? i0 : i1;
1036
				unsigned int e1 = kCanCollapse[k0][k1] ? i1 : i0;
1037

1038
				Collapse c = {e0, e1, {/* bidi= */ 0}};
1039
				collapses[collapse_count++] = c;
1040
			}
1041
		}
1042
	}
1043

1044
	return collapse_count;
1045
}
1046

1047
static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const float* vertex_attributes, const Quadric* vertex_quadrics, const Quadric* attribute_quadrics, const QuadricGrad* attribute_gradients, size_t attribute_count, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback)
1048
{
1049
	for (size_t i = 0; i < collapse_count; ++i)
1050
	{
1051
		Collapse& c = collapses[i];
1052

1053
		unsigned int i0 = c.v0;
1054
		unsigned int i1 = c.v1;
1055

1056
		// most edges are bidirectional which means we need to evaluate errors for two collapses
1057
		// to keep this code branchless we just use the same edge for unidirectional edges
1058
		unsigned int j0 = c.bidi ? i1 : i0;
1059
		unsigned int j1 = c.bidi ? i0 : i1;
1060

1061
		float ei = quadricError(vertex_quadrics[remap[i0]], vertex_positions[i1]);
1062
		float ej = c.bidi ? quadricError(vertex_quadrics[remap[j0]], vertex_positions[j1]) : FLT_MAX;
1063

1064
#if TRACE >= 3
1065
		float di = ei, dj = ej;
1066
#endif
1067

1068
		if (attribute_count)
1069
		{
1070
			ei += quadricError(attribute_quadrics[i0], &attribute_gradients[i0 * attribute_count], attribute_count, vertex_positions[i1], &vertex_attributes[i1 * attribute_count]);
1071
			ej += c.bidi ? quadricError(attribute_quadrics[j0], &attribute_gradients[j0 * attribute_count], attribute_count, vertex_positions[j1], &vertex_attributes[j1 * attribute_count]) : 0;
1072

1073
			// note: seam edges need to aggregate attribute errors between primary and secondary edges, as attribute quadrics are separate
1074
			if (vertex_kind[i0] == Kind_Seam)
1075
			{
1076
				// for seam collapses we need to find the seam pair; this is a bit tricky since we need to rely on edge loops as target vertex may be locked (and thus have more than two wedges)
1077
				unsigned int s0 = wedge[i0];
1078
				unsigned int s1 = loop[i0] == i1 ? loopback[s0] : loop[s0];
1079

1080
				assert(s0 != i0 && wedge[s0] == i0);
1081
				assert(s1 != ~0u && remap[s1] == remap[i1]);
1082

1083
				// note: this should never happen due to the assertion above, but when disabled if we ever hit this case we'll get a memory safety issue; for now play it safe
1084
				s1 = (s1 != ~0u) ? s1 : wedge[i1];
1085

1086
				ei += quadricError(attribute_quadrics[s0], &attribute_gradients[s0 * attribute_count], attribute_count, vertex_positions[s1], &vertex_attributes[s1 * attribute_count]);
1087
				ej += c.bidi ? quadricError(attribute_quadrics[s1], &attribute_gradients[s1 * attribute_count], attribute_count, vertex_positions[s0], &vertex_attributes[s0 * attribute_count]) : 0;
1088
			}
1089
		}
1090

1091
		// pick edge direction with minimal error
1092
		c.v0 = ei <= ej ? i0 : j0;
1093
		c.v1 = ei <= ej ? i1 : j1;
1094
		c.error = ei <= ej ? ei : ej;
1095

1096
#if TRACE >= 3
1097
		if (i0 == j0) // c.bidi has been overwritten
1098
			printf("edge eval %d -> %d: error %f (pos %f, attr %f)\n", c.v0, c.v1,
1099
			    sqrtf(c.error), sqrtf(ei <= ej ? di : dj), sqrtf(ei <= ej ? ei - di : ej - dj));
1100
		else
1101
			printf("edge eval %d -> %d: error %f (pos %f, attr %f); reverse %f (pos %f, attr %f)\n", c.v0, c.v1,
1102
			    sqrtf(ei <= ej ? ei : ej), sqrtf(ei <= ej ? di : dj), sqrtf(ei <= ej ? ei - di : ej - dj),
1103
			    sqrtf(ei <= ej ? ej : ei), sqrtf(ei <= ej ? dj : di), sqrtf(ei <= ej ? ej - dj : ei - di));
1104
#endif
1105
	}
1106
}
1107

1108
static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapses, size_t collapse_count)
1109
{
1110
	// we use counting sort to order collapses by error; since the exact sort order is not as critical,
1111
	// only top 12 bits of exponent+mantissa (8 bits of exponent and 4 bits of mantissa) are used.
1112
	// to avoid excessive stack usage, we clamp the exponent range as collapses with errors much higher than 1 are not useful.
1113
	const unsigned int sort_bits = 12;
1114
	const unsigned int sort_bins = 2048 + 512; // exponent range [-127, 32)
1115

1116
	// fill histogram for counting sort
1117
	unsigned int histogram[sort_bins];
1118
	memset(histogram, 0, sizeof(histogram));
1119

1120
	for (size_t i = 0; i < collapse_count; ++i)
1121
	{
1122
		// skip sign bit since error is non-negative
1123
		unsigned int error = collapses[i].errorui;
1124
		unsigned int key = (error << 1) >> (32 - sort_bits);
1125
		key = key < sort_bins ? key : sort_bins - 1;
1126

1127
		histogram[key]++;
1128
	}
1129

1130
	// compute offsets based on histogram data
1131
	size_t histogram_sum = 0;
1132

1133
	for (size_t i = 0; i < sort_bins; ++i)
1134
	{
1135
		size_t count = histogram[i];
1136
		histogram[i] = unsigned(histogram_sum);
1137
		histogram_sum += count;
1138
	}
1139

1140
	assert(histogram_sum == collapse_count);
1141

1142
	// compute sort order based on offsets
1143
	for (size_t i = 0; i < collapse_count; ++i)
1144
	{
1145
		// skip sign bit since error is non-negative
1146
		unsigned int error = collapses[i].errorui;
1147
		unsigned int key = (error << 1) >> (32 - sort_bits);
1148
		key = key < sort_bins ? key : sort_bins - 1;
1149

1150
		sort_order[histogram[key]++] = unsigned(i);
1151
	}
1152
}
1153

1154
static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error)
1155
{
1156
	size_t edge_collapses = 0;
1157
	size_t triangle_collapses = 0;
1158

1159
	// most collapses remove 2 triangles; use this to establish a bound on the pass in terms of error limit
1160
	// note that edge_collapse_goal is an estimate; triangle_collapse_goal will be used to actually limit collapses
1161
	size_t edge_collapse_goal = triangle_collapse_goal / 2;
1162

1163
#if TRACE
1164
	size_t stats[7] = {};
1165
#endif
1166

1167
	for (size_t i = 0; i < collapse_count; ++i)
1168
	{
1169
		const Collapse& c = collapses[collapse_order[i]];
1170

1171
		TRACESTATS(0);
1172

1173
		if (c.error > error_limit)
1174
		{
1175
			TRACESTATS(4);
1176
			break;
1177
		}
1178

1179
		if (triangle_collapses >= triangle_collapse_goal)
1180
		{
1181
			TRACESTATS(5);
1182
			break;
1183
		}
1184

1185
		// we limit the error in each pass based on the error of optimal last collapse; since many collapses will be locked
1186
		// as they will share vertices with other successfull collapses, we need to increase the acceptable error by some factor
1187
		float error_goal = edge_collapse_goal < collapse_count ? 1.5f * collapses[collapse_order[edge_collapse_goal]].error : FLT_MAX;
1188

1189
		// on average, each collapse is expected to lock 6 other collapses; to avoid degenerate passes on meshes with odd
1190
		// topology, we only abort if we got over 1/6 collapses accordingly.
1191
		if (c.error > error_goal && c.error > result_error && triangle_collapses > triangle_collapse_goal / 6)
1192
		{
1193
			TRACESTATS(6);
1194
			break;
1195
		}
1196

1197
		unsigned int i0 = c.v0;
1198
		unsigned int i1 = c.v1;
1199

1200
		unsigned int r0 = remap[i0];
1201
		unsigned int r1 = remap[i1];
1202

1203
		unsigned char kind = vertex_kind[i0];
1204

1205
		// we don't collapse vertices that had source or target vertex involved in a collapse
1206
		// it's important to not move the vertices twice since it complicates the tracking/remapping logic
1207
		// it's important to not move other vertices towards a moved vertex to preserve error since we don't re-rank collapses mid-pass
1208
		if (collapse_locked[r0] | collapse_locked[r1])
1209
		{
1210
			TRACESTATS(1);
1211
			continue;
1212
		}
1213

1214
		if (hasTriangleFlips(adjacency, vertex_positions, collapse_remap, r0, r1))
1215
		{
1216
			// adjust collapse goal since this collapse is invalid and shouldn't factor into error goal
1217
			edge_collapse_goal++;
1218

1219
			TRACESTATS(2);
1220
			continue;
1221
		}
1222

1223
#if TRACE >= 2
1224
		printf("edge commit %d -> %d: kind %d->%d, error %f\n", i0, i1, vertex_kind[i0], vertex_kind[i1], sqrtf(c.error));
1225
#endif
1226

1227
		assert(collapse_remap[r0] == r0);
1228
		assert(collapse_remap[r1] == r1);
1229

1230
		if (kind == Kind_Complex)
1231
		{
1232
			// remap all vertices in the complex to the target vertex
1233
			unsigned int v = i0;
1234

1235
			do
1236
			{
1237
				collapse_remap[v] = i1;
1238
				v = wedge[v];
1239
			} while (v != i0);
1240
		}
1241
		else if (kind == Kind_Seam)
1242
		{
1243
			// for seam collapses we need to move the seam pair together; this is a bit tricky since we need to rely on edge loops as target vertex may be locked (and thus have more than two wedges)
1244
			unsigned int s0 = wedge[i0];
1245
			unsigned int s1 = loop[i0] == i1 ? loopback[s0] : loop[s0];
1246
			assert(s0 != i0 && wedge[s0] == i0);
1247
			assert(s1 != ~0u && remap[s1] == r1);
1248

1249
			// additional asserts to verify that the seam pair is consistent
1250
			assert(kind != vertex_kind[i1] || s1 == wedge[i1]);
1251
			assert(loop[i0] == i1 || loopback[i0] == i1);
1252
			assert(loop[s0] == s1 || loopback[s0] == s1);
1253

1254
			// note: this should never happen due to the assertion above, but when disabled if we ever hit this case we'll get a memory safety issue; for now play it safe
1255
			s1 = (s1 != ~0u) ? s1 : wedge[i1];
1256

1257
			collapse_remap[i0] = i1;
1258
			collapse_remap[s0] = s1;
1259
		}
1260
		else
1261
		{
1262
			assert(wedge[i0] == i0);
1263

1264
			collapse_remap[i0] = i1;
1265
		}
1266

1267
		// note: we technically don't need to lock r1 if it's a locked vertex, as it can't move and its quadric won't be used
1268
		// however, this results in slightly worse error on some meshes because the locked collapses get an unfair advantage wrt scheduling
1269
		collapse_locked[r0] = 1;
1270
		collapse_locked[r1] = 1;
1271

1272
		// border edges collapse 1 triangle, other edges collapse 2 or more
1273
		triangle_collapses += (kind == Kind_Border) ? 1 : 2;
1274
		edge_collapses++;
1275

1276
		result_error = result_error < c.error ? c.error : result_error;
1277
	}
1278

1279
#if TRACE
1280
	float error_goal_last = edge_collapse_goal < collapse_count ? 1.5f * collapses[collapse_order[edge_collapse_goal]].error : FLT_MAX;
1281
	float error_goal_limit = error_goal_last < error_limit ? error_goal_last : error_limit;
1282

1283
	printf("removed %d triangles, error %e (goal %e); evaluated %d/%d collapses (done %d, skipped %d, invalid %d); %s\n",
1284
	    int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_limit),
1285
	    int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2]),
1286
	    stats[4] ? "error limit" : (stats[5] ? "count limit" : (stats[6] ? "error goal" : "out of collapses")));
1287
#endif
1288

1289
	return edge_collapses;
1290
}
1291

1292
static void updateQuadrics(const unsigned int* collapse_remap, size_t vertex_count, Quadric* vertex_quadrics, Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, size_t attribute_count, const Vector3* vertex_positions, const unsigned int* remap, float& vertex_error)
1293
{
1294
	for (size_t i = 0; i < vertex_count; ++i)
1295
	{
1296
		if (collapse_remap[i] == i)
1297
			continue;
1298

1299
		unsigned int i0 = unsigned(i);
1300
		unsigned int i1 = collapse_remap[i];
1301

1302
		unsigned int r0 = remap[i0];
1303
		unsigned int r1 = remap[i1];
1304

1305
		// ensure we only update vertex_quadrics once: primary vertex must be moved if any wedge is moved
1306
		if (i0 == r0)
1307
			quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]);
1308

1309
		if (attribute_count)
1310
		{
1311
			quadricAdd(attribute_quadrics[i1], attribute_quadrics[i0]);
1312
			quadricAdd(&attribute_gradients[i1 * attribute_count], &attribute_gradients[i0 * attribute_count], attribute_count);
1313

1314
			if (i0 == r0)
1315
			{
1316
				// when attributes are used, distance error needs to be recomputed as collapses don't track it; it is safe to do this after the quadric adjustment
1317
				float derr = quadricError(vertex_quadrics[r0], vertex_positions[r1]);
1318
				vertex_error = vertex_error < derr ? derr : vertex_error;
1319
			}
1320
		}
1321
	}
1322
}
1323

1324
static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const unsigned int* collapse_remap)
1325
{
1326
	size_t write = 0;
1327

1328
	for (size_t i = 0; i < index_count; i += 3)
1329
	{
1330
		unsigned int v0 = collapse_remap[indices[i + 0]];
1331
		unsigned int v1 = collapse_remap[indices[i + 1]];
1332
		unsigned int v2 = collapse_remap[indices[i + 2]];
1333

1334
		// we never move the vertex twice during a single pass
1335
		assert(collapse_remap[v0] == v0);
1336
		assert(collapse_remap[v1] == v1);
1337
		assert(collapse_remap[v2] == v2);
1338

1339
		if (v0 != v1 && v0 != v2 && v1 != v2)
1340
		{
1341
			indices[write + 0] = v0;
1342
			indices[write + 1] = v1;
1343
			indices[write + 2] = v2;
1344
			write += 3;
1345
		}
1346
	}
1347

1348
	return write;
1349
}
1350

1351
static void remapEdgeLoops(unsigned int* loop, size_t vertex_count, const unsigned int* collapse_remap)
1352
{
1353
	for (size_t i = 0; i < vertex_count; ++i)
1354
	{
1355
		// note: this is a no-op for vertices that were remapped
1356
		// ideally we would clear the loop entries for those for consistency, even though they aren't going to be used
1357
		// however, the remapping process needs loop information for remapped vertices, so this would require a separate pass
1358
		if (loop[i] != ~0u)
1359
		{
1360
			unsigned int l = loop[i];
1361
			unsigned int r = collapse_remap[l];
1362

1363
			// i == r is a special case when the seam edge is collapsed in a direction opposite to where loop goes
1364
			if (i == r)
1365
				loop[i] = (loop[l] != ~0u) ? collapse_remap[loop[l]] : ~0u;
1366
			else
1367
				loop[i] = r;
1368
		}
1369
	}
1370
}
1371

1372
static unsigned int follow(unsigned int* parents, unsigned int index)
1373
{
1374
	while (index != parents[index])
1375
	{
1376
		unsigned int parent = parents[index];
1377
		parents[index] = parents[parent];
1378
		index = parent;
1379
	}
1380

1381
	return index;
1382
}
1383

1384
static size_t buildComponents(unsigned int* components, size_t vertex_count, const unsigned int* indices, size_t index_count, const unsigned int* remap)
1385
{
1386
	for (size_t i = 0; i < vertex_count; ++i)
1387
		components[i] = unsigned(i);
1388

1389
	// compute a unique (but not sequential!) index for each component via union-find
1390
	for (size_t i = 0; i < index_count; i += 3)
1391
	{
1392
		static const int next[4] = {1, 2, 0, 1};
1393

1394
		for (int e = 0; e < 3; ++e)
1395
		{
1396
			unsigned int i0 = indices[i + e];
1397
			unsigned int i1 = indices[i + next[e]];
1398

1399
			unsigned int r0 = remap[i0];
1400
			unsigned int r1 = remap[i1];
1401

1402
			r0 = follow(components, r0);
1403
			r1 = follow(components, r1);
1404

1405
			// merge components with larger indices into components with smaller indices
1406
			// this guarantees that the root of the component is always the one with the smallest index
1407
			if (r0 != r1)
1408
				components[r0 < r1 ? r1 : r0] = r0 < r1 ? r0 : r1;
1409
		}
1410
	}
1411

1412
	// make sure each element points to the component root *before* we renumber the components
1413
	for (size_t i = 0; i < vertex_count; ++i)
1414
		if (remap[i] == i)
1415
			components[i] = follow(components, unsigned(i));
1416

1417
	unsigned int next_component = 0;
1418

1419
	// renumber components using sequential indices
1420
	// a sequential pass is sufficient because component root always has the smallest index
1421
	// note: it is unsafe to use follow() in this pass because we're replacing component links with sequential indices inplace
1422
	for (size_t i = 0; i < vertex_count; ++i)
1423
	{
1424
		if (remap[i] == i)
1425
		{
1426
			unsigned int root = components[i];
1427
			assert(root <= i); // make sure we already computed the component for non-roots
1428
			components[i] = (root == i) ? next_component++ : components[root];
1429
		}
1430
		else
1431
		{
1432
			assert(remap[i] < i); // make sure we already computed the component
1433
			components[i] = components[remap[i]];
1434
		}
1435
	}
1436

1437
	return next_component;
1438
}
1439

1440
static void measureComponents(float* component_errors, size_t component_count, const unsigned int* components, const Vector3* vertex_positions, size_t vertex_count)
1441
{
1442
	memset(component_errors, 0, component_count * 4 * sizeof(float));
1443

1444
	// compute approximate sphere center for each component as an average
1445
	for (size_t i = 0; i < vertex_count; ++i)
1446
	{
1447
		unsigned int c = components[i];
1448
		assert(components[i] < component_count);
1449

1450
		Vector3 v = vertex_positions[i]; // copy avoids aliasing issues
1451

1452
		component_errors[c * 4 + 0] += v.x;
1453
		component_errors[c * 4 + 1] += v.y;
1454
		component_errors[c * 4 + 2] += v.z;
1455
		component_errors[c * 4 + 3] += 1; // weight
1456
	}
1457

1458
	// complete the center computation, and reinitialize [3] as a radius
1459
	for (size_t i = 0; i < component_count; ++i)
1460
	{
1461
		float w = component_errors[i * 4 + 3];
1462
		float iw = w == 0.f ? 0.f : 1.f / w;
1463

1464
		component_errors[i * 4 + 0] *= iw;
1465
		component_errors[i * 4 + 1] *= iw;
1466
		component_errors[i * 4 + 2] *= iw;
1467
		component_errors[i * 4 + 3] = 0; // radius
1468
	}
1469

1470
	// compute squared radius for each component
1471
	for (size_t i = 0; i < vertex_count; ++i)
1472
	{
1473
		unsigned int c = components[i];
1474

1475
		float dx = vertex_positions[i].x - component_errors[c * 4 + 0];
1476
		float dy = vertex_positions[i].y - component_errors[c * 4 + 1];
1477
		float dz = vertex_positions[i].z - component_errors[c * 4 + 2];
1478
		float r = dx * dx + dy * dy + dz * dz;
1479

1480
		component_errors[c * 4 + 3] = component_errors[c * 4 + 3] < r ? r : component_errors[c * 4 + 3];
1481
	}
1482

1483
	// we've used the output buffer as scratch space, so we need to move the results to proper indices
1484
	for (size_t i = 0; i < component_count; ++i)
1485
	{
1486
#if TRACE >= 2
1487
		printf("component %d: center %f %f %f, error %e\n", int(i),
1488
		    component_errors[i * 4 + 0], component_errors[i * 4 + 1], component_errors[i * 4 + 2], sqrtf(component_errors[i * 4 + 3]));
1489
#endif
1490
		// note: we keep the squared error to make it match quadric error metric
1491
		component_errors[i] = component_errors[i * 4 + 3];
1492
	}
1493
}
1494

1495
static size_t pruneComponents(unsigned int* indices, size_t index_count, const unsigned int* components, const float* component_errors, size_t component_count, float error_cutoff, float& nexterror)
1496
{
1497
	size_t write = 0;
1498

1499
	for (size_t i = 0; i < index_count; i += 3)
1500
	{
1501
		unsigned int c = components[indices[i]];
1502
		assert(c == components[indices[i + 1]] && c == components[indices[i + 2]]);
1503

1504
		if (component_errors[c] > error_cutoff)
1505
		{
1506
			indices[write + 0] = indices[i + 0];
1507
			indices[write + 1] = indices[i + 1];
1508
			indices[write + 2] = indices[i + 2];
1509
			write += 3;
1510
		}
1511
	}
1512

1513
#if TRACE
1514
	size_t pruned_components = 0;
1515
	for (size_t i = 0; i < component_count; ++i)
1516
		pruned_components += (component_errors[i] >= nexterror && component_errors[i] <= error_cutoff);
1517

1518
	printf("pruned %d triangles in %d components (goal %e)\n", int((index_count - write) / 3), int(pruned_components), sqrtf(error_cutoff));
1519
#endif
1520

1521
	// update next error with the smallest error of the remaining components for future pruning
1522
	nexterror = FLT_MAX;
1523
	for (size_t i = 0; i < component_count; ++i)
1524
		if (component_errors[i] > error_cutoff)
1525
			nexterror = nexterror > component_errors[i] ? component_errors[i] : nexterror;
1526

1527
	return write;
1528
}
1529

1530
struct CellHasher
1531
{
1532
	const unsigned int* vertex_ids;
1533

1534
	size_t hash(unsigned int i) const
1535
	{
1536
		unsigned int h = vertex_ids[i];
1537

1538
		// MurmurHash2 finalizer
1539
		h ^= h >> 13;
1540
		h *= 0x5bd1e995;
1541
		h ^= h >> 15;
1542
		return h;
1543
	}
1544

1545
	bool equal(unsigned int lhs, unsigned int rhs) const
1546
	{
1547
		return vertex_ids[lhs] == vertex_ids[rhs];
1548
	}
1549
};
1550

1551
struct IdHasher
1552
{
1553
	size_t hash(unsigned int id) const
1554
	{
1555
		unsigned int h = id;
1556

1557
		// MurmurHash2 finalizer
1558
		h ^= h >> 13;
1559
		h *= 0x5bd1e995;
1560
		h ^= h >> 15;
1561
		return h;
1562
	}
1563

1564
	bool equal(unsigned int lhs, unsigned int rhs) const
1565
	{
1566
		return lhs == rhs;
1567
	}
1568
};
1569

1570
struct TriangleHasher
1571
{
1572
	const unsigned int* indices;
1573

1574
	size_t hash(unsigned int i) const
1575
	{
1576
		const unsigned int* tri = indices + i * 3;
1577

1578
		// Optimized Spatial Hashing for Collision Detection of Deformable Objects
1579
		return (tri[0] * 73856093) ^ (tri[1] * 19349663) ^ (tri[2] * 83492791);
1580
	}
1581

1582
	bool equal(unsigned int lhs, unsigned int rhs) const
1583
	{
1584
		const unsigned int* lt = indices + lhs * 3;
1585
		const unsigned int* rt = indices + rhs * 3;
1586

1587
		return lt[0] == rt[0] && lt[1] == rt[1] && lt[2] == rt[2];
1588
	}
1589
};
1590

1591
static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_positions, size_t vertex_count, int grid_size)
1592
{
1593
	assert(grid_size >= 1 && grid_size <= 1024);
1594
	float cell_scale = float(grid_size - 1);
1595

1596
	for (size_t i = 0; i < vertex_count; ++i)
1597
	{
1598
		const Vector3& v = vertex_positions[i];
1599

1600
		int xi = int(v.x * cell_scale + 0.5f);
1601
		int yi = int(v.y * cell_scale + 0.5f);
1602
		int zi = int(v.z * cell_scale + 0.5f);
1603

1604
		vertex_ids[i] = (xi << 20) | (yi << 10) | zi;
1605
	}
1606
}
1607

1608
static size_t countTriangles(const unsigned int* vertex_ids, const unsigned int* indices, size_t index_count)
1609
{
1610
	size_t result = 0;
1611

1612
	for (size_t i = 0; i < index_count; i += 3)
1613
	{
1614
		unsigned int id0 = vertex_ids[indices[i + 0]];
1615
		unsigned int id1 = vertex_ids[indices[i + 1]];
1616
		unsigned int id2 = vertex_ids[indices[i + 2]];
1617

1618
		result += (id0 != id1) & (id0 != id2) & (id1 != id2);
1619
	}
1620

1621
	return result;
1622
}
1623

1624
static size_t fillVertexCells(unsigned int* table, size_t table_size, unsigned int* vertex_cells, const unsigned int* vertex_ids, size_t vertex_count)
1625
{
1626
	CellHasher hasher = {vertex_ids};
1627

1628
	memset(table, -1, table_size * sizeof(unsigned int));
1629

1630
	size_t result = 0;
1631

1632
	for (size_t i = 0; i < vertex_count; ++i)
1633
	{
1634
		unsigned int* entry = hashLookup2(table, table_size, hasher, unsigned(i), ~0u);
1635

1636
		if (*entry == ~0u)
1637
		{
1638
			*entry = unsigned(i);
1639
			vertex_cells[i] = unsigned(result++);
1640
		}
1641
		else
1642
		{
1643
			vertex_cells[i] = vertex_cells[*entry];
1644
		}
1645
	}
1646

1647
	return result;
1648
}
1649

1650
static size_t countVertexCells(unsigned int* table, size_t table_size, const unsigned int* vertex_ids, size_t vertex_count)
1651
{
1652
	IdHasher hasher;
1653

1654
	memset(table, -1, table_size * sizeof(unsigned int));
1655

1656
	size_t result = 0;
1657

1658
	for (size_t i = 0; i < vertex_count; ++i)
1659
	{
1660
		unsigned int id = vertex_ids[i];
1661
		unsigned int* entry = hashLookup2(table, table_size, hasher, id, ~0u);
1662

1663
		result += (*entry == ~0u);
1664
		*entry = id;
1665
	}
1666

1667
	return result;
1668
}
1669

1670
static void fillCellQuadrics(Quadric* cell_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* vertex_cells)
1671
{
1672
	for (size_t i = 0; i < index_count; i += 3)
1673
	{
1674
		unsigned int i0 = indices[i + 0];
1675
		unsigned int i1 = indices[i + 1];
1676
		unsigned int i2 = indices[i + 2];
1677

1678
		unsigned int c0 = vertex_cells[i0];
1679
		unsigned int c1 = vertex_cells[i1];
1680
		unsigned int c2 = vertex_cells[i2];
1681

1682
		int single_cell = (c0 == c1) & (c0 == c2);
1683

1684
		Quadric Q;
1685
		quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], single_cell ? 3.f : 1.f);
1686

1687
		if (single_cell)
1688
		{
1689
			quadricAdd(cell_quadrics[c0], Q);
1690
		}
1691
		else
1692
		{
1693
			quadricAdd(cell_quadrics[c0], Q);
1694
			quadricAdd(cell_quadrics[c1], Q);
1695
			quadricAdd(cell_quadrics[c2], Q);
1696
		}
1697
	}
1698
}
1699

1700
static void fillCellReservoirs(Reservoir* cell_reservoirs, size_t cell_count, const Vector3* vertex_positions, const float* vertex_colors, size_t vertex_colors_stride, size_t vertex_count, const unsigned int* vertex_cells)
1701
{
1702
	static const float dummy_color[] = {0.f, 0.f, 0.f};
1703

1704
	size_t vertex_colors_stride_float = vertex_colors_stride / sizeof(float);
1705

1706
	for (size_t i = 0; i < vertex_count; ++i)
1707
	{
1708
		unsigned int cell = vertex_cells[i];
1709
		const Vector3& v = vertex_positions[i];
1710
		Reservoir& r = cell_reservoirs[cell];
1711

1712
		const float* color = vertex_colors ? &vertex_colors[i * vertex_colors_stride_float] : dummy_color;
1713

1714
		r.x += v.x;
1715
		r.y += v.y;
1716
		r.z += v.z;
1717
		r.r += color[0];
1718
		r.g += color[1];
1719
		r.b += color[2];
1720
		r.w += 1.f;
1721
	}
1722

1723
	for (size_t i = 0; i < cell_count; ++i)
1724
	{
1725
		Reservoir& r = cell_reservoirs[i];
1726

1727
		float iw = r.w == 0.f ? 0.f : 1.f / r.w;
1728

1729
		r.x *= iw;
1730
		r.y *= iw;
1731
		r.z *= iw;
1732
		r.r *= iw;
1733
		r.g *= iw;
1734
		r.b *= iw;
1735
	}
1736
}
1737

1738
static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t cell_count, const unsigned int* vertex_cells, const Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count)
1739
{
1740
	memset(cell_remap, -1, cell_count * sizeof(unsigned int));
1741

1742
	for (size_t i = 0; i < vertex_count; ++i)
1743
	{
1744
		unsigned int cell = vertex_cells[i];
1745
		float error = quadricError(cell_quadrics[cell], vertex_positions[i]);
1746

1747
		if (cell_remap[cell] == ~0u || cell_errors[cell] > error)
1748
		{
1749
			cell_remap[cell] = unsigned(i);
1750
			cell_errors[cell] = error;
1751
		}
1752
	}
1753
}
1754

1755
static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t cell_count, const unsigned int* vertex_cells, const Reservoir* cell_reservoirs, const Vector3* vertex_positions, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t vertex_count)
1756
{
1757
	static const float dummy_color[] = {0.f, 0.f, 0.f};
1758

1759
	size_t vertex_colors_stride_float = vertex_colors_stride / sizeof(float);
1760

1761
	memset(cell_remap, -1, cell_count * sizeof(unsigned int));
1762

1763
	for (size_t i = 0; i < vertex_count; ++i)
1764
	{
1765
		unsigned int cell = vertex_cells[i];
1766
		const Vector3& v = vertex_positions[i];
1767
		const Reservoir& r = cell_reservoirs[cell];
1768

1769
		const float* color = vertex_colors ? &vertex_colors[i * vertex_colors_stride_float] : dummy_color;
1770

1771
		float pos_error = (v.x - r.x) * (v.x - r.x) + (v.y - r.y) * (v.y - r.y) + (v.z - r.z) * (v.z - r.z);
1772
		float col_error = (color[0] - r.r) * (color[0] - r.r) + (color[1] - r.g) * (color[1] - r.g) + (color[2] - r.b) * (color[2] - r.b);
1773
		float error = pos_error + color_weight * col_error;
1774

1775
		if (cell_remap[cell] == ~0u || cell_errors[cell] > error)
1776
		{
1777
			cell_remap[cell] = unsigned(i);
1778
			cell_errors[cell] = error;
1779
		}
1780
	}
1781
}
1782

1783
static size_t filterTriangles(unsigned int* destination, unsigned int* tritable, size_t tritable_size, const unsigned int* indices, size_t index_count, const unsigned int* vertex_cells, const unsigned int* cell_remap)
1784
{
1785
	TriangleHasher hasher = {destination};
1786

1787
	memset(tritable, -1, tritable_size * sizeof(unsigned int));
1788

1789
	size_t result = 0;
1790

1791
	for (size_t i = 0; i < index_count; i += 3)
1792
	{
1793
		unsigned int c0 = vertex_cells[indices[i + 0]];
1794
		unsigned int c1 = vertex_cells[indices[i + 1]];
1795
		unsigned int c2 = vertex_cells[indices[i + 2]];
1796

1797
		if (c0 != c1 && c0 != c2 && c1 != c2)
1798
		{
1799
			unsigned int a = cell_remap[c0];
1800
			unsigned int b = cell_remap[c1];
1801
			unsigned int c = cell_remap[c2];
1802

1803
			if (b < a && b < c)
1804
			{
1805
				unsigned int t = a;
1806
				a = b, b = c, c = t;
1807
			}
1808
			else if (c < a && c < b)
1809
			{
1810
				unsigned int t = c;
1811
				c = b, b = a, a = t;
1812
			}
1813

1814
			destination[result * 3 + 0] = a;
1815
			destination[result * 3 + 1] = b;
1816
			destination[result * 3 + 2] = c;
1817

1818
			unsigned int* entry = hashLookup2(tritable, tritable_size, hasher, unsigned(result), ~0u);
1819

1820
			if (*entry == ~0u)
1821
				*entry = unsigned(result++);
1822
		}
1823
	}
1824

1825
	return result * 3;
1826
}
1827

1828
static float interpolate(float y, float x0, float y0, float x1, float y1, float x2, float y2)
1829
{
1830
	// three point interpolation from "revenge of interpolation search" paper
1831
	float num = (y1 - y) * (x1 - x2) * (x1 - x0) * (y2 - y0);
1832
	float den = (y2 - y) * (x1 - x2) * (y0 - y1) + (y0 - y) * (x1 - x0) * (y1 - y2);
1833
	return x1 + num / den;
1834
}
1835

1836
} // namespace meshopt
1837

1838
// Note: this is only exposed for debug visualization purposes; do *not* use
1839
enum
1840
{
1841
	meshopt_SimplifyInternalDebug = 1 << 30
1842
};
1843

1844
size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
1845
{
1846
	using namespace meshopt;
1847

1848
	assert(index_count % 3 == 0);
1849
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
1850
	assert(vertex_positions_stride % sizeof(float) == 0);
1851
	assert(target_index_count <= index_count);
1852
	assert(target_error >= 0);
1853
	assert((options & ~(meshopt_SimplifyLockBorder | meshopt_SimplifySparse | meshopt_SimplifyErrorAbsolute | meshopt_SimplifyPrune | meshopt_SimplifyInternalDebug)) == 0);
1854
	assert(vertex_attributes_stride >= attribute_count * sizeof(float) && vertex_attributes_stride <= 256);
1855
	assert(vertex_attributes_stride % sizeof(float) == 0);
1856
	assert(attribute_count <= kMaxAttributes);
1857
	for (size_t i = 0; i < attribute_count; ++i)
1858
		assert(attribute_weights[i] >= 0);
1859

1860
	meshopt_Allocator allocator;
1861

1862
	unsigned int* result = destination;
1863
	if (result != indices)
1864
		memcpy(result, indices, index_count * sizeof(unsigned int));
1865

1866
	// build an index remap and update indices/vertex_count to minimize the subsequent work
1867
	// note: as a consequence, errors will be computed relative to the subset extent
1868
	unsigned int* sparse_remap = NULL;
1869
	if (options & meshopt_SimplifySparse)
1870
		sparse_remap = buildSparseRemap(result, index_count, vertex_count, &vertex_count, allocator);
1871

1872
	// build adjacency information
1873
	EdgeAdjacency adjacency = {};
1874
	prepareEdgeAdjacency(adjacency, index_count, vertex_count, allocator);
1875
	updateEdgeAdjacency(adjacency, result, index_count, vertex_count, NULL);
1876

1877
	// build position remap that maps each vertex to the one with identical position
1878
	// wedge table stores next vertex with identical position for each vertex
1879
	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
1880
	unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count);
1881
	buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, sparse_remap, allocator);
1882

1883
	// classify vertices; vertex kind determines collapse rules, see kCanCollapse
1884
	unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count);
1885
	unsigned int* loop = allocator.allocate<unsigned int>(vertex_count);
1886
	unsigned int* loopback = allocator.allocate<unsigned int>(vertex_count);
1887
	classifyVertices(vertex_kind, loop, loopback, vertex_count, adjacency, remap, wedge, vertex_lock, sparse_remap, options);
1888

1889
#if TRACE
1890
	size_t unique_positions = 0;
1891
	for (size_t i = 0; i < vertex_count; ++i)
1892
		unique_positions += remap[i] == i;
1893

1894
	printf("position remap: %d vertices => %d positions\n", int(vertex_count), int(unique_positions));
1895

1896
	size_t kinds[Kind_Count] = {};
1897
	for (size_t i = 0; i < vertex_count; ++i)
1898
		kinds[vertex_kind[i]] += remap[i] == i;
1899

1900
	printf("kinds: manifold %d, border %d, seam %d, complex %d, locked %d\n",
1901
	    int(kinds[Kind_Manifold]), int(kinds[Kind_Border]), int(kinds[Kind_Seam]), int(kinds[Kind_Complex]), int(kinds[Kind_Locked]));
1902
#endif
1903

1904
	Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
1905
	float vertex_scale = rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride, sparse_remap);
1906

1907
	float* vertex_attributes = NULL;
1908

1909
	if (attribute_count)
1910
	{
1911
		unsigned int attribute_remap[kMaxAttributes];
1912

1913
		// remap attributes to only include ones with weight > 0 to minimize memory/compute overhead for quadrics
1914
		size_t attributes_used = 0;
1915
		for (size_t i = 0; i < attribute_count; ++i)
1916
			if (attribute_weights[i] > 0)
1917
				attribute_remap[attributes_used++] = unsigned(i);
1918

1919
		attribute_count = attributes_used;
1920
		vertex_attributes = allocator.allocate<float>(vertex_count * attribute_count);
1921
		rescaleAttributes(vertex_attributes, vertex_attributes_data, vertex_count, vertex_attributes_stride, attribute_weights, attribute_count, attribute_remap, sparse_remap);
1922
	}
1923

1924
	Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
1925
	memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
1926

1927
	Quadric* attribute_quadrics = NULL;
1928
	QuadricGrad* attribute_gradients = NULL;
1929

1930
	if (attribute_count)
1931
	{
1932
		attribute_quadrics = allocator.allocate<Quadric>(vertex_count);
1933
		memset(attribute_quadrics, 0, vertex_count * sizeof(Quadric));
1934

1935
		attribute_gradients = allocator.allocate<QuadricGrad>(vertex_count * attribute_count);
1936
		memset(attribute_gradients, 0, vertex_count * attribute_count * sizeof(QuadricGrad));
1937
	}
1938

1939
	fillFaceQuadrics(vertex_quadrics, result, index_count, vertex_positions, remap);
1940
	fillEdgeQuadrics(vertex_quadrics, result, index_count, vertex_positions, remap, vertex_kind, loop, loopback);
1941

1942
	if (attribute_count)
1943
		fillAttributeQuadrics(attribute_quadrics, attribute_gradients, result, index_count, vertex_positions, vertex_attributes, attribute_count);
1944

1945
	unsigned int* components = NULL;
1946
	float* component_errors = NULL;
1947
	size_t component_count = 0;
1948
	float component_nexterror = 0;
1949

1950
	if (options & meshopt_SimplifyPrune)
1951
	{
1952
		components = allocator.allocate<unsigned int>(vertex_count);
1953
		component_count = buildComponents(components, vertex_count, result, index_count, remap);
1954

1955
		component_errors = allocator.allocate<float>(component_count * 4); // overallocate for temporary use inside measureComponents
1956
		measureComponents(component_errors, component_count, components, vertex_positions, vertex_count);
1957

1958
		component_nexterror = FLT_MAX;
1959
		for (size_t i = 0; i < component_count; ++i)
1960
			component_nexterror = component_nexterror > component_errors[i] ? component_errors[i] : component_nexterror;
1961

1962
#if TRACE
1963
		printf("components: %d (min error %e)\n", int(component_count), sqrtf(component_nexterror));
1964
#endif
1965
	}
1966

1967
#if TRACE
1968
	size_t pass_count = 0;
1969
#endif
1970

1971
	size_t collapse_capacity = boundEdgeCollapses(adjacency, vertex_count, index_count, vertex_kind);
1972

1973
	Collapse* edge_collapses = allocator.allocate<Collapse>(collapse_capacity);
1974
	unsigned int* collapse_order = allocator.allocate<unsigned int>(collapse_capacity);
1975
	unsigned int* collapse_remap = allocator.allocate<unsigned int>(vertex_count);
1976
	unsigned char* collapse_locked = allocator.allocate<unsigned char>(vertex_count);
1977

1978
	size_t result_count = index_count;
1979
	float result_error = 0;
1980
	float vertex_error = 0;
1981

1982
	// target_error input is linear; we need to adjust it to match quadricError units
1983
	float error_scale = (options & meshopt_SimplifyErrorAbsolute) ? vertex_scale : 1.f;
1984
	float error_limit = (target_error * target_error) / (error_scale * error_scale);
1985

1986
	while (result_count > target_index_count)
1987
	{
1988
		// note: throughout the simplification process adjacency structure reflects welded topology for result-in-progress
1989
		updateEdgeAdjacency(adjacency, result, result_count, vertex_count, remap);
1990

1991
		size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, collapse_capacity, result, result_count, remap, vertex_kind, loop, loopback);
1992
		assert(edge_collapse_count <= collapse_capacity);
1993

1994
		// no edges can be collapsed any more due to topology restrictions
1995
		if (edge_collapse_count == 0)
1996
			break;
1997

1998
#if TRACE
1999
		printf("pass %d:%c", int(pass_count++), TRACE >= 2 ? '\n' : ' ');
2000
#endif
2001

2002
		rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_attributes, vertex_quadrics, attribute_quadrics, attribute_gradients, attribute_count, remap, wedge, vertex_kind, loop, loopback);
2003

2004
		sortEdgeCollapses(collapse_order, edge_collapses, edge_collapse_count);
2005

2006
		size_t triangle_collapse_goal = (result_count - target_index_count) / 3;
2007

2008
		for (size_t i = 0; i < vertex_count; ++i)
2009
			collapse_remap[i] = unsigned(i);
2010

2011
		memset(collapse_locked, 0, vertex_count);
2012

2013
		size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, loop, loopback, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error);
2014

2015
		// no edges can be collapsed any more due to hitting the error limit or triangle collapse limit
2016
		if (collapses == 0)
2017
			break;
2018

2019
		updateQuadrics(collapse_remap, vertex_count, vertex_quadrics, attribute_quadrics, attribute_gradients, attribute_count, vertex_positions, remap, vertex_error);
2020

2021
		// updateQuadrics will update vertex error if we use attributes, but if we don't then result_error and vertex_error are equivalent
2022
		vertex_error = attribute_count == 0 ? result_error : vertex_error;
2023

2024
		remapEdgeLoops(loop, vertex_count, collapse_remap);
2025
		remapEdgeLoops(loopback, vertex_count, collapse_remap);
2026

2027
		size_t new_count = remapIndexBuffer(result, result_count, collapse_remap);
2028
		assert(new_count < result_count);
2029

2030
		result_count = new_count;
2031

2032
		if ((options & meshopt_SimplifyPrune) && result_count > target_index_count && component_nexterror <= vertex_error)
2033
			result_count = pruneComponents(result, result_count, components, component_errors, component_count, vertex_error, component_nexterror);
2034
	}
2035

2036
	// we're done with the regular simplification but we're still short of the target; try pruning more aggressively towards error_limit
2037
	while ((options & meshopt_SimplifyPrune) && result_count > target_index_count && component_nexterror <= error_limit)
2038
	{
2039
#if TRACE
2040
		printf("pass %d: cleanup; ", int(pass_count++));
2041
#endif
2042

2043
		float component_cutoff = component_nexterror * 1.5f < error_limit ? component_nexterror * 1.5f : error_limit;
2044

2045
		// track maximum error in eligible components as we are increasing resulting error
2046
		float component_maxerror = 0;
2047
		for (size_t i = 0; i < component_count; ++i)
2048
			if (component_errors[i] > component_maxerror && component_errors[i] <= component_cutoff)
2049
				component_maxerror = component_errors[i];
2050

2051
		size_t new_count = pruneComponents(result, result_count, components, component_errors, component_count, component_cutoff, component_nexterror);
2052
		if (new_count == result_count)
2053
			break;
2054

2055
		result_count = new_count;
2056
		result_error = result_error < component_maxerror ? component_maxerror : result_error;
2057
		vertex_error = vertex_error < component_maxerror ? component_maxerror : vertex_error;
2058
	}
2059

2060
#if TRACE
2061
	printf("result: %d triangles, error: %e; total %d passes\n", int(result_count / 3), sqrtf(result_error), int(pass_count));
2062
#endif
2063

2064
	// if debug visualization data is requested, fill it instead of index data; for simplicity, this doesn't work with sparsity
2065
	if ((options & meshopt_SimplifyInternalDebug) && !sparse_remap)
2066
	{
2067
		assert(Kind_Count <= 8 && vertex_count < (1 << 28)); // 3 bit kind, 1 bit loop
2068

2069
		for (size_t i = 0; i < result_count; i += 3)
2070
		{
2071
			unsigned int a = result[i + 0], b = result[i + 1], c = result[i + 2];
2072

2073
			result[i + 0] |= (vertex_kind[a] << 28) | (unsigned(loop[a] == b || loopback[b] == a) << 31);
2074
			result[i + 1] |= (vertex_kind[b] << 28) | (unsigned(loop[b] == c || loopback[c] == b) << 31);
2075
			result[i + 2] |= (vertex_kind[c] << 28) | (unsigned(loop[c] == a || loopback[a] == c) << 31);
2076
		}
2077
	}
2078

2079
	// convert resulting indices back into the dense space of the larger mesh
2080
	if (sparse_remap)
2081
		for (size_t i = 0; i < result_count; ++i)
2082
			result[i] = sparse_remap[result[i]];
2083

2084
	// result_error is quadratic; we need to remap it back to linear
2085
	if (out_result_error)
2086
		*out_result_error = sqrtf(result_error) * error_scale;
2087

2088
	return result_count;
2089
}
2090

2091
size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
2092
{
2093
	return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, NULL, 0, NULL, 0, NULL, target_index_count, target_error, options, out_result_error);
2094
}
2095

2096
size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
2097
{
2098
	return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, vertex_attributes_data, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options, out_result_error);
2099
}
2100

2101
size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error)
2102
{
2103
	using namespace meshopt;
2104

2105
	assert(index_count % 3 == 0);
2106
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
2107
	assert(vertex_positions_stride % sizeof(float) == 0);
2108
	assert(target_index_count <= index_count);
2109

2110
	// we expect to get ~2 triangles/vertex in the output
2111
	size_t target_cell_count = target_index_count / 6;
2112

2113
	meshopt_Allocator allocator;
2114

2115
	Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
2116
	rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
2117

2118
	// find the optimal grid size using guided binary search
2119
#if TRACE
2120
	printf("source: %d vertices, %d triangles\n", int(vertex_count), int(index_count / 3));
2121
	printf("target: %d cells, %d triangles\n", int(target_cell_count), int(target_index_count / 3));
2122
#endif
2123

2124
	unsigned int* vertex_ids = allocator.allocate<unsigned int>(vertex_count);
2125

2126
	const int kInterpolationPasses = 5;
2127

2128
	// invariant: # of triangles in min_grid <= target_count
2129
	int min_grid = int(1.f / (target_error < 1e-3f ? 1e-3f : target_error));
2130
	int max_grid = 1025;
2131
	size_t min_triangles = 0;
2132
	size_t max_triangles = index_count / 3;
2133

2134
	// when we're error-limited, we compute the triangle count for the min. size; this accelerates convergence and provides the correct answer when we can't use a larger grid
2135
	if (min_grid > 1)
2136
	{
2137
		computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
2138
		min_triangles = countTriangles(vertex_ids, indices, index_count);
2139
	}
2140

2141
	// instead of starting in the middle, let's guess as to what the answer might be! triangle count usually grows as a square of grid size...
2142
	int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f);
2143

2144
	for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass)
2145
	{
2146
		if (min_triangles >= target_index_count / 3 || max_grid - min_grid <= 1)
2147
			break;
2148

2149
		// we clamp the prediction of the grid size to make sure that the search converges
2150
		int grid_size = next_grid_size;
2151
		grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid ? max_grid - 1 : grid_size);
2152

2153
		computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size);
2154
		size_t triangles = countTriangles(vertex_ids, indices, index_count);
2155

2156
#if TRACE
2157
		printf("pass %d (%s): grid size %d, triangles %d, %s\n",
2158
		    pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses ? "lerp" : "binary"),
2159
		    grid_size, int(triangles),
2160
		    (triangles <= target_index_count / 3) ? "under" : "over");
2161
#endif
2162

2163
		float tip = interpolate(float(size_t(target_index_count / 3)), float(min_grid), float(min_triangles), float(grid_size), float(triangles), float(max_grid), float(max_triangles));
2164

2165
		if (triangles <= target_index_count / 3)
2166
		{
2167
			min_grid = grid_size;
2168
			min_triangles = triangles;
2169
		}
2170
		else
2171
		{
2172
			max_grid = grid_size;
2173
			max_triangles = triangles;
2174
		}
2175

2176
		// we start by using interpolation search - it usually converges faster
2177
		// however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN)
2178
		next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2;
2179
	}
2180

2181
	if (min_triangles == 0)
2182
	{
2183
		if (out_result_error)
2184
			*out_result_error = 1.f;
2185

2186
		return 0;
2187
	}
2188

2189
	// build vertex->cell association by mapping all vertices with the same quantized position to the same cell
2190
	size_t table_size = hashBuckets2(vertex_count);
2191
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
2192

2193
	unsigned int* vertex_cells = allocator.allocate<unsigned int>(vertex_count);
2194

2195
	computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
2196
	size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count);
2197

2198
	// build a quadric for each target cell
2199
	Quadric* cell_quadrics = allocator.allocate<Quadric>(cell_count);
2200
	memset(cell_quadrics, 0, cell_count * sizeof(Quadric));
2201

2202
	fillCellQuadrics(cell_quadrics, indices, index_count, vertex_positions, vertex_cells);
2203

2204
	// for each target cell, find the vertex with the minimal error
2205
	unsigned int* cell_remap = allocator.allocate<unsigned int>(cell_count);
2206
	float* cell_errors = allocator.allocate<float>(cell_count);
2207

2208
	fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count);
2209

2210
	// compute error
2211
	float result_error = 0.f;
2212

2213
	for (size_t i = 0; i < cell_count; ++i)
2214
		result_error = result_error < cell_errors[i] ? cell_errors[i] : result_error;
2215

2216
	// collapse triangles!
2217
	// note that we need to filter out triangles that we've already output because we very frequently generate redundant triangles between cells :(
2218
	size_t tritable_size = hashBuckets2(min_triangles);
2219
	unsigned int* tritable = allocator.allocate<unsigned int>(tritable_size);
2220

2221
	size_t write = filterTriangles(destination, tritable, tritable_size, indices, index_count, vertex_cells, cell_remap);
2222

2223
#if TRACE
2224
	printf("result: %d cells, %d triangles (%d unfiltered), error %e\n", int(cell_count), int(write / 3), int(min_triangles), sqrtf(result_error));
2225
#endif
2226

2227
	if (out_result_error)
2228
		*out_result_error = sqrtf(result_error);
2229

2230
	return write;
2231
}
2232

2233
size_t meshopt_simplifyPrune(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, float target_error)
2234
{
2235
	using namespace meshopt;
2236

2237
	assert(index_count % 3 == 0);
2238
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
2239
	assert(vertex_positions_stride % sizeof(float) == 0);
2240
	assert(target_error >= 0);
2241

2242
	meshopt_Allocator allocator;
2243

2244
	unsigned int* result = destination;
2245
	if (result != indices)
2246
		memcpy(result, indices, index_count * sizeof(unsigned int));
2247

2248
	// build position remap that maps each vertex to the one with identical position
2249
	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
2250
	buildPositionRemap(remap, NULL, vertex_positions_data, vertex_count, vertex_positions_stride, NULL, allocator);
2251

2252
	Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
2253
	rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride, NULL);
2254

2255
	unsigned int* components = allocator.allocate<unsigned int>(vertex_count);
2256
	size_t component_count = buildComponents(components, vertex_count, indices, index_count, remap);
2257

2258
	float* component_errors = allocator.allocate<float>(component_count * 4); // overallocate for temporary use inside measureComponents
2259
	measureComponents(component_errors, component_count, components, vertex_positions, vertex_count);
2260

2261
	float component_nexterror = 0;
2262
	size_t result_count = pruneComponents(result, index_count, components, component_errors, component_count, target_error * target_error, component_nexterror);
2263

2264
	return result_count;
2265
}
2266

2267
size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t target_vertex_count)
2268
{
2269
	using namespace meshopt;
2270

2271
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
2272
	assert(vertex_positions_stride % sizeof(float) == 0);
2273
	assert(vertex_colors_stride == 0 || (vertex_colors_stride >= 12 && vertex_colors_stride <= 256));
2274
	assert(vertex_colors_stride % sizeof(float) == 0);
2275
	assert(vertex_colors == NULL || vertex_colors_stride != 0);
2276
	assert(target_vertex_count <= vertex_count);
2277

2278
	size_t target_cell_count = target_vertex_count;
2279

2280
	if (target_cell_count == 0)
2281
		return 0;
2282

2283
	meshopt_Allocator allocator;
2284

2285
	Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
2286
	rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
2287

2288
	// find the optimal grid size using guided binary search
2289
#if TRACE
2290
	printf("source: %d vertices\n", int(vertex_count));
2291
	printf("target: %d cells\n", int(target_cell_count));
2292
#endif
2293

2294
	unsigned int* vertex_ids = allocator.allocate<unsigned int>(vertex_count);
2295

2296
	size_t table_size = hashBuckets2(vertex_count);
2297
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
2298

2299
	const int kInterpolationPasses = 5;
2300

2301
	// invariant: # of vertices in min_grid <= target_count
2302
	int min_grid = 0;
2303
	int max_grid = 1025;
2304
	size_t min_vertices = 0;
2305
	size_t max_vertices = vertex_count;
2306

2307
	// instead of starting in the middle, let's guess as to what the answer might be! triangle count usually grows as a square of grid size...
2308
	int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f);
2309

2310
	for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass)
2311
	{
2312
		assert(min_vertices < target_vertex_count);
2313
		assert(max_grid - min_grid > 1);
2314

2315
		// we clamp the prediction of the grid size to make sure that the search converges
2316
		int grid_size = next_grid_size;
2317
		grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid ? max_grid - 1 : grid_size);
2318

2319
		computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size);
2320
		size_t vertices = countVertexCells(table, table_size, vertex_ids, vertex_count);
2321

2322
#if TRACE
2323
		printf("pass %d (%s): grid size %d, vertices %d, %s\n",
2324
		    pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses ? "lerp" : "binary"),
2325
		    grid_size, int(vertices),
2326
		    (vertices <= target_vertex_count) ? "under" : "over");
2327
#endif
2328

2329
		float tip = interpolate(float(target_vertex_count), float(min_grid), float(min_vertices), float(grid_size), float(vertices), float(max_grid), float(max_vertices));
2330

2331
		if (vertices <= target_vertex_count)
2332
		{
2333
			min_grid = grid_size;
2334
			min_vertices = vertices;
2335
		}
2336
		else
2337
		{
2338
			max_grid = grid_size;
2339
			max_vertices = vertices;
2340
		}
2341

2342
		if (vertices == target_vertex_count || max_grid - min_grid <= 1)
2343
			break;
2344

2345
		// we start by using interpolation search - it usually converges faster
2346
		// however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN)
2347
		next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2;
2348
	}
2349

2350
	if (min_vertices == 0)
2351
		return 0;
2352

2353
	// build vertex->cell association by mapping all vertices with the same quantized position to the same cell
2354
	unsigned int* vertex_cells = allocator.allocate<unsigned int>(vertex_count);
2355

2356
	computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
2357
	size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count);
2358

2359
	// accumulate points into a reservoir for each target cell
2360
	Reservoir* cell_reservoirs = allocator.allocate<Reservoir>(cell_count);
2361
	memset(cell_reservoirs, 0, cell_count * sizeof(Reservoir));
2362

2363
	fillCellReservoirs(cell_reservoirs, cell_count, vertex_positions, vertex_colors, vertex_colors_stride, vertex_count, vertex_cells);
2364

2365
	// for each target cell, find the vertex with the minimal error
2366
	unsigned int* cell_remap = allocator.allocate<unsigned int>(cell_count);
2367
	float* cell_errors = allocator.allocate<float>(cell_count);
2368

2369
	// we scale the color weight to bring it to the same scale as position so that error addition makes sense
2370
	float color_weight_scaled = color_weight * (min_grid == 1 ? 1.f : 1.f / (min_grid - 1));
2371

2372
	fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_reservoirs, vertex_positions, vertex_colors, vertex_colors_stride, color_weight_scaled * color_weight_scaled, vertex_count);
2373

2374
	// copy results to the output
2375
	assert(cell_count <= target_vertex_count);
2376
	memcpy(destination, cell_remap, sizeof(unsigned int) * cell_count);
2377

2378
#if TRACE
2379
	// compute error
2380
	float result_error = 0.f;
2381

2382
	for (size_t i = 0; i < cell_count; ++i)
2383
		result_error = result_error < cell_errors[i] ? cell_errors[i] : result_error;
2384

2385
	printf("result: %d cells, %e error\n", int(cell_count), sqrtf(result_error));
2386
#endif
2387

2388
	return cell_count;
2389
}
2390

2391
float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
2392
{
2393
	using namespace meshopt;
2394

2395
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
2396
	assert(vertex_positions_stride % sizeof(float) == 0);
2397

2398
	float extent = rescalePositions(NULL, vertex_positions, vertex_count, vertex_positions_stride);
2399

2400
	return extent;
2401
}
2402

2403
Product

Resources

Company