Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bevyengine
GitHub Repository: bevyengine/bevy
Path: blob/main/crates/bevy_pbr/src/meshlet/from_mesh.rs
9423 views
1
use crate::meshlet::asset::{MeshletAabb, MeshletAabbErrorOffset, MeshletCullData};
2
3
use super::asset::{BvhNode, Meshlet, MeshletBoundingSphere, MeshletMesh};
4
use alloc::borrow::Cow;
5
use bevy_math::{
6
bounding::{Aabb3d, BoundingSphere, BoundingVolume},
7
ops::log2,
8
IVec3, Isometry3d, Vec2, Vec3, Vec3A, Vec3Swizzles,
9
};
10
use bevy_mesh::{Indices, Mesh};
11
use bevy_platform::collections::HashMap;
12
use bevy_render::render_resource::PrimitiveTopology;
13
use bevy_tasks::{AsyncComputeTaskPool, ParallelSlice};
14
use bitvec::{order::Lsb0, vec::BitVec, view::BitView};
15
use core::{f32, ops::Range};
16
use itertools::Itertools;
17
use meshopt::{
18
build_meshlets, ffi::meshopt_Meshlet, generate_position_remap,
19
simplify_with_attributes_and_locks, Meshlets, SimplifyOptions, VertexDataAdapter,
20
};
21
use metis::{option::Opt, Graph};
22
use smallvec::SmallVec;
23
use thiserror::Error;
24
use tracing::debug_span;
25
26
// Aim to have 8 meshlets per group
27
const TARGET_MESHLETS_PER_GROUP: usize = 8;
28
// Reject groups that keep over 60% of their original triangles. We'd much rather render a few
29
// extra triangles than create too many meshlets, increasing cull overhead.
30
const SIMPLIFICATION_FAILURE_PERCENTAGE: f32 = 0.60;
31
32
/// Default vertex position quantization factor for use with [`MeshletMesh::from_mesh`].
33
///
34
/// Snaps vertices to the nearest 1/16th of a centimeter (1/2^4).
35
pub const MESHLET_DEFAULT_VERTEX_POSITION_QUANTIZATION_FACTOR: u8 = 4;
36
37
const CENTIMETERS_PER_METER: f32 = 100.0;
38
39
impl MeshletMesh {
40
/// Process a [`Mesh`] to generate a [`MeshletMesh`].
41
///
42
/// This process is very slow, and should be done ahead of time, and not at runtime.
43
///
44
/// # Requirements
45
///
46
/// This function requires the `meshlet_processor` cargo feature.
47
///
48
/// The input mesh must:
49
/// 1. Use [`PrimitiveTopology::TriangleList`]
50
/// 2. Use indices
51
/// 3. Have the exact following set of vertex attributes: `{POSITION, NORMAL, UV_0}` (tangents can be used in material shaders, but are calculated at runtime and are not stored in the mesh)
52
///
53
/// # Vertex precision
54
///
55
/// `vertex_position_quantization_factor` is the amount of precision to use when quantizing vertex positions.
56
///
57
/// Vertices are snapped to the nearest (1/2^x)th of a centimeter, where x = `vertex_position_quantization_factor`.
58
/// E.g. if x = 4, then vertices are snapped to the nearest 1/2^4 = 1/16th of a centimeter.
59
///
60
/// Use [`MESHLET_DEFAULT_VERTEX_POSITION_QUANTIZATION_FACTOR`] as a default, adjusting lower to save memory and disk space, and higher to prevent artifacts if needed.
61
///
62
/// To ensure that two different meshes do not have cracks between them when placed directly next to each other:
63
/// * Use the same quantization factor when converting each mesh to a meshlet mesh
64
/// * Ensure that their [`bevy_transform::components::Transform::translation`]s are a multiple of 1/2^x centimeters (note that translations are in meters)
65
/// * Ensure that their [`bevy_transform::components::Transform::scale`]s are the same
66
/// * Ensure that their [`bevy_transform::components::Transform::rotation`]s are a multiple of 90 degrees
67
pub fn from_mesh(
68
mesh: &Mesh,
69
vertex_position_quantization_factor: u8,
70
) -> Result<Self, MeshToMeshletMeshConversionError> {
71
let s = debug_span!("build meshlet mesh");
72
let _e = s.enter();
73
74
// Validate mesh format
75
let indices = validate_input_mesh(mesh)?;
76
77
// Get meshlet vertices
78
let vertex_buffer = mesh.create_packed_vertex_buffer_data();
79
let vertex_stride = mesh.get_vertex_size() as usize;
80
let vertices = VertexDataAdapter::new(&vertex_buffer, vertex_stride, 0).unwrap();
81
let vertex_normals = bytemuck::cast_slice(&vertex_buffer[12..16]);
82
83
// Generate a position-only vertex buffer for determining triangle/meshlet connectivity
84
let position_only_vertex_remap = generate_position_remap(&vertices);
85
86
// Split the mesh into an initial list of meshlets (LOD 0)
87
let (mut meshlets, mut cull_data) =
88
compute_meshlets(&indices, &vertices, &position_only_vertex_remap, None);
89
90
let mut vertex_locks = vec![false; vertices.vertex_count];
91
92
// Build further LODs
93
let mut bvh = BvhBuilder::default();
94
let mut all_groups = Vec::new();
95
let mut simplification_queue: Vec<_> = (0..meshlets.len() as u32).collect();
96
let mut stuck = Vec::new();
97
while !simplification_queue.is_empty() {
98
let s = debug_span!("simplify lod", meshlets = simplification_queue.len());
99
let _e = s.enter();
100
101
// For each meshlet build a list of connected meshlets (meshlets that share a vertex)
102
let connected_meshlets_per_meshlet = find_connected_meshlets(
103
&simplification_queue,
104
&meshlets,
105
&position_only_vertex_remap,
106
);
107
108
// Group meshlets into roughly groups of size TARGET_MESHLETS_PER_GROUP,
109
// grouping meshlets with a high number of shared vertices
110
let groups = group_meshlets(
111
&simplification_queue,
112
&cull_data,
113
&connected_meshlets_per_meshlet,
114
);
115
simplification_queue.clear();
116
117
// Lock borders between groups to prevent cracks when simplifying
118
lock_group_borders(
119
&mut vertex_locks,
120
&groups,
121
&meshlets,
122
&position_only_vertex_remap,
123
);
124
125
let simplified = groups.par_chunk_map(AsyncComputeTaskPool::get(), 1, |_, groups| {
126
let mut group = groups[0].clone();
127
128
// If the group only has a single meshlet we can't simplify it
129
if group.meshlets.len() == 1 {
130
return Err(group);
131
}
132
133
let s = debug_span!("simplify group", meshlets = group.meshlets.len());
134
let _e = s.enter();
135
136
// Simplify the group to ~50% triangle count
137
let Some((simplified_group_indices, mut group_error)) = simplify_meshlet_group(
138
&group,
139
&meshlets,
140
&vertices,
141
vertex_normals,
142
vertex_stride,
143
&vertex_locks,
144
) else {
145
// Couldn't simplify the group enough
146
return Err(group);
147
};
148
149
// Force the group error to be atleast as large as all of its constituent meshlet's
150
// individual errors.
151
for &id in group.meshlets.iter() {
152
group_error = group_error.max(cull_data[id as usize].error);
153
}
154
group.parent_error = group_error;
155
156
// Build new meshlets using the simplified group
157
let new_meshlets = compute_meshlets(
158
&simplified_group_indices,
159
&vertices,
160
&position_only_vertex_remap,
161
Some((group.lod_bounds, group.parent_error)),
162
);
163
164
Ok((group, new_meshlets))
165
});
166
167
let first_group = all_groups.len() as u32;
168
let mut passed_tris = 0;
169
let mut stuck_tris = 0;
170
for group in simplified {
171
match group {
172
Ok((group, (new_meshlets, new_cull_data))) => {
173
let start = meshlets.len();
174
merge_meshlets(&mut meshlets, new_meshlets);
175
cull_data.extend(new_cull_data);
176
let end = meshlets.len();
177
let new_meshlet_ids = start as u32..end as u32;
178
179
passed_tris += triangles_in_meshlets(&meshlets, new_meshlet_ids.clone());
180
simplification_queue.extend(new_meshlet_ids);
181
all_groups.push(group);
182
}
183
Err(group) => {
184
stuck_tris +=
185
triangles_in_meshlets(&meshlets, group.meshlets.iter().copied());
186
stuck.push(group);
187
}
188
}
189
}
190
191
// If we have enough triangles that passed, we can retry simplifying the stuck
192
// meshlets.
193
if passed_tris > stuck_tris / 3 {
194
simplification_queue.extend(stuck.drain(..).flat_map(|group| group.meshlets));
195
}
196
197
bvh.add_lod(first_group, &all_groups);
198
}
199
200
// If there's any stuck meshlets left, add another LOD level with only them
201
if !stuck.is_empty() {
202
let first_group = all_groups.len() as u32;
203
all_groups.extend(stuck);
204
bvh.add_lod(first_group, &all_groups);
205
}
206
207
let (bvh, aabb, depth) = bvh.build(&mut meshlets, all_groups, &mut cull_data);
208
209
// Copy vertex attributes per meshlet and compress
210
let mut vertex_positions = BitVec::<u32, Lsb0>::new();
211
let mut vertex_normals = Vec::new();
212
let mut vertex_uvs = Vec::new();
213
let mut bevy_meshlets = Vec::with_capacity(meshlets.len());
214
for (i, meshlet) in meshlets.meshlets.iter().enumerate() {
215
build_and_compress_per_meshlet_vertex_data(
216
meshlet,
217
meshlets.get(i).vertices,
218
&vertex_buffer,
219
vertex_stride,
220
&mut vertex_positions,
221
&mut vertex_normals,
222
&mut vertex_uvs,
223
&mut bevy_meshlets,
224
vertex_position_quantization_factor,
225
);
226
}
227
vertex_positions.set_uninitialized(false);
228
229
Ok(Self {
230
vertex_positions: vertex_positions.into_vec().into(),
231
vertex_normals: vertex_normals.into(),
232
vertex_uvs: vertex_uvs.into(),
233
indices: meshlets.triangles.into(),
234
bvh: bvh.into(),
235
meshlets: bevy_meshlets.into(),
236
meshlet_cull_data: cull_data
237
.into_iter()
238
.map(|cull_data| MeshletCullData {
239
aabb: aabb_to_meshlet(cull_data.aabb, cull_data.error, 0),
240
lod_group_sphere: sphere_to_meshlet(cull_data.lod_group_sphere),
241
})
242
.collect(),
243
aabb,
244
bvh_depth: depth,
245
})
246
}
247
}
248
249
fn validate_input_mesh(mesh: &Mesh) -> Result<Cow<'_, [u32]>, MeshToMeshletMeshConversionError> {
250
if mesh.primitive_topology() != PrimitiveTopology::TriangleList {
251
return Err(MeshToMeshletMeshConversionError::WrongMeshPrimitiveTopology);
252
}
253
254
if mesh.attributes().map(|(attribute, _)| attribute.id).ne([
255
Mesh::ATTRIBUTE_POSITION.id,
256
Mesh::ATTRIBUTE_NORMAL.id,
257
Mesh::ATTRIBUTE_UV_0.id,
258
]) {
259
return Err(MeshToMeshletMeshConversionError::WrongMeshVertexAttributes(
260
mesh.attributes()
261
.map(|(attribute, _)| format!("{attribute:?}"))
262
.collect(),
263
));
264
}
265
266
match mesh.indices() {
267
Some(Indices::U32(indices)) => Ok(Cow::Borrowed(indices.as_slice())),
268
Some(Indices::U16(indices)) => Ok(indices.iter().map(|i| *i as u32).collect()),
269
_ => Err(MeshToMeshletMeshConversionError::MeshMissingIndices),
270
}
271
}
272
273
fn triangles_in_meshlets(meshlets: &Meshlets, ids: impl IntoIterator<Item = u32>) -> u32 {
274
ids.into_iter()
275
.map(|id| meshlets.get(id as _).triangles.len() as u32 / 3)
276
.sum()
277
}
278
279
fn compute_meshlets(
280
indices: &[u32],
281
vertices: &VertexDataAdapter,
282
position_only_vertex_remap: &[u32],
283
prev_lod_data: Option<(BoundingSphere, f32)>,
284
) -> (Meshlets, Vec<TempMeshletCullData>) {
285
// For each vertex, build a list of all triangles that use it
286
let mut vertices_to_triangles = vec![Vec::new(); position_only_vertex_remap.len()];
287
for (i, index) in indices.iter().enumerate() {
288
let vertex_id = position_only_vertex_remap[*index as usize];
289
let vertex_to_triangles = &mut vertices_to_triangles[vertex_id as usize];
290
vertex_to_triangles.push(i / 3);
291
}
292
293
// For each triangle pair, count how many vertices they share
294
let mut triangle_pair_to_shared_vertex_count = <HashMap<_, _>>::default();
295
for vertex_triangle_ids in vertices_to_triangles {
296
for (triangle_id1, triangle_id2) in vertex_triangle_ids.into_iter().tuple_combinations() {
297
let count = triangle_pair_to_shared_vertex_count
298
.entry((
299
triangle_id1.min(triangle_id2),
300
triangle_id1.max(triangle_id2),
301
))
302
.or_insert(0);
303
*count += 1;
304
}
305
}
306
307
// For each triangle, gather all other triangles that share at least one vertex along with their shared vertex count
308
let triangle_count = indices.len() / 3;
309
let mut connected_triangles_per_triangle = vec![Vec::new(); triangle_count];
310
for ((triangle_id1, triangle_id2), shared_vertex_count) in triangle_pair_to_shared_vertex_count
311
{
312
// We record both id1->id2 and id2->id1 as adjacency is symmetrical
313
connected_triangles_per_triangle[triangle_id1].push((triangle_id2, shared_vertex_count));
314
connected_triangles_per_triangle[triangle_id2].push((triangle_id1, shared_vertex_count));
315
}
316
317
// The order of triangles depends on hash traversal order; to produce deterministic results, sort them
318
// TODO: Wouldn't it be faster to use a `BTreeMap` above instead of `HashMap` + sorting?
319
for list in connected_triangles_per_triangle.iter_mut() {
320
list.sort_unstable();
321
}
322
323
let mut xadj = Vec::with_capacity(triangle_count + 1);
324
let mut adjncy = Vec::new();
325
let mut adjwgt = Vec::new();
326
for connected_triangles in connected_triangles_per_triangle {
327
xadj.push(adjncy.len() as i32);
328
for (connected_triangle_id, shared_vertex_count) in connected_triangles {
329
adjncy.push(connected_triangle_id as i32);
330
adjwgt.push(shared_vertex_count);
331
// TODO: Additional weight based on triangle center spatial proximity?
332
}
333
}
334
xadj.push(adjncy.len() as i32);
335
336
let mut options = [-1; metis::NOPTIONS];
337
options[metis::option::Seed::INDEX] = 17;
338
options[metis::option::UFactor::INDEX] = 1; // Important that there's very little imbalance between partitions
339
340
let mut meshlet_per_triangle = vec![0; triangle_count];
341
let partition_count = triangle_count.div_ceil(126); // Need to undershoot to prevent METIS from going over 128 triangles per meshlet
342
Graph::new(1, partition_count as i32, &xadj, &adjncy)
343
.unwrap()
344
.set_options(&options)
345
.set_adjwgt(&adjwgt)
346
.part_recursive(&mut meshlet_per_triangle)
347
.unwrap();
348
349
let mut indices_per_meshlet = vec![Vec::new(); partition_count];
350
for (triangle_id, meshlet) in meshlet_per_triangle.into_iter().enumerate() {
351
let meshlet_indices = &mut indices_per_meshlet[meshlet as usize];
352
let base_index = triangle_id * 3;
353
meshlet_indices.extend_from_slice(&indices[base_index..(base_index + 3)]);
354
}
355
356
// Use meshopt to build meshlets from the sets of triangles
357
let mut meshlets = Meshlets {
358
meshlets: Vec::new(),
359
vertices: Vec::new(),
360
triangles: Vec::new(),
361
};
362
let mut cull_data = Vec::new();
363
let get_vertex = |&v: &u32| {
364
*bytemuck::from_bytes::<Vec3>(
365
&vertices.reader.get_ref()
366
[vertices.position_offset + v as usize * vertices.vertex_stride..][..12],
367
)
368
};
369
for meshlet_indices in &indices_per_meshlet {
370
let meshlet = build_meshlets(meshlet_indices, vertices, 256, 128, 0.0);
371
for meshlet in meshlet.iter() {
372
let (lod_group_sphere, error) = prev_lod_data.unwrap_or_else(|| {
373
let bounds = meshopt::compute_meshlet_bounds(meshlet, vertices);
374
(BoundingSphere::new(bounds.center, bounds.radius), 0.0)
375
});
376
377
cull_data.push(TempMeshletCullData {
378
aabb: Aabb3d::from_point_cloud(
379
Isometry3d::IDENTITY,
380
meshlet.vertices.iter().map(get_vertex),
381
),
382
lod_group_sphere,
383
error,
384
});
385
}
386
merge_meshlets(&mut meshlets, meshlet);
387
}
388
(meshlets, cull_data)
389
}
390
391
fn find_connected_meshlets(
392
simplification_queue: &[u32],
393
meshlets: &Meshlets,
394
position_only_vertex_remap: &[u32],
395
) -> Vec<Vec<(usize, usize)>> {
396
// For each vertex, build a list of all meshlets that use it
397
let mut vertices_to_meshlets = vec![Vec::new(); position_only_vertex_remap.len()];
398
for (id_index, &meshlet_id) in simplification_queue.iter().enumerate() {
399
let meshlet = meshlets.get(meshlet_id as _);
400
for index in meshlet.triangles {
401
let vertex_id = position_only_vertex_remap[meshlet.vertices[*index as usize] as usize];
402
let vertex_to_meshlets = &mut vertices_to_meshlets[vertex_id as usize];
403
// Meshlets are added in order, so we can just check the last element to deduplicate,
404
// in the case of two triangles sharing the same vertex within a single meshlet
405
if vertex_to_meshlets.last() != Some(&id_index) {
406
vertex_to_meshlets.push(id_index);
407
}
408
}
409
}
410
411
// For each meshlet pair, count how many vertices they share
412
let mut meshlet_pair_to_shared_vertex_count = <HashMap<_, _>>::default();
413
for vertex_meshlet_ids in vertices_to_meshlets {
414
for (meshlet_id1, meshlet_id2) in vertex_meshlet_ids.into_iter().tuple_combinations() {
415
let count = meshlet_pair_to_shared_vertex_count
416
.entry((meshlet_id1.min(meshlet_id2), meshlet_id1.max(meshlet_id2)))
417
.or_insert(0);
418
*count += 1;
419
}
420
}
421
422
// For each meshlet, gather all other meshlets that share at least one vertex along with their shared vertex count
423
let mut connected_meshlets_per_meshlet = vec![Vec::new(); simplification_queue.len()];
424
for ((meshlet_id1, meshlet_id2), shared_vertex_count) in meshlet_pair_to_shared_vertex_count {
425
// We record both id1->id2 and id2->id1 as adjacency is symmetrical
426
connected_meshlets_per_meshlet[meshlet_id1].push((meshlet_id2, shared_vertex_count));
427
connected_meshlets_per_meshlet[meshlet_id2].push((meshlet_id1, shared_vertex_count));
428
}
429
430
// The order of meshlets depends on hash traversal order; to produce deterministic results, sort them
431
// TODO: Wouldn't it be faster to use a `BTreeMap` above instead of `HashMap` + sorting?
432
for list in connected_meshlets_per_meshlet.iter_mut() {
433
list.sort_unstable();
434
}
435
436
connected_meshlets_per_meshlet
437
}
438
439
// METIS manual: https://github.com/KarypisLab/METIS/blob/e0f1b88b8efcb24ffa0ec55eabb78fbe61e58ae7/manual/manual.pdf
440
fn group_meshlets(
441
simplification_queue: &[u32],
442
meshlet_cull_data: &[TempMeshletCullData],
443
connected_meshlets_per_meshlet: &[Vec<(usize, usize)>],
444
) -> Vec<TempMeshletGroup> {
445
let mut xadj = Vec::with_capacity(simplification_queue.len() + 1);
446
let mut adjncy = Vec::new();
447
let mut adjwgt = Vec::new();
448
for connected_meshlets in connected_meshlets_per_meshlet {
449
xadj.push(adjncy.len() as i32);
450
for (connected_meshlet_id, shared_vertex_count) in connected_meshlets {
451
adjncy.push(*connected_meshlet_id as i32);
452
adjwgt.push(*shared_vertex_count as i32);
453
// TODO: Additional weight based on meshlet spatial proximity
454
}
455
}
456
xadj.push(adjncy.len() as i32);
457
458
let mut options = [-1; metis::NOPTIONS];
459
options[metis::option::Seed::INDEX] = 17;
460
options[metis::option::UFactor::INDEX] = 200;
461
462
let mut group_per_meshlet = vec![0; simplification_queue.len()];
463
let partition_count = simplification_queue
464
.len()
465
.div_ceil(TARGET_MESHLETS_PER_GROUP); // TODO: Nanite uses groups of 8-32, probably based on some kind of heuristic
466
Graph::new(1, partition_count as i32, &xadj, &adjncy)
467
.unwrap()
468
.set_options(&options)
469
.set_adjwgt(&adjwgt)
470
.part_recursive(&mut group_per_meshlet)
471
.unwrap();
472
473
let mut groups = vec![TempMeshletGroup::default(); partition_count];
474
for (i, meshlet_group) in group_per_meshlet.into_iter().enumerate() {
475
let group = &mut groups[meshlet_group as usize];
476
let meshlet_id = simplification_queue[i];
477
478
group.meshlets.push(meshlet_id);
479
let data = &meshlet_cull_data[meshlet_id as usize];
480
group.aabb = group.aabb.merge(&data.aabb);
481
group.lod_bounds = merge_spheres(group.lod_bounds, data.lod_group_sphere);
482
}
483
groups
484
}
485
486
fn lock_group_borders(
487
vertex_locks: &mut [bool],
488
groups: &[TempMeshletGroup],
489
meshlets: &Meshlets,
490
position_only_vertex_remap: &[u32],
491
) {
492
let mut position_only_locks = vec![-1; position_only_vertex_remap.len()];
493
494
// Iterate over position-only based vertices of all meshlets in all groups
495
for (group_id, group) in groups.iter().enumerate() {
496
for &meshlet_id in group.meshlets.iter() {
497
let meshlet = meshlets.get(meshlet_id as usize);
498
for index in meshlet.triangles {
499
let vertex_id =
500
position_only_vertex_remap[meshlet.vertices[*index as usize] as usize] as usize;
501
502
// If the vertex is not yet claimed by any group, or was already claimed by this group
503
if position_only_locks[vertex_id] == -1
504
|| position_only_locks[vertex_id] == group_id as i32
505
{
506
position_only_locks[vertex_id] = group_id as i32; // Then claim the vertex for this group
507
} else {
508
position_only_locks[vertex_id] = -2; // Else vertex was already claimed by another group or was already locked, lock it
509
}
510
}
511
}
512
}
513
514
// Lock vertices used by more than 1 group
515
for i in 0..vertex_locks.len() {
516
let vertex_id = position_only_vertex_remap[i] as usize;
517
vertex_locks[i] = position_only_locks[vertex_id] == -2;
518
}
519
}
520
521
fn simplify_meshlet_group(
522
group: &TempMeshletGroup,
523
meshlets: &Meshlets,
524
vertices: &VertexDataAdapter<'_>,
525
vertex_normals: &[f32],
526
vertex_stride: usize,
527
vertex_locks: &[bool],
528
) -> Option<(Vec<u32>, f32)> {
529
// Build a new index buffer into the mesh vertex data by combining all meshlet data in the group
530
let group_indices = group
531
.meshlets
532
.iter()
533
.flat_map(|&meshlet_id| {
534
let meshlet = meshlets.get(meshlet_id as _);
535
meshlet
536
.triangles
537
.iter()
538
.map(|&meshlet_index| meshlet.vertices[meshlet_index as usize])
539
})
540
.collect::<Vec<_>>();
541
542
// Simplify the group to ~50% triangle count
543
let mut error = 0.0;
544
let simplified_group_indices = simplify_with_attributes_and_locks(
545
&group_indices,
546
vertices,
547
vertex_normals,
548
&[0.5; 3],
549
vertex_stride,
550
vertex_locks,
551
group_indices.len() / 2,
552
f32::MAX,
553
SimplifyOptions::Sparse | SimplifyOptions::ErrorAbsolute,
554
Some(&mut error),
555
);
556
557
// Check if we were able to simplify
558
if simplified_group_indices.len() as f32 / group_indices.len() as f32
559
> SIMPLIFICATION_FAILURE_PERCENTAGE
560
{
561
return None;
562
}
563
564
Some((simplified_group_indices, error))
565
}
566
567
fn merge_meshlets(meshlets: &mut Meshlets, merge: Meshlets) {
568
let vertex_offset = meshlets.vertices.len() as u32;
569
let triangle_offset = meshlets.triangles.len() as u32;
570
meshlets.vertices.extend_from_slice(&merge.vertices);
571
meshlets.triangles.extend_from_slice(&merge.triangles);
572
meshlets
573
.meshlets
574
.extend(merge.meshlets.into_iter().map(|mut meshlet| {
575
meshlet.vertex_offset += vertex_offset;
576
meshlet.triangle_offset += triangle_offset;
577
meshlet
578
}));
579
}
580
581
fn build_and_compress_per_meshlet_vertex_data(
582
meshlet: &meshopt_Meshlet,
583
meshlet_vertex_ids: &[u32],
584
vertex_buffer: &[u8],
585
vertex_stride: usize,
586
vertex_positions: &mut BitVec<u32, Lsb0>,
587
vertex_normals: &mut Vec<u32>,
588
vertex_uvs: &mut Vec<Vec2>,
589
meshlets: &mut Vec<Meshlet>,
590
vertex_position_quantization_factor: u8,
591
) {
592
let start_vertex_position_bit = vertex_positions.len() as u32;
593
let start_vertex_attribute_id = vertex_normals.len() as u32;
594
595
let quantization_factor =
596
(1 << vertex_position_quantization_factor) as f32 * CENTIMETERS_PER_METER;
597
598
let mut min_quantized_position_channels = IVec3::MAX;
599
let mut max_quantized_position_channels = IVec3::MIN;
600
601
// Lossy vertex compression
602
let mut quantized_positions = [IVec3::ZERO; 256];
603
for (i, vertex_id) in meshlet_vertex_ids.iter().enumerate() {
604
// Load source vertex attributes
605
let vertex_id_byte = *vertex_id as usize * vertex_stride;
606
let vertex_data = &vertex_buffer[vertex_id_byte..(vertex_id_byte + vertex_stride)];
607
let position = Vec3::from_slice(bytemuck::cast_slice(&vertex_data[0..12]));
608
let normal = Vec3::from_slice(bytemuck::cast_slice(&vertex_data[12..24]));
609
let uv = Vec2::from_slice(bytemuck::cast_slice(&vertex_data[24..32]));
610
611
// Copy uncompressed UV
612
vertex_uvs.push(uv);
613
614
// Compress normal
615
vertex_normals.push(pack2x16snorm(octahedral_encode(normal)));
616
617
// Quantize position to a fixed-point IVec3
618
let quantized_position = (position * quantization_factor + 0.5).as_ivec3();
619
quantized_positions[i] = quantized_position;
620
621
// Compute per X/Y/Z-channel quantized position min/max for this meshlet
622
min_quantized_position_channels = min_quantized_position_channels.min(quantized_position);
623
max_quantized_position_channels = max_quantized_position_channels.max(quantized_position);
624
}
625
626
// Calculate bits needed to encode each quantized vertex position channel based on the range of each channel
627
let range = max_quantized_position_channels - min_quantized_position_channels + 1;
628
let bits_per_vertex_position_channel_x = log2(range.x as f32).ceil() as u8;
629
let bits_per_vertex_position_channel_y = log2(range.y as f32).ceil() as u8;
630
let bits_per_vertex_position_channel_z = log2(range.z as f32).ceil() as u8;
631
632
// Lossless encoding of vertex positions in the minimum number of bits per channel
633
for quantized_position in quantized_positions.iter().take(meshlet_vertex_ids.len()) {
634
// Remap [range_min, range_max] IVec3 to [0, range_max - range_min] UVec3
635
let position = (quantized_position - min_quantized_position_channels).as_uvec3();
636
637
// Store as a packed bitstream
638
vertex_positions.extend_from_bitslice(
639
&position.x.view_bits::<Lsb0>()[..bits_per_vertex_position_channel_x as usize],
640
);
641
vertex_positions.extend_from_bitslice(
642
&position.y.view_bits::<Lsb0>()[..bits_per_vertex_position_channel_y as usize],
643
);
644
vertex_positions.extend_from_bitslice(
645
&position.z.view_bits::<Lsb0>()[..bits_per_vertex_position_channel_z as usize],
646
);
647
}
648
649
meshlets.push(Meshlet {
650
start_vertex_position_bit,
651
start_vertex_attribute_id,
652
start_index_id: meshlet.triangle_offset,
653
vertex_count_minus_one: (meshlet.vertex_count - 1) as u8,
654
triangle_count: meshlet.triangle_count as u8,
655
padding: 0,
656
bits_per_vertex_position_channel_x,
657
bits_per_vertex_position_channel_y,
658
bits_per_vertex_position_channel_z,
659
vertex_position_quantization_factor,
660
min_vertex_position_channel_x: min_quantized_position_channels.x as f32,
661
min_vertex_position_channel_y: min_quantized_position_channels.y as f32,
662
min_vertex_position_channel_z: min_quantized_position_channels.z as f32,
663
});
664
}
665
666
fn merge_spheres(a: BoundingSphere, b: BoundingSphere) -> BoundingSphere {
667
let sr = a.radius().min(b.radius());
668
let br = a.radius().max(b.radius());
669
let len = a.center.distance(b.center);
670
if len + sr <= br || sr == 0.0 || len == 0.0 {
671
if a.radius() > b.radius() {
672
a
673
} else {
674
b
675
}
676
} else {
677
let radius = (sr + br + len) / 2.0;
678
let center =
679
(a.center + b.center + (a.radius() - b.radius()) * (a.center - b.center) / len) / 2.0;
680
BoundingSphere::new(center, radius)
681
}
682
}
683
684
#[derive(Copy, Clone)]
685
struct TempMeshletCullData {
686
aabb: Aabb3d,
687
lod_group_sphere: BoundingSphere,
688
error: f32,
689
}
690
691
#[derive(Clone)]
692
struct TempMeshletGroup {
693
aabb: Aabb3d,
694
lod_bounds: BoundingSphere,
695
parent_error: f32,
696
meshlets: SmallVec<[u32; TARGET_MESHLETS_PER_GROUP]>,
697
}
698
699
impl Default for TempMeshletGroup {
700
fn default() -> Self {
701
Self {
702
aabb: aabb_default(), // Default AABB to merge into
703
lod_bounds: BoundingSphere::new(Vec3A::ZERO, 0.0),
704
parent_error: f32::MAX,
705
meshlets: SmallVec::new(),
706
}
707
}
708
}
709
710
// All the BVH build code was stolen from https://github.com/SparkyPotato/radiance/blob/4aa17a3a5be7a0466dc69713e249bbcee9f46057/crates/rad-renderer/src/assets/mesh/virtual_mesh.rs because it works and I'm lazy and don't want to reimplement it
711
struct TempBvhNode {
712
group: u32,
713
aabb: Aabb3d,
714
children: SmallVec<[u32; 8]>,
715
}
716
717
#[derive(Default)]
718
struct BvhBuilder {
719
nodes: Vec<TempBvhNode>,
720
lods: Vec<Range<u32>>,
721
}
722
723
impl BvhBuilder {
724
fn add_lod(&mut self, offset: u32, all_groups: &[TempMeshletGroup]) {
725
let first = self.nodes.len() as u32;
726
self.nodes.extend(
727
all_groups
728
.iter()
729
.enumerate()
730
.skip(offset as _)
731
.map(|(i, group)| TempBvhNode {
732
group: i as u32,
733
aabb: group.aabb,
734
children: SmallVec::new(),
735
}),
736
);
737
let end = self.nodes.len() as u32;
738
if first != end {
739
self.lods.push(first..end);
740
}
741
}
742
743
fn surface_area(&self, nodes: &[u32]) -> f32 {
744
nodes
745
.iter()
746
.map(|&x| self.nodes[x as usize].aabb)
747
.reduce(|a, b| a.merge(&b))
748
.expect("cannot find surface area of zero nodes")
749
.visible_area()
750
}
751
752
fn sort_nodes_by_sah(&self, nodes: &mut [u32], splits: [usize; 8]) {
753
// We use a BVH8, so just recursively binary split 3 times for near-optimal SAH
754
for i in 0..3 {
755
let parts = 1 << i; // 2^i
756
let nodes_per_split = 8 >> i; // 8 / 2^i
757
let half_count = nodes_per_split / 2;
758
let mut offset = 0;
759
for p in 0..parts {
760
let first = p * nodes_per_split;
761
let mut s0 = 0;
762
let mut s1 = 0;
763
for i in 0..half_count {
764
s0 += splits[first + i];
765
s1 += splits[first + half_count + i];
766
}
767
let c = s0 + s1;
768
let nodes = &mut nodes[offset..(offset + c)];
769
offset += c;
770
771
let mut cost = f32::MAX;
772
let mut axis = 0;
773
let key = |x, ax| self.nodes[x as usize].aabb.center()[ax];
774
for ax in 0..3 {
775
nodes.sort_unstable_by(|&x, &y| key(x, ax).partial_cmp(&key(y, ax)).unwrap());
776
let (left, right) = nodes.split_at(s0);
777
let c = self.surface_area(left) + self.surface_area(right);
778
if c < cost {
779
axis = ax;
780
cost = c;
781
}
782
}
783
if axis != 2 {
784
nodes.sort_unstable_by(|&x, &y| {
785
key(x, axis).partial_cmp(&key(y, axis)).unwrap()
786
});
787
}
788
}
789
}
790
}
791
792
fn build_temp_inner(&mut self, nodes: &mut [u32], optimize: bool) -> u32 {
793
let count = nodes.len();
794
if count == 1 {
795
nodes[0]
796
} else if count <= 8 {
797
let i = self.nodes.len();
798
self.nodes.push(TempBvhNode {
799
group: u32::MAX,
800
aabb: aabb_default(),
801
children: nodes.iter().copied().collect(),
802
});
803
i as _
804
} else {
805
// We need to split the nodes into 8 groups, with the smallest possible tree depth.
806
// Additionally, no child should be more than one level deeper than the others.
807
// At `l` levels, we can fit upto 8^l nodes.
808
// The `max_child_size` is the largest power of 8 <= `count` (any larger and we'd have
809
// unfilled nodes).
810
// The `min_child_size` is thus 1 level (8 times) smaller.
811
// After distributing `min_child_size` to all children, we have distributed
812
// `min_child_size * 8` nodes (== `max_child_size`).
813
// The remaining nodes are then distributed left to right.
814
let max_child_size = 1 << ((count.ilog2() / 3) * 3);
815
let min_child_size = max_child_size >> 3;
816
let max_extra_per_node = max_child_size - min_child_size;
817
let mut extra = count - max_child_size; // 8 * min_child_size
818
let splits = core::array::from_fn(|_| {
819
let size = extra.min(max_extra_per_node);
820
extra -= size;
821
min_child_size + size
822
});
823
824
if optimize {
825
self.sort_nodes_by_sah(nodes, splits);
826
}
827
828
let mut offset = 0;
829
let children = splits
830
.into_iter()
831
.map(|size| {
832
let i = self.build_temp_inner(&mut nodes[offset..(offset + size)], optimize);
833
offset += size;
834
i
835
})
836
.collect();
837
838
let i = self.nodes.len();
839
self.nodes.push(TempBvhNode {
840
group: u32::MAX,
841
aabb: aabb_default(),
842
children,
843
});
844
i as _
845
}
846
}
847
848
fn build_temp(&mut self) -> u32 {
849
let mut lods = Vec::with_capacity(self.lods.len());
850
for lod in core::mem::take(&mut self.lods) {
851
let mut lod: Vec<_> = lod.collect();
852
let root = self.build_temp_inner(&mut lod, true);
853
let node = &self.nodes[root as usize];
854
if node.group != u32::MAX || node.children.len() == 8 {
855
lods.push(root);
856
} else {
857
lods.extend(node.children.iter().copied());
858
}
859
}
860
self.build_temp_inner(&mut lods, false)
861
}
862
863
fn build_inner(
864
&self,
865
groups: &[TempMeshletGroup],
866
out: &mut Vec<BvhNode>,
867
max_depth: &mut u32,
868
node: u32,
869
depth: u32,
870
) -> u32 {
871
*max_depth = depth.max(*max_depth);
872
let node = &self.nodes[node as usize];
873
let onode = out.len();
874
out.push(BvhNode::default());
875
876
for (i, &child_id) in node.children.iter().enumerate() {
877
let child = &self.nodes[child_id as usize];
878
if child.group != u32::MAX {
879
let group = &groups[child.group as usize];
880
let out = &mut out[onode];
881
out.aabbs[i] = aabb_to_meshlet(group.aabb, group.parent_error, group.meshlets[0]);
882
out.lod_bounds[i] = sphere_to_meshlet(group.lod_bounds);
883
out.child_counts[i] = group.meshlets[1] as _;
884
} else {
885
let child_id = self.build_inner(groups, out, max_depth, child_id, depth + 1);
886
let child = &out[child_id as usize];
887
let mut aabb = aabb_default();
888
let mut parent_error = 0.0f32;
889
let mut lod_bounds = BoundingSphere::new(Vec3A::ZERO, 0.0);
890
for i in 0..8 {
891
if child.child_counts[i] == 0 {
892
break;
893
}
894
895
aabb = aabb.merge(&Aabb3d::new(
896
child.aabbs[i].center,
897
child.aabbs[i].half_extent,
898
));
899
lod_bounds = merge_spheres(
900
lod_bounds,
901
BoundingSphere::new(child.lod_bounds[i].center, child.lod_bounds[i].radius),
902
);
903
parent_error = parent_error.max(child.aabbs[i].error);
904
}
905
906
let out = &mut out[onode];
907
out.aabbs[i] = aabb_to_meshlet(aabb, parent_error, child_id);
908
out.lod_bounds[i] = sphere_to_meshlet(lod_bounds);
909
out.child_counts[i] = u8::MAX;
910
}
911
}
912
913
onode as _
914
}
915
916
fn build(
917
mut self,
918
meshlets: &mut Meshlets,
919
mut groups: Vec<TempMeshletGroup>,
920
cull_data: &mut Vec<TempMeshletCullData>,
921
) -> (Vec<BvhNode>, MeshletAabb, u32) {
922
// The BVH requires group meshlets to be contiguous, so remap them first.
923
let mut remap = Vec::with_capacity(meshlets.meshlets.len());
924
let mut remapped_cull_data = Vec::with_capacity(cull_data.len());
925
for group in groups.iter_mut() {
926
let first = remap.len() as u32;
927
let count = group.meshlets.len() as u32;
928
remap.extend(
929
group
930
.meshlets
931
.iter()
932
.map(|&m| meshlets.meshlets[m as usize]),
933
);
934
remapped_cull_data.extend(group.meshlets.iter().map(|&m| cull_data[m as usize]));
935
group.meshlets.resize(2, 0);
936
group.meshlets[0] = first;
937
group.meshlets[1] = count;
938
}
939
meshlets.meshlets = remap;
940
*cull_data = remapped_cull_data;
941
942
let mut out = vec![];
943
let mut aabb = aabb_default();
944
let mut max_depth = 0;
945
946
if self.nodes.len() == 1 {
947
let mut o = BvhNode::default();
948
let group = &groups[0];
949
o.aabbs[0] = aabb_to_meshlet(group.aabb, group.parent_error, group.meshlets[0]);
950
o.lod_bounds[0] = sphere_to_meshlet(group.lod_bounds);
951
o.child_counts[0] = group.meshlets[1] as _;
952
out.push(o);
953
aabb = group.aabb;
954
max_depth = 1;
955
} else {
956
let root = self.build_temp();
957
let root = self.build_inner(&groups, &mut out, &mut max_depth, root, 1);
958
assert_eq!(root, 0, "root must be 0");
959
960
let root = &out[0];
961
for i in 0..8 {
962
if root.child_counts[i] == 0 {
963
break;
964
}
965
966
aabb = aabb.merge(&Aabb3d::new(
967
root.aabbs[i].center,
968
root.aabbs[i].half_extent,
969
));
970
}
971
}
972
973
let mut reachable = vec![false; meshlets.meshlets.len()];
974
verify_bvh(&out, cull_data, &mut reachable, 0);
975
assert!(
976
reachable.iter().all(|&x| x),
977
"all meshlets must be reachable"
978
);
979
980
(
981
out,
982
MeshletAabb {
983
center: aabb.center().into(),
984
half_extent: aabb.half_size().into(),
985
},
986
max_depth,
987
)
988
}
989
}
990
991
fn verify_bvh(
992
out: &[BvhNode],
993
cull_data: &[TempMeshletCullData],
994
reachable: &mut [bool],
995
node: u32,
996
) {
997
let node = &out[node as usize];
998
for i in 0..8 {
999
let sphere = node.lod_bounds[i];
1000
let error = node.aabbs[i].error;
1001
if node.child_counts[i] == u8::MAX {
1002
let child = &out[node.aabbs[i].child_offset as usize];
1003
for i in 0..8 {
1004
if child.child_counts[i] == 0 {
1005
break;
1006
}
1007
assert!(
1008
child.aabbs[i].error <= error,
1009
"BVH errors are not monotonic"
1010
);
1011
let sphere_error = (sphere.center - child.lod_bounds[i].center).length()
1012
- (sphere.radius - child.lod_bounds[i].radius);
1013
assert!(
1014
sphere_error <= 0.0001,
1015
"BVH lod spheres are not monotonic ({sphere_error})"
1016
);
1017
}
1018
verify_bvh(out, cull_data, reachable, node.aabbs[i].child_offset);
1019
} else {
1020
for m in 0..node.child_counts[i] as u32 {
1021
let mid = (m + node.aabbs[i].child_offset) as usize;
1022
let meshlet = &cull_data[mid];
1023
assert!(meshlet.error <= error, "meshlet errors are not monotonic");
1024
let sphere_error = (Vec3A::from(sphere.center) - meshlet.lod_group_sphere.center)
1025
.length()
1026
- (sphere.radius - meshlet.lod_group_sphere.radius());
1027
assert!(
1028
sphere_error <= 0.0001,
1029
"meshlet lod spheres are not monotonic: ({sphere_error})"
1030
);
1031
reachable[mid] = true;
1032
}
1033
}
1034
}
1035
}
1036
1037
fn aabb_default() -> Aabb3d {
1038
Aabb3d {
1039
min: Vec3A::INFINITY,
1040
max: Vec3A::NEG_INFINITY,
1041
}
1042
}
1043
1044
fn aabb_to_meshlet(aabb: Aabb3d, error: f32, child_offset: u32) -> MeshletAabbErrorOffset {
1045
MeshletAabbErrorOffset {
1046
center: aabb.center().into(),
1047
error,
1048
half_extent: aabb.half_size().into(),
1049
child_offset,
1050
}
1051
}
1052
1053
fn sphere_to_meshlet(sphere: BoundingSphere) -> MeshletBoundingSphere {
1054
MeshletBoundingSphere {
1055
center: sphere.center.into(),
1056
radius: sphere.radius(),
1057
}
1058
}
1059
1060
// TODO: Precise encode variant
1061
fn octahedral_encode(v: Vec3) -> Vec2 {
1062
let n = v / (v.x.abs() + v.y.abs() + v.z.abs());
1063
let octahedral_wrap = (1.0 - n.yx().abs())
1064
* Vec2::new(
1065
if n.x >= 0.0 { 1.0 } else { -1.0 },
1066
if n.y >= 0.0 { 1.0 } else { -1.0 },
1067
);
1068
if n.z >= 0.0 {
1069
n.xy()
1070
} else {
1071
octahedral_wrap
1072
}
1073
}
1074
1075
// https://www.w3.org/TR/WGSL/#pack2x16snorm-builtin
1076
fn pack2x16snorm(v: Vec2) -> u32 {
1077
let v = v.clamp(Vec2::NEG_ONE, Vec2::ONE);
1078
let v = (v * 32767.0 + 0.5).floor().as_i16vec2();
1079
bytemuck::cast(v)
1080
}
1081
1082
/// An error produced by [`MeshletMesh::from_mesh`].
1083
#[derive(Error, Debug)]
1084
pub enum MeshToMeshletMeshConversionError {
1085
#[error("Mesh primitive topology is not TriangleList")]
1086
WrongMeshPrimitiveTopology,
1087
#[error("Mesh vertex attributes are not {{POSITION, NORMAL, UV_0}}: {0:?}")]
1088
WrongMeshVertexAttributes(Vec<String>),
1089
#[error("Mesh has no indices")]
1090
MeshMissingIndices,
1091
}
1092
1093