Path: blob/main/crates/bevy_pbr/src/meshlet/from_mesh.rs
9423 views
use crate::meshlet::asset::{MeshletAabb, MeshletAabbErrorOffset, MeshletCullData};12use super::asset::{BvhNode, Meshlet, MeshletBoundingSphere, MeshletMesh};3use alloc::borrow::Cow;4use bevy_math::{5bounding::{Aabb3d, BoundingSphere, BoundingVolume},6ops::log2,7IVec3, Isometry3d, Vec2, Vec3, Vec3A, Vec3Swizzles,8};9use bevy_mesh::{Indices, Mesh};10use bevy_platform::collections::HashMap;11use bevy_render::render_resource::PrimitiveTopology;12use bevy_tasks::{AsyncComputeTaskPool, ParallelSlice};13use bitvec::{order::Lsb0, vec::BitVec, view::BitView};14use core::{f32, ops::Range};15use itertools::Itertools;16use meshopt::{17build_meshlets, ffi::meshopt_Meshlet, generate_position_remap,18simplify_with_attributes_and_locks, Meshlets, SimplifyOptions, VertexDataAdapter,19};20use metis::{option::Opt, Graph};21use smallvec::SmallVec;22use thiserror::Error;23use tracing::debug_span;2425// Aim to have 8 meshlets per group26const TARGET_MESHLETS_PER_GROUP: usize = 8;27// Reject groups that keep over 60% of their original triangles. We'd much rather render a few28// extra triangles than create too many meshlets, increasing cull overhead.29const SIMPLIFICATION_FAILURE_PERCENTAGE: f32 = 0.60;3031/// Default vertex position quantization factor for use with [`MeshletMesh::from_mesh`].32///33/// Snaps vertices to the nearest 1/16th of a centimeter (1/2^4).34pub const MESHLET_DEFAULT_VERTEX_POSITION_QUANTIZATION_FACTOR: u8 = 4;3536const CENTIMETERS_PER_METER: f32 = 100.0;3738impl MeshletMesh {39/// Process a [`Mesh`] to generate a [`MeshletMesh`].40///41/// This process is very slow, and should be done ahead of time, and not at runtime.42///43/// # Requirements44///45/// This function requires the `meshlet_processor` cargo feature.46///47/// The input mesh must:48/// 1. Use [`PrimitiveTopology::TriangleList`]49/// 2. Use indices50/// 3. Have the exact following set of vertex attributes: `{POSITION, NORMAL, UV_0}` (tangents can be used in material shaders, but are calculated at runtime and are not stored in the mesh)51///52/// # Vertex precision53///54/// `vertex_position_quantization_factor` is the amount of precision to use when quantizing vertex positions.55///56/// Vertices are snapped to the nearest (1/2^x)th of a centimeter, where x = `vertex_position_quantization_factor`.57/// E.g. if x = 4, then vertices are snapped to the nearest 1/2^4 = 1/16th of a centimeter.58///59/// Use [`MESHLET_DEFAULT_VERTEX_POSITION_QUANTIZATION_FACTOR`] as a default, adjusting lower to save memory and disk space, and higher to prevent artifacts if needed.60///61/// To ensure that two different meshes do not have cracks between them when placed directly next to each other:62/// * Use the same quantization factor when converting each mesh to a meshlet mesh63/// * Ensure that their [`bevy_transform::components::Transform::translation`]s are a multiple of 1/2^x centimeters (note that translations are in meters)64/// * Ensure that their [`bevy_transform::components::Transform::scale`]s are the same65/// * Ensure that their [`bevy_transform::components::Transform::rotation`]s are a multiple of 90 degrees66pub fn from_mesh(67mesh: &Mesh,68vertex_position_quantization_factor: u8,69) -> Result<Self, MeshToMeshletMeshConversionError> {70let s = debug_span!("build meshlet mesh");71let _e = s.enter();7273// Validate mesh format74let indices = validate_input_mesh(mesh)?;7576// Get meshlet vertices77let vertex_buffer = mesh.create_packed_vertex_buffer_data();78let vertex_stride = mesh.get_vertex_size() as usize;79let vertices = VertexDataAdapter::new(&vertex_buffer, vertex_stride, 0).unwrap();80let vertex_normals = bytemuck::cast_slice(&vertex_buffer[12..16]);8182// Generate a position-only vertex buffer for determining triangle/meshlet connectivity83let position_only_vertex_remap = generate_position_remap(&vertices);8485// Split the mesh into an initial list of meshlets (LOD 0)86let (mut meshlets, mut cull_data) =87compute_meshlets(&indices, &vertices, &position_only_vertex_remap, None);8889let mut vertex_locks = vec![false; vertices.vertex_count];9091// Build further LODs92let mut bvh = BvhBuilder::default();93let mut all_groups = Vec::new();94let mut simplification_queue: Vec<_> = (0..meshlets.len() as u32).collect();95let mut stuck = Vec::new();96while !simplification_queue.is_empty() {97let s = debug_span!("simplify lod", meshlets = simplification_queue.len());98let _e = s.enter();99100// For each meshlet build a list of connected meshlets (meshlets that share a vertex)101let connected_meshlets_per_meshlet = find_connected_meshlets(102&simplification_queue,103&meshlets,104&position_only_vertex_remap,105);106107// Group meshlets into roughly groups of size TARGET_MESHLETS_PER_GROUP,108// grouping meshlets with a high number of shared vertices109let groups = group_meshlets(110&simplification_queue,111&cull_data,112&connected_meshlets_per_meshlet,113);114simplification_queue.clear();115116// Lock borders between groups to prevent cracks when simplifying117lock_group_borders(118&mut vertex_locks,119&groups,120&meshlets,121&position_only_vertex_remap,122);123124let simplified = groups.par_chunk_map(AsyncComputeTaskPool::get(), 1, |_, groups| {125let mut group = groups[0].clone();126127// If the group only has a single meshlet we can't simplify it128if group.meshlets.len() == 1 {129return Err(group);130}131132let s = debug_span!("simplify group", meshlets = group.meshlets.len());133let _e = s.enter();134135// Simplify the group to ~50% triangle count136let Some((simplified_group_indices, mut group_error)) = simplify_meshlet_group(137&group,138&meshlets,139&vertices,140vertex_normals,141vertex_stride,142&vertex_locks,143) else {144// Couldn't simplify the group enough145return Err(group);146};147148// Force the group error to be atleast as large as all of its constituent meshlet's149// individual errors.150for &id in group.meshlets.iter() {151group_error = group_error.max(cull_data[id as usize].error);152}153group.parent_error = group_error;154155// Build new meshlets using the simplified group156let new_meshlets = compute_meshlets(157&simplified_group_indices,158&vertices,159&position_only_vertex_remap,160Some((group.lod_bounds, group.parent_error)),161);162163Ok((group, new_meshlets))164});165166let first_group = all_groups.len() as u32;167let mut passed_tris = 0;168let mut stuck_tris = 0;169for group in simplified {170match group {171Ok((group, (new_meshlets, new_cull_data))) => {172let start = meshlets.len();173merge_meshlets(&mut meshlets, new_meshlets);174cull_data.extend(new_cull_data);175let end = meshlets.len();176let new_meshlet_ids = start as u32..end as u32;177178passed_tris += triangles_in_meshlets(&meshlets, new_meshlet_ids.clone());179simplification_queue.extend(new_meshlet_ids);180all_groups.push(group);181}182Err(group) => {183stuck_tris +=184triangles_in_meshlets(&meshlets, group.meshlets.iter().copied());185stuck.push(group);186}187}188}189190// If we have enough triangles that passed, we can retry simplifying the stuck191// meshlets.192if passed_tris > stuck_tris / 3 {193simplification_queue.extend(stuck.drain(..).flat_map(|group| group.meshlets));194}195196bvh.add_lod(first_group, &all_groups);197}198199// If there's any stuck meshlets left, add another LOD level with only them200if !stuck.is_empty() {201let first_group = all_groups.len() as u32;202all_groups.extend(stuck);203bvh.add_lod(first_group, &all_groups);204}205206let (bvh, aabb, depth) = bvh.build(&mut meshlets, all_groups, &mut cull_data);207208// Copy vertex attributes per meshlet and compress209let mut vertex_positions = BitVec::<u32, Lsb0>::new();210let mut vertex_normals = Vec::new();211let mut vertex_uvs = Vec::new();212let mut bevy_meshlets = Vec::with_capacity(meshlets.len());213for (i, meshlet) in meshlets.meshlets.iter().enumerate() {214build_and_compress_per_meshlet_vertex_data(215meshlet,216meshlets.get(i).vertices,217&vertex_buffer,218vertex_stride,219&mut vertex_positions,220&mut vertex_normals,221&mut vertex_uvs,222&mut bevy_meshlets,223vertex_position_quantization_factor,224);225}226vertex_positions.set_uninitialized(false);227228Ok(Self {229vertex_positions: vertex_positions.into_vec().into(),230vertex_normals: vertex_normals.into(),231vertex_uvs: vertex_uvs.into(),232indices: meshlets.triangles.into(),233bvh: bvh.into(),234meshlets: bevy_meshlets.into(),235meshlet_cull_data: cull_data236.into_iter()237.map(|cull_data| MeshletCullData {238aabb: aabb_to_meshlet(cull_data.aabb, cull_data.error, 0),239lod_group_sphere: sphere_to_meshlet(cull_data.lod_group_sphere),240})241.collect(),242aabb,243bvh_depth: depth,244})245}246}247248fn validate_input_mesh(mesh: &Mesh) -> Result<Cow<'_, [u32]>, MeshToMeshletMeshConversionError> {249if mesh.primitive_topology() != PrimitiveTopology::TriangleList {250return Err(MeshToMeshletMeshConversionError::WrongMeshPrimitiveTopology);251}252253if mesh.attributes().map(|(attribute, _)| attribute.id).ne([254Mesh::ATTRIBUTE_POSITION.id,255Mesh::ATTRIBUTE_NORMAL.id,256Mesh::ATTRIBUTE_UV_0.id,257]) {258return Err(MeshToMeshletMeshConversionError::WrongMeshVertexAttributes(259mesh.attributes()260.map(|(attribute, _)| format!("{attribute:?}"))261.collect(),262));263}264265match mesh.indices() {266Some(Indices::U32(indices)) => Ok(Cow::Borrowed(indices.as_slice())),267Some(Indices::U16(indices)) => Ok(indices.iter().map(|i| *i as u32).collect()),268_ => Err(MeshToMeshletMeshConversionError::MeshMissingIndices),269}270}271272fn triangles_in_meshlets(meshlets: &Meshlets, ids: impl IntoIterator<Item = u32>) -> u32 {273ids.into_iter()274.map(|id| meshlets.get(id as _).triangles.len() as u32 / 3)275.sum()276}277278fn compute_meshlets(279indices: &[u32],280vertices: &VertexDataAdapter,281position_only_vertex_remap: &[u32],282prev_lod_data: Option<(BoundingSphere, f32)>,283) -> (Meshlets, Vec<TempMeshletCullData>) {284// For each vertex, build a list of all triangles that use it285let mut vertices_to_triangles = vec![Vec::new(); position_only_vertex_remap.len()];286for (i, index) in indices.iter().enumerate() {287let vertex_id = position_only_vertex_remap[*index as usize];288let vertex_to_triangles = &mut vertices_to_triangles[vertex_id as usize];289vertex_to_triangles.push(i / 3);290}291292// For each triangle pair, count how many vertices they share293let mut triangle_pair_to_shared_vertex_count = <HashMap<_, _>>::default();294for vertex_triangle_ids in vertices_to_triangles {295for (triangle_id1, triangle_id2) in vertex_triangle_ids.into_iter().tuple_combinations() {296let count = triangle_pair_to_shared_vertex_count297.entry((298triangle_id1.min(triangle_id2),299triangle_id1.max(triangle_id2),300))301.or_insert(0);302*count += 1;303}304}305306// For each triangle, gather all other triangles that share at least one vertex along with their shared vertex count307let triangle_count = indices.len() / 3;308let mut connected_triangles_per_triangle = vec![Vec::new(); triangle_count];309for ((triangle_id1, triangle_id2), shared_vertex_count) in triangle_pair_to_shared_vertex_count310{311// We record both id1->id2 and id2->id1 as adjacency is symmetrical312connected_triangles_per_triangle[triangle_id1].push((triangle_id2, shared_vertex_count));313connected_triangles_per_triangle[triangle_id2].push((triangle_id1, shared_vertex_count));314}315316// The order of triangles depends on hash traversal order; to produce deterministic results, sort them317// TODO: Wouldn't it be faster to use a `BTreeMap` above instead of `HashMap` + sorting?318for list in connected_triangles_per_triangle.iter_mut() {319list.sort_unstable();320}321322let mut xadj = Vec::with_capacity(triangle_count + 1);323let mut adjncy = Vec::new();324let mut adjwgt = Vec::new();325for connected_triangles in connected_triangles_per_triangle {326xadj.push(adjncy.len() as i32);327for (connected_triangle_id, shared_vertex_count) in connected_triangles {328adjncy.push(connected_triangle_id as i32);329adjwgt.push(shared_vertex_count);330// TODO: Additional weight based on triangle center spatial proximity?331}332}333xadj.push(adjncy.len() as i32);334335let mut options = [-1; metis::NOPTIONS];336options[metis::option::Seed::INDEX] = 17;337options[metis::option::UFactor::INDEX] = 1; // Important that there's very little imbalance between partitions338339let mut meshlet_per_triangle = vec![0; triangle_count];340let partition_count = triangle_count.div_ceil(126); // Need to undershoot to prevent METIS from going over 128 triangles per meshlet341Graph::new(1, partition_count as i32, &xadj, &adjncy)342.unwrap()343.set_options(&options)344.set_adjwgt(&adjwgt)345.part_recursive(&mut meshlet_per_triangle)346.unwrap();347348let mut indices_per_meshlet = vec![Vec::new(); partition_count];349for (triangle_id, meshlet) in meshlet_per_triangle.into_iter().enumerate() {350let meshlet_indices = &mut indices_per_meshlet[meshlet as usize];351let base_index = triangle_id * 3;352meshlet_indices.extend_from_slice(&indices[base_index..(base_index + 3)]);353}354355// Use meshopt to build meshlets from the sets of triangles356let mut meshlets = Meshlets {357meshlets: Vec::new(),358vertices: Vec::new(),359triangles: Vec::new(),360};361let mut cull_data = Vec::new();362let get_vertex = |&v: &u32| {363*bytemuck::from_bytes::<Vec3>(364&vertices.reader.get_ref()365[vertices.position_offset + v as usize * vertices.vertex_stride..][..12],366)367};368for meshlet_indices in &indices_per_meshlet {369let meshlet = build_meshlets(meshlet_indices, vertices, 256, 128, 0.0);370for meshlet in meshlet.iter() {371let (lod_group_sphere, error) = prev_lod_data.unwrap_or_else(|| {372let bounds = meshopt::compute_meshlet_bounds(meshlet, vertices);373(BoundingSphere::new(bounds.center, bounds.radius), 0.0)374});375376cull_data.push(TempMeshletCullData {377aabb: Aabb3d::from_point_cloud(378Isometry3d::IDENTITY,379meshlet.vertices.iter().map(get_vertex),380),381lod_group_sphere,382error,383});384}385merge_meshlets(&mut meshlets, meshlet);386}387(meshlets, cull_data)388}389390fn find_connected_meshlets(391simplification_queue: &[u32],392meshlets: &Meshlets,393position_only_vertex_remap: &[u32],394) -> Vec<Vec<(usize, usize)>> {395// For each vertex, build a list of all meshlets that use it396let mut vertices_to_meshlets = vec![Vec::new(); position_only_vertex_remap.len()];397for (id_index, &meshlet_id) in simplification_queue.iter().enumerate() {398let meshlet = meshlets.get(meshlet_id as _);399for index in meshlet.triangles {400let vertex_id = position_only_vertex_remap[meshlet.vertices[*index as usize] as usize];401let vertex_to_meshlets = &mut vertices_to_meshlets[vertex_id as usize];402// Meshlets are added in order, so we can just check the last element to deduplicate,403// in the case of two triangles sharing the same vertex within a single meshlet404if vertex_to_meshlets.last() != Some(&id_index) {405vertex_to_meshlets.push(id_index);406}407}408}409410// For each meshlet pair, count how many vertices they share411let mut meshlet_pair_to_shared_vertex_count = <HashMap<_, _>>::default();412for vertex_meshlet_ids in vertices_to_meshlets {413for (meshlet_id1, meshlet_id2) in vertex_meshlet_ids.into_iter().tuple_combinations() {414let count = meshlet_pair_to_shared_vertex_count415.entry((meshlet_id1.min(meshlet_id2), meshlet_id1.max(meshlet_id2)))416.or_insert(0);417*count += 1;418}419}420421// For each meshlet, gather all other meshlets that share at least one vertex along with their shared vertex count422let mut connected_meshlets_per_meshlet = vec![Vec::new(); simplification_queue.len()];423for ((meshlet_id1, meshlet_id2), shared_vertex_count) in meshlet_pair_to_shared_vertex_count {424// We record both id1->id2 and id2->id1 as adjacency is symmetrical425connected_meshlets_per_meshlet[meshlet_id1].push((meshlet_id2, shared_vertex_count));426connected_meshlets_per_meshlet[meshlet_id2].push((meshlet_id1, shared_vertex_count));427}428429// The order of meshlets depends on hash traversal order; to produce deterministic results, sort them430// TODO: Wouldn't it be faster to use a `BTreeMap` above instead of `HashMap` + sorting?431for list in connected_meshlets_per_meshlet.iter_mut() {432list.sort_unstable();433}434435connected_meshlets_per_meshlet436}437438// METIS manual: https://github.com/KarypisLab/METIS/blob/e0f1b88b8efcb24ffa0ec55eabb78fbe61e58ae7/manual/manual.pdf439fn group_meshlets(440simplification_queue: &[u32],441meshlet_cull_data: &[TempMeshletCullData],442connected_meshlets_per_meshlet: &[Vec<(usize, usize)>],443) -> Vec<TempMeshletGroup> {444let mut xadj = Vec::with_capacity(simplification_queue.len() + 1);445let mut adjncy = Vec::new();446let mut adjwgt = Vec::new();447for connected_meshlets in connected_meshlets_per_meshlet {448xadj.push(adjncy.len() as i32);449for (connected_meshlet_id, shared_vertex_count) in connected_meshlets {450adjncy.push(*connected_meshlet_id as i32);451adjwgt.push(*shared_vertex_count as i32);452// TODO: Additional weight based on meshlet spatial proximity453}454}455xadj.push(adjncy.len() as i32);456457let mut options = [-1; metis::NOPTIONS];458options[metis::option::Seed::INDEX] = 17;459options[metis::option::UFactor::INDEX] = 200;460461let mut group_per_meshlet = vec![0; simplification_queue.len()];462let partition_count = simplification_queue463.len()464.div_ceil(TARGET_MESHLETS_PER_GROUP); // TODO: Nanite uses groups of 8-32, probably based on some kind of heuristic465Graph::new(1, partition_count as i32, &xadj, &adjncy)466.unwrap()467.set_options(&options)468.set_adjwgt(&adjwgt)469.part_recursive(&mut group_per_meshlet)470.unwrap();471472let mut groups = vec![TempMeshletGroup::default(); partition_count];473for (i, meshlet_group) in group_per_meshlet.into_iter().enumerate() {474let group = &mut groups[meshlet_group as usize];475let meshlet_id = simplification_queue[i];476477group.meshlets.push(meshlet_id);478let data = &meshlet_cull_data[meshlet_id as usize];479group.aabb = group.aabb.merge(&data.aabb);480group.lod_bounds = merge_spheres(group.lod_bounds, data.lod_group_sphere);481}482groups483}484485fn lock_group_borders(486vertex_locks: &mut [bool],487groups: &[TempMeshletGroup],488meshlets: &Meshlets,489position_only_vertex_remap: &[u32],490) {491let mut position_only_locks = vec![-1; position_only_vertex_remap.len()];492493// Iterate over position-only based vertices of all meshlets in all groups494for (group_id, group) in groups.iter().enumerate() {495for &meshlet_id in group.meshlets.iter() {496let meshlet = meshlets.get(meshlet_id as usize);497for index in meshlet.triangles {498let vertex_id =499position_only_vertex_remap[meshlet.vertices[*index as usize] as usize] as usize;500501// If the vertex is not yet claimed by any group, or was already claimed by this group502if position_only_locks[vertex_id] == -1503|| position_only_locks[vertex_id] == group_id as i32504{505position_only_locks[vertex_id] = group_id as i32; // Then claim the vertex for this group506} else {507position_only_locks[vertex_id] = -2; // Else vertex was already claimed by another group or was already locked, lock it508}509}510}511}512513// Lock vertices used by more than 1 group514for i in 0..vertex_locks.len() {515let vertex_id = position_only_vertex_remap[i] as usize;516vertex_locks[i] = position_only_locks[vertex_id] == -2;517}518}519520fn simplify_meshlet_group(521group: &TempMeshletGroup,522meshlets: &Meshlets,523vertices: &VertexDataAdapter<'_>,524vertex_normals: &[f32],525vertex_stride: usize,526vertex_locks: &[bool],527) -> Option<(Vec<u32>, f32)> {528// Build a new index buffer into the mesh vertex data by combining all meshlet data in the group529let group_indices = group530.meshlets531.iter()532.flat_map(|&meshlet_id| {533let meshlet = meshlets.get(meshlet_id as _);534meshlet535.triangles536.iter()537.map(|&meshlet_index| meshlet.vertices[meshlet_index as usize])538})539.collect::<Vec<_>>();540541// Simplify the group to ~50% triangle count542let mut error = 0.0;543let simplified_group_indices = simplify_with_attributes_and_locks(544&group_indices,545vertices,546vertex_normals,547&[0.5; 3],548vertex_stride,549vertex_locks,550group_indices.len() / 2,551f32::MAX,552SimplifyOptions::Sparse | SimplifyOptions::ErrorAbsolute,553Some(&mut error),554);555556// Check if we were able to simplify557if simplified_group_indices.len() as f32 / group_indices.len() as f32558> SIMPLIFICATION_FAILURE_PERCENTAGE559{560return None;561}562563Some((simplified_group_indices, error))564}565566fn merge_meshlets(meshlets: &mut Meshlets, merge: Meshlets) {567let vertex_offset = meshlets.vertices.len() as u32;568let triangle_offset = meshlets.triangles.len() as u32;569meshlets.vertices.extend_from_slice(&merge.vertices);570meshlets.triangles.extend_from_slice(&merge.triangles);571meshlets572.meshlets573.extend(merge.meshlets.into_iter().map(|mut meshlet| {574meshlet.vertex_offset += vertex_offset;575meshlet.triangle_offset += triangle_offset;576meshlet577}));578}579580fn build_and_compress_per_meshlet_vertex_data(581meshlet: &meshopt_Meshlet,582meshlet_vertex_ids: &[u32],583vertex_buffer: &[u8],584vertex_stride: usize,585vertex_positions: &mut BitVec<u32, Lsb0>,586vertex_normals: &mut Vec<u32>,587vertex_uvs: &mut Vec<Vec2>,588meshlets: &mut Vec<Meshlet>,589vertex_position_quantization_factor: u8,590) {591let start_vertex_position_bit = vertex_positions.len() as u32;592let start_vertex_attribute_id = vertex_normals.len() as u32;593594let quantization_factor =595(1 << vertex_position_quantization_factor) as f32 * CENTIMETERS_PER_METER;596597let mut min_quantized_position_channels = IVec3::MAX;598let mut max_quantized_position_channels = IVec3::MIN;599600// Lossy vertex compression601let mut quantized_positions = [IVec3::ZERO; 256];602for (i, vertex_id) in meshlet_vertex_ids.iter().enumerate() {603// Load source vertex attributes604let vertex_id_byte = *vertex_id as usize * vertex_stride;605let vertex_data = &vertex_buffer[vertex_id_byte..(vertex_id_byte + vertex_stride)];606let position = Vec3::from_slice(bytemuck::cast_slice(&vertex_data[0..12]));607let normal = Vec3::from_slice(bytemuck::cast_slice(&vertex_data[12..24]));608let uv = Vec2::from_slice(bytemuck::cast_slice(&vertex_data[24..32]));609610// Copy uncompressed UV611vertex_uvs.push(uv);612613// Compress normal614vertex_normals.push(pack2x16snorm(octahedral_encode(normal)));615616// Quantize position to a fixed-point IVec3617let quantized_position = (position * quantization_factor + 0.5).as_ivec3();618quantized_positions[i] = quantized_position;619620// Compute per X/Y/Z-channel quantized position min/max for this meshlet621min_quantized_position_channels = min_quantized_position_channels.min(quantized_position);622max_quantized_position_channels = max_quantized_position_channels.max(quantized_position);623}624625// Calculate bits needed to encode each quantized vertex position channel based on the range of each channel626let range = max_quantized_position_channels - min_quantized_position_channels + 1;627let bits_per_vertex_position_channel_x = log2(range.x as f32).ceil() as u8;628let bits_per_vertex_position_channel_y = log2(range.y as f32).ceil() as u8;629let bits_per_vertex_position_channel_z = log2(range.z as f32).ceil() as u8;630631// Lossless encoding of vertex positions in the minimum number of bits per channel632for quantized_position in quantized_positions.iter().take(meshlet_vertex_ids.len()) {633// Remap [range_min, range_max] IVec3 to [0, range_max - range_min] UVec3634let position = (quantized_position - min_quantized_position_channels).as_uvec3();635636// Store as a packed bitstream637vertex_positions.extend_from_bitslice(638&position.x.view_bits::<Lsb0>()[..bits_per_vertex_position_channel_x as usize],639);640vertex_positions.extend_from_bitslice(641&position.y.view_bits::<Lsb0>()[..bits_per_vertex_position_channel_y as usize],642);643vertex_positions.extend_from_bitslice(644&position.z.view_bits::<Lsb0>()[..bits_per_vertex_position_channel_z as usize],645);646}647648meshlets.push(Meshlet {649start_vertex_position_bit,650start_vertex_attribute_id,651start_index_id: meshlet.triangle_offset,652vertex_count_minus_one: (meshlet.vertex_count - 1) as u8,653triangle_count: meshlet.triangle_count as u8,654padding: 0,655bits_per_vertex_position_channel_x,656bits_per_vertex_position_channel_y,657bits_per_vertex_position_channel_z,658vertex_position_quantization_factor,659min_vertex_position_channel_x: min_quantized_position_channels.x as f32,660min_vertex_position_channel_y: min_quantized_position_channels.y as f32,661min_vertex_position_channel_z: min_quantized_position_channels.z as f32,662});663}664665fn merge_spheres(a: BoundingSphere, b: BoundingSphere) -> BoundingSphere {666let sr = a.radius().min(b.radius());667let br = a.radius().max(b.radius());668let len = a.center.distance(b.center);669if len + sr <= br || sr == 0.0 || len == 0.0 {670if a.radius() > b.radius() {671a672} else {673b674}675} else {676let radius = (sr + br + len) / 2.0;677let center =678(a.center + b.center + (a.radius() - b.radius()) * (a.center - b.center) / len) / 2.0;679BoundingSphere::new(center, radius)680}681}682683#[derive(Copy, Clone)]684struct TempMeshletCullData {685aabb: Aabb3d,686lod_group_sphere: BoundingSphere,687error: f32,688}689690#[derive(Clone)]691struct TempMeshletGroup {692aabb: Aabb3d,693lod_bounds: BoundingSphere,694parent_error: f32,695meshlets: SmallVec<[u32; TARGET_MESHLETS_PER_GROUP]>,696}697698impl Default for TempMeshletGroup {699fn default() -> Self {700Self {701aabb: aabb_default(), // Default AABB to merge into702lod_bounds: BoundingSphere::new(Vec3A::ZERO, 0.0),703parent_error: f32::MAX,704meshlets: SmallVec::new(),705}706}707}708709// All the BVH build code was stolen from https://github.com/SparkyPotato/radiance/blob/4aa17a3a5be7a0466dc69713e249bbcee9f46057/crates/rad-renderer/src/assets/mesh/virtual_mesh.rs because it works and I'm lazy and don't want to reimplement it710struct TempBvhNode {711group: u32,712aabb: Aabb3d,713children: SmallVec<[u32; 8]>,714}715716#[derive(Default)]717struct BvhBuilder {718nodes: Vec<TempBvhNode>,719lods: Vec<Range<u32>>,720}721722impl BvhBuilder {723fn add_lod(&mut self, offset: u32, all_groups: &[TempMeshletGroup]) {724let first = self.nodes.len() as u32;725self.nodes.extend(726all_groups727.iter()728.enumerate()729.skip(offset as _)730.map(|(i, group)| TempBvhNode {731group: i as u32,732aabb: group.aabb,733children: SmallVec::new(),734}),735);736let end = self.nodes.len() as u32;737if first != end {738self.lods.push(first..end);739}740}741742fn surface_area(&self, nodes: &[u32]) -> f32 {743nodes744.iter()745.map(|&x| self.nodes[x as usize].aabb)746.reduce(|a, b| a.merge(&b))747.expect("cannot find surface area of zero nodes")748.visible_area()749}750751fn sort_nodes_by_sah(&self, nodes: &mut [u32], splits: [usize; 8]) {752// We use a BVH8, so just recursively binary split 3 times for near-optimal SAH753for i in 0..3 {754let parts = 1 << i; // 2^i755let nodes_per_split = 8 >> i; // 8 / 2^i756let half_count = nodes_per_split / 2;757let mut offset = 0;758for p in 0..parts {759let first = p * nodes_per_split;760let mut s0 = 0;761let mut s1 = 0;762for i in 0..half_count {763s0 += splits[first + i];764s1 += splits[first + half_count + i];765}766let c = s0 + s1;767let nodes = &mut nodes[offset..(offset + c)];768offset += c;769770let mut cost = f32::MAX;771let mut axis = 0;772let key = |x, ax| self.nodes[x as usize].aabb.center()[ax];773for ax in 0..3 {774nodes.sort_unstable_by(|&x, &y| key(x, ax).partial_cmp(&key(y, ax)).unwrap());775let (left, right) = nodes.split_at(s0);776let c = self.surface_area(left) + self.surface_area(right);777if c < cost {778axis = ax;779cost = c;780}781}782if axis != 2 {783nodes.sort_unstable_by(|&x, &y| {784key(x, axis).partial_cmp(&key(y, axis)).unwrap()785});786}787}788}789}790791fn build_temp_inner(&mut self, nodes: &mut [u32], optimize: bool) -> u32 {792let count = nodes.len();793if count == 1 {794nodes[0]795} else if count <= 8 {796let i = self.nodes.len();797self.nodes.push(TempBvhNode {798group: u32::MAX,799aabb: aabb_default(),800children: nodes.iter().copied().collect(),801});802i as _803} else {804// We need to split the nodes into 8 groups, with the smallest possible tree depth.805// Additionally, no child should be more than one level deeper than the others.806// At `l` levels, we can fit upto 8^l nodes.807// The `max_child_size` is the largest power of 8 <= `count` (any larger and we'd have808// unfilled nodes).809// The `min_child_size` is thus 1 level (8 times) smaller.810// After distributing `min_child_size` to all children, we have distributed811// `min_child_size * 8` nodes (== `max_child_size`).812// The remaining nodes are then distributed left to right.813let max_child_size = 1 << ((count.ilog2() / 3) * 3);814let min_child_size = max_child_size >> 3;815let max_extra_per_node = max_child_size - min_child_size;816let mut extra = count - max_child_size; // 8 * min_child_size817let splits = core::array::from_fn(|_| {818let size = extra.min(max_extra_per_node);819extra -= size;820min_child_size + size821});822823if optimize {824self.sort_nodes_by_sah(nodes, splits);825}826827let mut offset = 0;828let children = splits829.into_iter()830.map(|size| {831let i = self.build_temp_inner(&mut nodes[offset..(offset + size)], optimize);832offset += size;833i834})835.collect();836837let i = self.nodes.len();838self.nodes.push(TempBvhNode {839group: u32::MAX,840aabb: aabb_default(),841children,842});843i as _844}845}846847fn build_temp(&mut self) -> u32 {848let mut lods = Vec::with_capacity(self.lods.len());849for lod in core::mem::take(&mut self.lods) {850let mut lod: Vec<_> = lod.collect();851let root = self.build_temp_inner(&mut lod, true);852let node = &self.nodes[root as usize];853if node.group != u32::MAX || node.children.len() == 8 {854lods.push(root);855} else {856lods.extend(node.children.iter().copied());857}858}859self.build_temp_inner(&mut lods, false)860}861862fn build_inner(863&self,864groups: &[TempMeshletGroup],865out: &mut Vec<BvhNode>,866max_depth: &mut u32,867node: u32,868depth: u32,869) -> u32 {870*max_depth = depth.max(*max_depth);871let node = &self.nodes[node as usize];872let onode = out.len();873out.push(BvhNode::default());874875for (i, &child_id) in node.children.iter().enumerate() {876let child = &self.nodes[child_id as usize];877if child.group != u32::MAX {878let group = &groups[child.group as usize];879let out = &mut out[onode];880out.aabbs[i] = aabb_to_meshlet(group.aabb, group.parent_error, group.meshlets[0]);881out.lod_bounds[i] = sphere_to_meshlet(group.lod_bounds);882out.child_counts[i] = group.meshlets[1] as _;883} else {884let child_id = self.build_inner(groups, out, max_depth, child_id, depth + 1);885let child = &out[child_id as usize];886let mut aabb = aabb_default();887let mut parent_error = 0.0f32;888let mut lod_bounds = BoundingSphere::new(Vec3A::ZERO, 0.0);889for i in 0..8 {890if child.child_counts[i] == 0 {891break;892}893894aabb = aabb.merge(&Aabb3d::new(895child.aabbs[i].center,896child.aabbs[i].half_extent,897));898lod_bounds = merge_spheres(899lod_bounds,900BoundingSphere::new(child.lod_bounds[i].center, child.lod_bounds[i].radius),901);902parent_error = parent_error.max(child.aabbs[i].error);903}904905let out = &mut out[onode];906out.aabbs[i] = aabb_to_meshlet(aabb, parent_error, child_id);907out.lod_bounds[i] = sphere_to_meshlet(lod_bounds);908out.child_counts[i] = u8::MAX;909}910}911912onode as _913}914915fn build(916mut self,917meshlets: &mut Meshlets,918mut groups: Vec<TempMeshletGroup>,919cull_data: &mut Vec<TempMeshletCullData>,920) -> (Vec<BvhNode>, MeshletAabb, u32) {921// The BVH requires group meshlets to be contiguous, so remap them first.922let mut remap = Vec::with_capacity(meshlets.meshlets.len());923let mut remapped_cull_data = Vec::with_capacity(cull_data.len());924for group in groups.iter_mut() {925let first = remap.len() as u32;926let count = group.meshlets.len() as u32;927remap.extend(928group929.meshlets930.iter()931.map(|&m| meshlets.meshlets[m as usize]),932);933remapped_cull_data.extend(group.meshlets.iter().map(|&m| cull_data[m as usize]));934group.meshlets.resize(2, 0);935group.meshlets[0] = first;936group.meshlets[1] = count;937}938meshlets.meshlets = remap;939*cull_data = remapped_cull_data;940941let mut out = vec![];942let mut aabb = aabb_default();943let mut max_depth = 0;944945if self.nodes.len() == 1 {946let mut o = BvhNode::default();947let group = &groups[0];948o.aabbs[0] = aabb_to_meshlet(group.aabb, group.parent_error, group.meshlets[0]);949o.lod_bounds[0] = sphere_to_meshlet(group.lod_bounds);950o.child_counts[0] = group.meshlets[1] as _;951out.push(o);952aabb = group.aabb;953max_depth = 1;954} else {955let root = self.build_temp();956let root = self.build_inner(&groups, &mut out, &mut max_depth, root, 1);957assert_eq!(root, 0, "root must be 0");958959let root = &out[0];960for i in 0..8 {961if root.child_counts[i] == 0 {962break;963}964965aabb = aabb.merge(&Aabb3d::new(966root.aabbs[i].center,967root.aabbs[i].half_extent,968));969}970}971972let mut reachable = vec![false; meshlets.meshlets.len()];973verify_bvh(&out, cull_data, &mut reachable, 0);974assert!(975reachable.iter().all(|&x| x),976"all meshlets must be reachable"977);978979(980out,981MeshletAabb {982center: aabb.center().into(),983half_extent: aabb.half_size().into(),984},985max_depth,986)987}988}989990fn verify_bvh(991out: &[BvhNode],992cull_data: &[TempMeshletCullData],993reachable: &mut [bool],994node: u32,995) {996let node = &out[node as usize];997for i in 0..8 {998let sphere = node.lod_bounds[i];999let error = node.aabbs[i].error;1000if node.child_counts[i] == u8::MAX {1001let child = &out[node.aabbs[i].child_offset as usize];1002for i in 0..8 {1003if child.child_counts[i] == 0 {1004break;1005}1006assert!(1007child.aabbs[i].error <= error,1008"BVH errors are not monotonic"1009);1010let sphere_error = (sphere.center - child.lod_bounds[i].center).length()1011- (sphere.radius - child.lod_bounds[i].radius);1012assert!(1013sphere_error <= 0.0001,1014"BVH lod spheres are not monotonic ({sphere_error})"1015);1016}1017verify_bvh(out, cull_data, reachable, node.aabbs[i].child_offset);1018} else {1019for m in 0..node.child_counts[i] as u32 {1020let mid = (m + node.aabbs[i].child_offset) as usize;1021let meshlet = &cull_data[mid];1022assert!(meshlet.error <= error, "meshlet errors are not monotonic");1023let sphere_error = (Vec3A::from(sphere.center) - meshlet.lod_group_sphere.center)1024.length()1025- (sphere.radius - meshlet.lod_group_sphere.radius());1026assert!(1027sphere_error <= 0.0001,1028"meshlet lod spheres are not monotonic: ({sphere_error})"1029);1030reachable[mid] = true;1031}1032}1033}1034}10351036fn aabb_default() -> Aabb3d {1037Aabb3d {1038min: Vec3A::INFINITY,1039max: Vec3A::NEG_INFINITY,1040}1041}10421043fn aabb_to_meshlet(aabb: Aabb3d, error: f32, child_offset: u32) -> MeshletAabbErrorOffset {1044MeshletAabbErrorOffset {1045center: aabb.center().into(),1046error,1047half_extent: aabb.half_size().into(),1048child_offset,1049}1050}10511052fn sphere_to_meshlet(sphere: BoundingSphere) -> MeshletBoundingSphere {1053MeshletBoundingSphere {1054center: sphere.center.into(),1055radius: sphere.radius(),1056}1057}10581059// TODO: Precise encode variant1060fn octahedral_encode(v: Vec3) -> Vec2 {1061let n = v / (v.x.abs() + v.y.abs() + v.z.abs());1062let octahedral_wrap = (1.0 - n.yx().abs())1063* Vec2::new(1064if n.x >= 0.0 { 1.0 } else { -1.0 },1065if n.y >= 0.0 { 1.0 } else { -1.0 },1066);1067if n.z >= 0.0 {1068n.xy()1069} else {1070octahedral_wrap1071}1072}10731074// https://www.w3.org/TR/WGSL/#pack2x16snorm-builtin1075fn pack2x16snorm(v: Vec2) -> u32 {1076let v = v.clamp(Vec2::NEG_ONE, Vec2::ONE);1077let v = (v * 32767.0 + 0.5).floor().as_i16vec2();1078bytemuck::cast(v)1079}10801081/// An error produced by [`MeshletMesh::from_mesh`].1082#[derive(Error, Debug)]1083pub enum MeshToMeshletMeshConversionError {1084#[error("Mesh primitive topology is not TriangleList")]1085WrongMeshPrimitiveTopology,1086#[error("Mesh vertex attributes are not {{POSITION, NORMAL, UV_0}}: {0:?}")]1087WrongMeshVertexAttributes(Vec<String>),1088#[error("Mesh has no indices")]1089MeshMissingIndices,1090}109110921093