Path: blob/main/crates/bevy_pbr/src/meshlet/from_mesh.rs
6600 views
use crate::meshlet::asset::{MeshletAabb, MeshletAabbErrorOffset, MeshletCullData};12use super::asset::{BvhNode, Meshlet, MeshletBoundingSphere, MeshletMesh};3use alloc::borrow::Cow;4use bevy_math::{5bounding::{Aabb3d, BoundingSphere, BoundingVolume},6ops::log2,7IVec3, Isometry3d, Vec2, Vec3, Vec3A, Vec3Swizzles,8};9use bevy_mesh::{Indices, Mesh};10use bevy_platform::collections::HashMap;11use bevy_render::render_resource::PrimitiveTopology;12use bevy_tasks::{AsyncComputeTaskPool, ParallelSlice};13use bitvec::{order::Lsb0, vec::BitVec, view::BitView};14use core::{f32, ops::Range};15use itertools::Itertools;16use meshopt::{17build_meshlets, ffi::meshopt_Meshlet, generate_vertex_remap_multi,18simplify_with_attributes_and_locks, Meshlets, SimplifyOptions, VertexDataAdapter, VertexStream,19};20use metis::{option::Opt, Graph};21use smallvec::SmallVec;22use thiserror::Error;23use tracing::debug_span;2425// Aim to have 8 meshlets per group26const TARGET_MESHLETS_PER_GROUP: usize = 8;27// Reject groups that keep over 60% of their original triangles. We'd much rather render a few28// extra triangles than create too many meshlets, increasing cull overhead.29const SIMPLIFICATION_FAILURE_PERCENTAGE: f32 = 0.60;3031/// Default vertex position quantization factor for use with [`MeshletMesh::from_mesh`].32///33/// Snaps vertices to the nearest 1/16th of a centimeter (1/2^4).34pub const MESHLET_DEFAULT_VERTEX_POSITION_QUANTIZATION_FACTOR: u8 = 4;3536const CENTIMETERS_PER_METER: f32 = 100.0;3738impl MeshletMesh {39/// Process a [`Mesh`] to generate a [`MeshletMesh`].40///41/// This process is very slow, and should be done ahead of time, and not at runtime.42///43/// # Requirements44///45/// This function requires the `meshlet_processor` cargo feature.46///47/// The input mesh must:48/// 1. Use [`PrimitiveTopology::TriangleList`]49/// 2. Use indices50/// 3. Have the exact following set of vertex attributes: `{POSITION, NORMAL, UV_0}` (tangents can be used in material shaders, but are calculated at runtime and are not stored in the mesh)51///52/// # Vertex precision53///54/// `vertex_position_quantization_factor` is the amount of precision to use when quantizing vertex positions.55///56/// Vertices are snapped to the nearest (1/2^x)th of a centimeter, where x = `vertex_position_quantization_factor`.57/// E.g. if x = 4, then vertices are snapped to the nearest 1/2^4 = 1/16th of a centimeter.58///59/// Use [`MESHLET_DEFAULT_VERTEX_POSITION_QUANTIZATION_FACTOR`] as a default, adjusting lower to save memory and disk space, and higher to prevent artifacts if needed.60///61/// To ensure that two different meshes do not have cracks between them when placed directly next to each other:62/// * Use the same quantization factor when converting each mesh to a meshlet mesh63/// * Ensure that their [`bevy_transform::components::Transform::translation`]s are a multiple of 1/2^x centimeters (note that translations are in meters)64/// * Ensure that their [`bevy_transform::components::Transform::scale`]s are the same65/// * Ensure that their [`bevy_transform::components::Transform::rotation`]s are a multiple of 90 degrees66pub fn from_mesh(67mesh: &Mesh,68vertex_position_quantization_factor: u8,69) -> Result<Self, MeshToMeshletMeshConversionError> {70let s = debug_span!("build meshlet mesh");71let _e = s.enter();7273// Validate mesh format74let indices = validate_input_mesh(mesh)?;7576// Get meshlet vertices77let vertex_buffer = mesh.create_packed_vertex_buffer_data();78let vertex_stride = mesh.get_vertex_size() as usize;79let vertices = VertexDataAdapter::new(&vertex_buffer, vertex_stride, 0).unwrap();80let vertex_normals = bytemuck::cast_slice(&vertex_buffer[12..16]);8182// Generate a position-only vertex buffer for determining triangle/meshlet connectivity83let (position_only_vertex_count, position_only_vertex_remap) = generate_vertex_remap_multi(84vertices.vertex_count,85&[VertexStream::new_with_stride::<Vec3, _>(86vertex_buffer.as_ptr(),87vertex_stride,88)],89Some(&indices),90);9192// Split the mesh into an initial list of meshlets (LOD 0)93let (mut meshlets, mut cull_data) = compute_meshlets(94&indices,95&vertices,96&position_only_vertex_remap,97position_only_vertex_count,98None,99);100101let mut vertex_locks = vec![false; vertices.vertex_count];102103// Build further LODs104let mut bvh = BvhBuilder::default();105let mut all_groups = Vec::new();106let mut simplification_queue: Vec<_> = (0..meshlets.len() as u32).collect();107let mut stuck = Vec::new();108while !simplification_queue.is_empty() {109let s = debug_span!("simplify lod", meshlets = simplification_queue.len());110let _e = s.enter();111112// For each meshlet build a list of connected meshlets (meshlets that share a vertex)113let connected_meshlets_per_meshlet = find_connected_meshlets(114&simplification_queue,115&meshlets,116&position_only_vertex_remap,117position_only_vertex_count,118);119120// Group meshlets into roughly groups of size TARGET_MESHLETS_PER_GROUP,121// grouping meshlets with a high number of shared vertices122let groups = group_meshlets(123&simplification_queue,124&cull_data,125&connected_meshlets_per_meshlet,126);127simplification_queue.clear();128129// Lock borders between groups to prevent cracks when simplifying130lock_group_borders(131&mut vertex_locks,132&groups,133&meshlets,134&position_only_vertex_remap,135position_only_vertex_count,136);137138let simplified = groups.par_chunk_map(AsyncComputeTaskPool::get(), 1, |_, groups| {139let mut group = groups[0].clone();140141// If the group only has a single meshlet we can't simplify it142if group.meshlets.len() == 1 {143return Err(group);144}145146let s = debug_span!("simplify group", meshlets = group.meshlets.len());147let _e = s.enter();148149// Simplify the group to ~50% triangle count150let Some((simplified_group_indices, mut group_error)) = simplify_meshlet_group(151&group,152&meshlets,153&vertices,154vertex_normals,155vertex_stride,156&vertex_locks,157) else {158// Couldn't simplify the group enough159return Err(group);160};161162// Force the group error to be atleast as large as all of its constituent meshlet's163// individual errors.164for &id in group.meshlets.iter() {165group_error = group_error.max(cull_data[id as usize].error);166}167group.parent_error = group_error;168169// Build new meshlets using the simplified group170let new_meshlets = compute_meshlets(171&simplified_group_indices,172&vertices,173&position_only_vertex_remap,174position_only_vertex_count,175Some((group.lod_bounds, group.parent_error)),176);177178Ok((group, new_meshlets))179});180181let first_group = all_groups.len() as u32;182let mut passed_tris = 0;183let mut stuck_tris = 0;184for group in simplified {185match group {186Ok((group, (new_meshlets, new_cull_data))) => {187let start = meshlets.len();188merge_meshlets(&mut meshlets, new_meshlets);189cull_data.extend(new_cull_data);190let end = meshlets.len();191let new_meshlet_ids = start as u32..end as u32;192193passed_tris += triangles_in_meshlets(&meshlets, new_meshlet_ids.clone());194simplification_queue.extend(new_meshlet_ids);195all_groups.push(group);196}197Err(group) => {198stuck_tris +=199triangles_in_meshlets(&meshlets, group.meshlets.iter().copied());200stuck.push(group);201}202}203}204205// If we have enough triangles that passed, we can retry simplifying the stuck206// meshlets.207if passed_tris > stuck_tris / 3 {208simplification_queue.extend(stuck.drain(..).flat_map(|group| group.meshlets));209}210211bvh.add_lod(first_group, &all_groups);212}213214// If there's any stuck meshlets left, add another LOD level with only them215if !stuck.is_empty() {216let first_group = all_groups.len() as u32;217all_groups.extend(stuck);218bvh.add_lod(first_group, &all_groups);219}220221let (bvh, aabb, depth) = bvh.build(&mut meshlets, all_groups, &mut cull_data);222223// Copy vertex attributes per meshlet and compress224let mut vertex_positions = BitVec::<u32, Lsb0>::new();225let mut vertex_normals = Vec::new();226let mut vertex_uvs = Vec::new();227let mut bevy_meshlets = Vec::with_capacity(meshlets.len());228for (i, meshlet) in meshlets.meshlets.iter().enumerate() {229build_and_compress_per_meshlet_vertex_data(230meshlet,231meshlets.get(i).vertices,232&vertex_buffer,233vertex_stride,234&mut vertex_positions,235&mut vertex_normals,236&mut vertex_uvs,237&mut bevy_meshlets,238vertex_position_quantization_factor,239);240}241vertex_positions.set_uninitialized(false);242243Ok(Self {244vertex_positions: vertex_positions.into_vec().into(),245vertex_normals: vertex_normals.into(),246vertex_uvs: vertex_uvs.into(),247indices: meshlets.triangles.into(),248bvh: bvh.into(),249meshlets: bevy_meshlets.into(),250meshlet_cull_data: cull_data251.into_iter()252.map(|cull_data| MeshletCullData {253aabb: aabb_to_meshlet(cull_data.aabb, cull_data.error, 0),254lod_group_sphere: sphere_to_meshlet(cull_data.lod_group_sphere),255})256.collect(),257aabb,258bvh_depth: depth,259})260}261}262263fn validate_input_mesh(mesh: &Mesh) -> Result<Cow<'_, [u32]>, MeshToMeshletMeshConversionError> {264if mesh.primitive_topology() != PrimitiveTopology::TriangleList {265return Err(MeshToMeshletMeshConversionError::WrongMeshPrimitiveTopology);266}267268if mesh.attributes().map(|(attribute, _)| attribute.id).ne([269Mesh::ATTRIBUTE_POSITION.id,270Mesh::ATTRIBUTE_NORMAL.id,271Mesh::ATTRIBUTE_UV_0.id,272]) {273return Err(MeshToMeshletMeshConversionError::WrongMeshVertexAttributes(274mesh.attributes()275.map(|(attribute, _)| format!("{attribute:?}"))276.collect(),277));278}279280match mesh.indices() {281Some(Indices::U32(indices)) => Ok(Cow::Borrowed(indices.as_slice())),282Some(Indices::U16(indices)) => Ok(indices.iter().map(|i| *i as u32).collect()),283_ => Err(MeshToMeshletMeshConversionError::MeshMissingIndices),284}285}286287fn triangles_in_meshlets(meshlets: &Meshlets, ids: impl IntoIterator<Item = u32>) -> u32 {288ids.into_iter()289.map(|id| meshlets.get(id as _).triangles.len() as u32 / 3)290.sum()291}292293fn compute_meshlets(294indices: &[u32],295vertices: &VertexDataAdapter,296position_only_vertex_remap: &[u32],297position_only_vertex_count: usize,298prev_lod_data: Option<(BoundingSphere, f32)>,299) -> (Meshlets, Vec<TempMeshletCullData>) {300// For each vertex, build a list of all triangles that use it301let mut vertices_to_triangles = vec![Vec::new(); position_only_vertex_count];302for (i, index) in indices.iter().enumerate() {303let vertex_id = position_only_vertex_remap[*index as usize];304let vertex_to_triangles = &mut vertices_to_triangles[vertex_id as usize];305vertex_to_triangles.push(i / 3);306}307308// For each triangle pair, count how many vertices they share309let mut triangle_pair_to_shared_vertex_count = <HashMap<_, _>>::default();310for vertex_triangle_ids in vertices_to_triangles {311for (triangle_id1, triangle_id2) in vertex_triangle_ids.into_iter().tuple_combinations() {312let count = triangle_pair_to_shared_vertex_count313.entry((314triangle_id1.min(triangle_id2),315triangle_id1.max(triangle_id2),316))317.or_insert(0);318*count += 1;319}320}321322// For each triangle, gather all other triangles that share at least one vertex along with their shared vertex count323let triangle_count = indices.len() / 3;324let mut connected_triangles_per_triangle = vec![Vec::new(); triangle_count];325for ((triangle_id1, triangle_id2), shared_vertex_count) in triangle_pair_to_shared_vertex_count326{327// We record both id1->id2 and id2->id1 as adjacency is symmetrical328connected_triangles_per_triangle[triangle_id1].push((triangle_id2, shared_vertex_count));329connected_triangles_per_triangle[triangle_id2].push((triangle_id1, shared_vertex_count));330}331332// The order of triangles depends on hash traversal order; to produce deterministic results, sort them333// TODO: Wouldn't it be faster to use a `BTreeMap` above instead of `HashMap` + sorting?334for list in connected_triangles_per_triangle.iter_mut() {335list.sort_unstable();336}337338let mut xadj = Vec::with_capacity(triangle_count + 1);339let mut adjncy = Vec::new();340let mut adjwgt = Vec::new();341for connected_triangles in connected_triangles_per_triangle {342xadj.push(adjncy.len() as i32);343for (connected_triangle_id, shared_vertex_count) in connected_triangles {344adjncy.push(connected_triangle_id as i32);345adjwgt.push(shared_vertex_count);346// TODO: Additional weight based on triangle center spatial proximity?347}348}349xadj.push(adjncy.len() as i32);350351let mut options = [-1; metis::NOPTIONS];352options[metis::option::Seed::INDEX] = 17;353options[metis::option::UFactor::INDEX] = 1; // Important that there's very little imbalance between partitions354355let mut meshlet_per_triangle = vec![0; triangle_count];356let partition_count = triangle_count.div_ceil(126); // Need to undershoot to prevent METIS from going over 128 triangles per meshlet357Graph::new(1, partition_count as i32, &xadj, &adjncy)358.unwrap()359.set_options(&options)360.set_adjwgt(&adjwgt)361.part_recursive(&mut meshlet_per_triangle)362.unwrap();363364let mut indices_per_meshlet = vec![Vec::new(); partition_count];365for (triangle_id, meshlet) in meshlet_per_triangle.into_iter().enumerate() {366let meshlet_indices = &mut indices_per_meshlet[meshlet as usize];367let base_index = triangle_id * 3;368meshlet_indices.extend_from_slice(&indices[base_index..(base_index + 3)]);369}370371// Use meshopt to build meshlets from the sets of triangles372let mut meshlets = Meshlets {373meshlets: Vec::new(),374vertices: Vec::new(),375triangles: Vec::new(),376};377let mut cull_data = Vec::new();378let get_vertex = |&v: &u32| {379*bytemuck::from_bytes::<Vec3>(380&vertices.reader.get_ref()381[vertices.position_offset + v as usize * vertices.vertex_stride..][..12],382)383};384for meshlet_indices in &indices_per_meshlet {385let meshlet = build_meshlets(meshlet_indices, vertices, 255, 128, 0.0);386for meshlet in meshlet.iter() {387let (lod_group_sphere, error) = prev_lod_data.unwrap_or_else(|| {388let bounds = meshopt::compute_meshlet_bounds(meshlet, vertices);389(BoundingSphere::new(bounds.center, bounds.radius), 0.0)390});391392cull_data.push(TempMeshletCullData {393aabb: Aabb3d::from_point_cloud(394Isometry3d::IDENTITY,395meshlet.vertices.iter().map(get_vertex),396),397lod_group_sphere,398error,399});400}401merge_meshlets(&mut meshlets, meshlet);402}403(meshlets, cull_data)404}405406fn find_connected_meshlets(407simplification_queue: &[u32],408meshlets: &Meshlets,409position_only_vertex_remap: &[u32],410position_only_vertex_count: usize,411) -> Vec<Vec<(usize, usize)>> {412// For each vertex, build a list of all meshlets that use it413let mut vertices_to_meshlets = vec![Vec::new(); position_only_vertex_count];414for (id_index, &meshlet_id) in simplification_queue.iter().enumerate() {415let meshlet = meshlets.get(meshlet_id as _);416for index in meshlet.triangles {417let vertex_id = position_only_vertex_remap[meshlet.vertices[*index as usize] as usize];418let vertex_to_meshlets = &mut vertices_to_meshlets[vertex_id as usize];419// Meshlets are added in order, so we can just check the last element to deduplicate,420// in the case of two triangles sharing the same vertex within a single meshlet421if vertex_to_meshlets.last() != Some(&id_index) {422vertex_to_meshlets.push(id_index);423}424}425}426427// For each meshlet pair, count how many vertices they share428let mut meshlet_pair_to_shared_vertex_count = <HashMap<_, _>>::default();429for vertex_meshlet_ids in vertices_to_meshlets {430for (meshlet_id1, meshlet_id2) in vertex_meshlet_ids.into_iter().tuple_combinations() {431let count = meshlet_pair_to_shared_vertex_count432.entry((meshlet_id1.min(meshlet_id2), meshlet_id1.max(meshlet_id2)))433.or_insert(0);434*count += 1;435}436}437438// For each meshlet, gather all other meshlets that share at least one vertex along with their shared vertex count439let mut connected_meshlets_per_meshlet = vec![Vec::new(); simplification_queue.len()];440for ((meshlet_id1, meshlet_id2), shared_vertex_count) in meshlet_pair_to_shared_vertex_count {441// We record both id1->id2 and id2->id1 as adjacency is symmetrical442connected_meshlets_per_meshlet[meshlet_id1].push((meshlet_id2, shared_vertex_count));443connected_meshlets_per_meshlet[meshlet_id2].push((meshlet_id1, shared_vertex_count));444}445446// The order of meshlets depends on hash traversal order; to produce deterministic results, sort them447// TODO: Wouldn't it be faster to use a `BTreeMap` above instead of `HashMap` + sorting?448for list in connected_meshlets_per_meshlet.iter_mut() {449list.sort_unstable();450}451452connected_meshlets_per_meshlet453}454455// METIS manual: https://github.com/KarypisLab/METIS/blob/e0f1b88b8efcb24ffa0ec55eabb78fbe61e58ae7/manual/manual.pdf456fn group_meshlets(457simplification_queue: &[u32],458meshlet_cull_data: &[TempMeshletCullData],459connected_meshlets_per_meshlet: &[Vec<(usize, usize)>],460) -> Vec<TempMeshletGroup> {461let mut xadj = Vec::with_capacity(simplification_queue.len() + 1);462let mut adjncy = Vec::new();463let mut adjwgt = Vec::new();464for connected_meshlets in connected_meshlets_per_meshlet {465xadj.push(adjncy.len() as i32);466for (connected_meshlet_id, shared_vertex_count) in connected_meshlets {467adjncy.push(*connected_meshlet_id as i32);468adjwgt.push(*shared_vertex_count as i32);469// TODO: Additional weight based on meshlet spatial proximity470}471}472xadj.push(adjncy.len() as i32);473474let mut options = [-1; metis::NOPTIONS];475options[metis::option::Seed::INDEX] = 17;476options[metis::option::UFactor::INDEX] = 200;477478let mut group_per_meshlet = vec![0; simplification_queue.len()];479let partition_count = simplification_queue480.len()481.div_ceil(TARGET_MESHLETS_PER_GROUP); // TODO: Nanite uses groups of 8-32, probably based on some kind of heuristic482Graph::new(1, partition_count as i32, &xadj, &adjncy)483.unwrap()484.set_options(&options)485.set_adjwgt(&adjwgt)486.part_recursive(&mut group_per_meshlet)487.unwrap();488489let mut groups = vec![TempMeshletGroup::default(); partition_count];490for (i, meshlet_group) in group_per_meshlet.into_iter().enumerate() {491let group = &mut groups[meshlet_group as usize];492let meshlet_id = simplification_queue[i];493494group.meshlets.push(meshlet_id);495let data = &meshlet_cull_data[meshlet_id as usize];496group.aabb = group.aabb.merge(&data.aabb);497group.lod_bounds = merge_spheres(group.lod_bounds, data.lod_group_sphere);498}499groups500}501502fn lock_group_borders(503vertex_locks: &mut [bool],504groups: &[TempMeshletGroup],505meshlets: &Meshlets,506position_only_vertex_remap: &[u32],507position_only_vertex_count: usize,508) {509let mut position_only_locks = vec![-1; position_only_vertex_count];510511// Iterate over position-only based vertices of all meshlets in all groups512for (group_id, group) in groups.iter().enumerate() {513for &meshlet_id in group.meshlets.iter() {514let meshlet = meshlets.get(meshlet_id as usize);515for index in meshlet.triangles {516let vertex_id =517position_only_vertex_remap[meshlet.vertices[*index as usize] as usize] as usize;518519// If the vertex is not yet claimed by any group, or was already claimed by this group520if position_only_locks[vertex_id] == -1521|| position_only_locks[vertex_id] == group_id as i32522{523position_only_locks[vertex_id] = group_id as i32; // Then claim the vertex for this group524} else {525position_only_locks[vertex_id] = -2; // Else vertex was already claimed by another group or was already locked, lock it526}527}528}529}530531// Lock vertices used by more than 1 group532for i in 0..vertex_locks.len() {533let vertex_id = position_only_vertex_remap[i] as usize;534vertex_locks[i] = position_only_locks[vertex_id] == -2;535}536}537538fn simplify_meshlet_group(539group: &TempMeshletGroup,540meshlets: &Meshlets,541vertices: &VertexDataAdapter<'_>,542vertex_normals: &[f32],543vertex_stride: usize,544vertex_locks: &[bool],545) -> Option<(Vec<u32>, f32)> {546// Build a new index buffer into the mesh vertex data by combining all meshlet data in the group547let group_indices = group548.meshlets549.iter()550.flat_map(|&meshlet_id| {551let meshlet = meshlets.get(meshlet_id as _);552meshlet553.triangles554.iter()555.map(|&meshlet_index| meshlet.vertices[meshlet_index as usize])556})557.collect::<Vec<_>>();558559// Simplify the group to ~50% triangle count560let mut error = 0.0;561let simplified_group_indices = simplify_with_attributes_and_locks(562&group_indices,563vertices,564vertex_normals,565&[0.5; 3],566vertex_stride,567vertex_locks,568group_indices.len() / 2,569f32::MAX,570SimplifyOptions::Sparse | SimplifyOptions::ErrorAbsolute,571Some(&mut error),572);573574// Check if we were able to simplify575if simplified_group_indices.len() as f32 / group_indices.len() as f32576> SIMPLIFICATION_FAILURE_PERCENTAGE577{578return None;579}580581Some((simplified_group_indices, error))582}583584fn merge_meshlets(meshlets: &mut Meshlets, merge: Meshlets) {585let vertex_offset = meshlets.vertices.len() as u32;586let triangle_offset = meshlets.triangles.len() as u32;587meshlets.vertices.extend_from_slice(&merge.vertices);588meshlets.triangles.extend_from_slice(&merge.triangles);589meshlets590.meshlets591.extend(merge.meshlets.into_iter().map(|mut meshlet| {592meshlet.vertex_offset += vertex_offset;593meshlet.triangle_offset += triangle_offset;594meshlet595}));596}597598fn build_and_compress_per_meshlet_vertex_data(599meshlet: &meshopt_Meshlet,600meshlet_vertex_ids: &[u32],601vertex_buffer: &[u8],602vertex_stride: usize,603vertex_positions: &mut BitVec<u32, Lsb0>,604vertex_normals: &mut Vec<u32>,605vertex_uvs: &mut Vec<Vec2>,606meshlets: &mut Vec<Meshlet>,607vertex_position_quantization_factor: u8,608) {609let start_vertex_position_bit = vertex_positions.len() as u32;610let start_vertex_attribute_id = vertex_normals.len() as u32;611612let quantization_factor =613(1 << vertex_position_quantization_factor) as f32 * CENTIMETERS_PER_METER;614615let mut min_quantized_position_channels = IVec3::MAX;616let mut max_quantized_position_channels = IVec3::MIN;617618// Lossy vertex compression619let mut quantized_positions = [IVec3::ZERO; 255];620for (i, vertex_id) in meshlet_vertex_ids.iter().enumerate() {621// Load source vertex attributes622let vertex_id_byte = *vertex_id as usize * vertex_stride;623let vertex_data = &vertex_buffer[vertex_id_byte..(vertex_id_byte + vertex_stride)];624let position = Vec3::from_slice(bytemuck::cast_slice(&vertex_data[0..12]));625let normal = Vec3::from_slice(bytemuck::cast_slice(&vertex_data[12..24]));626let uv = Vec2::from_slice(bytemuck::cast_slice(&vertex_data[24..32]));627628// Copy uncompressed UV629vertex_uvs.push(uv);630631// Compress normal632vertex_normals.push(pack2x16snorm(octahedral_encode(normal)));633634// Quantize position to a fixed-point IVec3635let quantized_position = (position * quantization_factor + 0.5).as_ivec3();636quantized_positions[i] = quantized_position;637638// Compute per X/Y/Z-channel quantized position min/max for this meshlet639min_quantized_position_channels = min_quantized_position_channels.min(quantized_position);640max_quantized_position_channels = max_quantized_position_channels.max(quantized_position);641}642643// Calculate bits needed to encode each quantized vertex position channel based on the range of each channel644let range = max_quantized_position_channels - min_quantized_position_channels + 1;645let bits_per_vertex_position_channel_x = log2(range.x as f32).ceil() as u8;646let bits_per_vertex_position_channel_y = log2(range.y as f32).ceil() as u8;647let bits_per_vertex_position_channel_z = log2(range.z as f32).ceil() as u8;648649// Lossless encoding of vertex positions in the minimum number of bits per channel650for quantized_position in quantized_positions.iter().take(meshlet_vertex_ids.len()) {651// Remap [range_min, range_max] IVec3 to [0, range_max - range_min] UVec3652let position = (quantized_position - min_quantized_position_channels).as_uvec3();653654// Store as a packed bitstream655vertex_positions.extend_from_bitslice(656&position.x.view_bits::<Lsb0>()[..bits_per_vertex_position_channel_x as usize],657);658vertex_positions.extend_from_bitslice(659&position.y.view_bits::<Lsb0>()[..bits_per_vertex_position_channel_y as usize],660);661vertex_positions.extend_from_bitslice(662&position.z.view_bits::<Lsb0>()[..bits_per_vertex_position_channel_z as usize],663);664}665666meshlets.push(Meshlet {667start_vertex_position_bit,668start_vertex_attribute_id,669start_index_id: meshlet.triangle_offset,670vertex_count: meshlet.vertex_count as u8,671triangle_count: meshlet.triangle_count as u8,672padding: 0,673bits_per_vertex_position_channel_x,674bits_per_vertex_position_channel_y,675bits_per_vertex_position_channel_z,676vertex_position_quantization_factor,677min_vertex_position_channel_x: min_quantized_position_channels.x as f32,678min_vertex_position_channel_y: min_quantized_position_channels.y as f32,679min_vertex_position_channel_z: min_quantized_position_channels.z as f32,680});681}682683fn merge_spheres(a: BoundingSphere, b: BoundingSphere) -> BoundingSphere {684let sr = a.radius().min(b.radius());685let br = a.radius().max(b.radius());686let len = a.center.distance(b.center);687if len + sr <= br || sr == 0.0 || len == 0.0 {688if a.radius() > b.radius() {689a690} else {691b692}693} else {694let radius = (sr + br + len) / 2.0;695let center =696(a.center + b.center + (a.radius() - b.radius()) * (a.center - b.center) / len) / 2.0;697BoundingSphere::new(center, radius)698}699}700701#[derive(Copy, Clone)]702struct TempMeshletCullData {703aabb: Aabb3d,704lod_group_sphere: BoundingSphere,705error: f32,706}707708#[derive(Clone)]709struct TempMeshletGroup {710aabb: Aabb3d,711lod_bounds: BoundingSphere,712parent_error: f32,713meshlets: SmallVec<[u32; TARGET_MESHLETS_PER_GROUP]>,714}715716impl Default for TempMeshletGroup {717fn default() -> Self {718Self {719aabb: aabb_default(), // Default AABB to merge into720lod_bounds: BoundingSphere::new(Vec3A::ZERO, 0.0),721parent_error: f32::MAX,722meshlets: SmallVec::new(),723}724}725}726727// All the BVH build code was stolen from https://github.com/SparkyPotato/radiance/blob/4aa17a3a5be7a0466dc69713e249bbcee9f46057/crates/rad-renderer/src/assets/mesh/virtual_mesh.rs because it works and I'm lazy and don't want to reimplement it728struct TempBvhNode {729group: u32,730aabb: Aabb3d,731children: SmallVec<[u32; 8]>,732}733734#[derive(Default)]735struct BvhBuilder {736nodes: Vec<TempBvhNode>,737lods: Vec<Range<u32>>,738}739740impl BvhBuilder {741fn add_lod(&mut self, offset: u32, all_groups: &[TempMeshletGroup]) {742let first = self.nodes.len() as u32;743self.nodes.extend(744all_groups745.iter()746.enumerate()747.skip(offset as _)748.map(|(i, group)| TempBvhNode {749group: i as u32,750aabb: group.aabb,751children: SmallVec::new(),752}),753);754let end = self.nodes.len() as u32;755if first != end {756self.lods.push(first..end);757}758}759760fn surface_area(&self, nodes: &[u32]) -> f32 {761nodes762.iter()763.map(|&x| self.nodes[x as usize].aabb)764.reduce(|a, b| a.merge(&b))765.expect("cannot find surface area of zero nodes")766.visible_area()767}768769fn sort_nodes_by_sah(&self, nodes: &mut [u32], splits: [usize; 8]) {770// We use a BVH8, so just recursively binary split 3 times for near-optimal SAH771for i in 0..3 {772let parts = 1 << i; // 2^i773let nodes_per_split = 8 >> i; // 8 / 2^i774let half_count = nodes_per_split / 2;775let mut offset = 0;776for p in 0..parts {777let first = p * nodes_per_split;778let mut s0 = 0;779let mut s1 = 0;780for i in 0..half_count {781s0 += splits[first + i];782s1 += splits[first + half_count + i];783}784let c = s0 + s1;785let nodes = &mut nodes[offset..(offset + c)];786offset += c;787788let mut cost = f32::MAX;789let mut axis = 0;790let key = |x, ax| self.nodes[x as usize].aabb.center()[ax];791for ax in 0..3 {792nodes.sort_unstable_by(|&x, &y| key(x, ax).partial_cmp(&key(y, ax)).unwrap());793let (left, right) = nodes.split_at(s0);794let c = self.surface_area(left) + self.surface_area(right);795if c < cost {796axis = ax;797cost = c;798}799}800if axis != 2 {801nodes.sort_unstable_by(|&x, &y| {802key(x, axis).partial_cmp(&key(y, axis)).unwrap()803});804}805}806}807}808809fn build_temp_inner(&mut self, nodes: &mut [u32], optimize: bool) -> u32 {810let count = nodes.len();811if count == 1 {812nodes[0]813} else if count <= 8 {814let i = self.nodes.len();815self.nodes.push(TempBvhNode {816group: u32::MAX,817aabb: aabb_default(),818children: nodes.iter().copied().collect(),819});820i as _821} else {822// We need to split the nodes into 8 groups, with the smallest possible tree depth.823// Additionally, no child should be more than one level deeper than the others.824// At `l` levels, we can fit upto 8^l nodes.825// The `max_child_size` is the largest power of 8 <= `count` (any larger and we'd have826// unfilled nodes).827// The `min_child_size` is thus 1 level (8 times) smaller.828// After distributing `min_child_size` to all children, we have distributed829// `min_child_size * 8` nodes (== `max_child_size`).830// The remaining nodes are then distributed left to right.831let max_child_size = 1 << ((count.ilog2() / 3) * 3);832let min_child_size = max_child_size >> 3;833let max_extra_per_node = max_child_size - min_child_size;834let mut extra = count - max_child_size; // 8 * min_child_size835let splits = core::array::from_fn(|_| {836let size = extra.min(max_extra_per_node);837extra -= size;838min_child_size + size839});840841if optimize {842self.sort_nodes_by_sah(nodes, splits);843}844845let mut offset = 0;846let children = splits847.into_iter()848.map(|size| {849let i = self.build_temp_inner(&mut nodes[offset..(offset + size)], optimize);850offset += size;851i852})853.collect();854855let i = self.nodes.len();856self.nodes.push(TempBvhNode {857group: u32::MAX,858aabb: aabb_default(),859children,860});861i as _862}863}864865fn build_temp(&mut self) -> u32 {866let mut lods = Vec::with_capacity(self.lods.len());867for lod in core::mem::take(&mut self.lods) {868let mut lod: Vec<_> = lod.collect();869let root = self.build_temp_inner(&mut lod, true);870let node = &self.nodes[root as usize];871if node.group != u32::MAX || node.children.len() == 8 {872lods.push(root);873} else {874lods.extend(node.children.iter().copied());875}876}877self.build_temp_inner(&mut lods, false)878}879880fn build_inner(881&self,882groups: &[TempMeshletGroup],883out: &mut Vec<BvhNode>,884max_depth: &mut u32,885node: u32,886depth: u32,887) -> u32 {888*max_depth = depth.max(*max_depth);889let node = &self.nodes[node as usize];890let onode = out.len();891out.push(BvhNode::default());892893for (i, &child_id) in node.children.iter().enumerate() {894let child = &self.nodes[child_id as usize];895if child.group != u32::MAX {896let group = &groups[child.group as usize];897let out = &mut out[onode];898out.aabbs[i] = aabb_to_meshlet(group.aabb, group.parent_error, group.meshlets[0]);899out.lod_bounds[i] = sphere_to_meshlet(group.lod_bounds);900out.child_counts[i] = group.meshlets[1] as _;901} else {902let child_id = self.build_inner(groups, out, max_depth, child_id, depth + 1);903let child = &out[child_id as usize];904let mut aabb = aabb_default();905let mut parent_error = 0.0f32;906let mut lod_bounds = BoundingSphere::new(Vec3A::ZERO, 0.0);907for i in 0..8 {908if child.child_counts[i] == 0 {909break;910}911912aabb = aabb.merge(&Aabb3d::new(913child.aabbs[i].center,914child.aabbs[i].half_extent,915));916lod_bounds = merge_spheres(917lod_bounds,918BoundingSphere::new(child.lod_bounds[i].center, child.lod_bounds[i].radius),919);920parent_error = parent_error.max(child.aabbs[i].error);921}922923let out = &mut out[onode];924out.aabbs[i] = aabb_to_meshlet(aabb, parent_error, child_id);925out.lod_bounds[i] = sphere_to_meshlet(lod_bounds);926out.child_counts[i] = u8::MAX;927}928}929930onode as _931}932933fn build(934mut self,935meshlets: &mut Meshlets,936mut groups: Vec<TempMeshletGroup>,937cull_data: &mut Vec<TempMeshletCullData>,938) -> (Vec<BvhNode>, MeshletAabb, u32) {939// The BVH requires group meshlets to be contiguous, so remap them first.940let mut remap = Vec::with_capacity(meshlets.meshlets.len());941let mut remapped_cull_data = Vec::with_capacity(cull_data.len());942for group in groups.iter_mut() {943let first = remap.len() as u32;944let count = group.meshlets.len() as u32;945remap.extend(946group947.meshlets948.iter()949.map(|&m| meshlets.meshlets[m as usize]),950);951remapped_cull_data.extend(group.meshlets.iter().map(|&m| cull_data[m as usize]));952group.meshlets.resize(2, 0);953group.meshlets[0] = first;954group.meshlets[1] = count;955}956meshlets.meshlets = remap;957*cull_data = remapped_cull_data;958959let mut out = vec![];960let mut aabb = aabb_default();961let mut max_depth = 0;962963if self.nodes.len() == 1 {964let mut o = BvhNode::default();965let group = &groups[0];966o.aabbs[0] = aabb_to_meshlet(group.aabb, group.parent_error, group.meshlets[0]);967o.lod_bounds[0] = sphere_to_meshlet(group.lod_bounds);968o.child_counts[0] = group.meshlets[1] as _;969out.push(o);970aabb = group.aabb;971max_depth = 1;972} else {973let root = self.build_temp();974let root = self.build_inner(&groups, &mut out, &mut max_depth, root, 1);975assert_eq!(root, 0, "root must be 0");976977let root = &out[0];978for i in 0..8 {979if root.child_counts[i] == 0 {980break;981}982983aabb = aabb.merge(&Aabb3d::new(984root.aabbs[i].center,985root.aabbs[i].half_extent,986));987}988}989990let mut reachable = vec![false; meshlets.meshlets.len()];991verify_bvh(&out, cull_data, &mut reachable, 0);992assert!(993reachable.iter().all(|&x| x),994"all meshlets must be reachable"995);996997(998out,999MeshletAabb {1000center: aabb.center().into(),1001half_extent: aabb.half_size().into(),1002},1003max_depth,1004)1005}1006}10071008fn verify_bvh(1009out: &[BvhNode],1010cull_data: &[TempMeshletCullData],1011reachable: &mut [bool],1012node: u32,1013) {1014let node = &out[node as usize];1015for i in 0..8 {1016let sphere = node.lod_bounds[i];1017let error = node.aabbs[i].error;1018if node.child_counts[i] == u8::MAX {1019let child = &out[node.aabbs[i].child_offset as usize];1020for i in 0..8 {1021if child.child_counts[i] == 0 {1022break;1023}1024assert!(1025child.aabbs[i].error <= error,1026"BVH errors are not monotonic"1027);1028let sphere_error = (sphere.center - child.lod_bounds[i].center).length()1029- (sphere.radius - child.lod_bounds[i].radius);1030assert!(1031sphere_error <= 0.0001,1032"BVH lod spheres are not monotonic ({sphere_error})"1033);1034}1035verify_bvh(out, cull_data, reachable, node.aabbs[i].child_offset);1036} else {1037for m in 0..node.child_counts[i] as u32 {1038let mid = (m + node.aabbs[i].child_offset) as usize;1039let meshlet = &cull_data[mid];1040assert!(meshlet.error <= error, "meshlet errors are not monotonic");1041let sphere_error = (Vec3A::from(sphere.center) - meshlet.lod_group_sphere.center)1042.length()1043- (sphere.radius - meshlet.lod_group_sphere.radius());1044assert!(1045sphere_error <= 0.0001,1046"meshlet lod spheres are not monotonic: ({sphere_error})"1047);1048reachable[mid] = true;1049}1050}1051}1052}10531054fn aabb_default() -> Aabb3d {1055Aabb3d {1056min: Vec3A::INFINITY,1057max: Vec3A::NEG_INFINITY,1058}1059}10601061fn aabb_to_meshlet(aabb: Aabb3d, error: f32, child_offset: u32) -> MeshletAabbErrorOffset {1062MeshletAabbErrorOffset {1063center: aabb.center().into(),1064error,1065half_extent: aabb.half_size().into(),1066child_offset,1067}1068}10691070fn sphere_to_meshlet(sphere: BoundingSphere) -> MeshletBoundingSphere {1071MeshletBoundingSphere {1072center: sphere.center.into(),1073radius: sphere.radius(),1074}1075}10761077// TODO: Precise encode variant1078fn octahedral_encode(v: Vec3) -> Vec2 {1079let n = v / (v.x.abs() + v.y.abs() + v.z.abs());1080let octahedral_wrap = (1.0 - n.yx().abs())1081* Vec2::new(1082if n.x >= 0.0 { 1.0 } else { -1.0 },1083if n.y >= 0.0 { 1.0 } else { -1.0 },1084);1085if n.z >= 0.0 {1086n.xy()1087} else {1088octahedral_wrap1089}1090}10911092// https://www.w3.org/TR/WGSL/#pack2x16snorm-builtin1093fn pack2x16snorm(v: Vec2) -> u32 {1094let v = v.clamp(Vec2::NEG_ONE, Vec2::ONE);1095let v = (v * 32767.0 + 0.5).floor().as_i16vec2();1096bytemuck::cast(v)1097}10981099/// An error produced by [`MeshletMesh::from_mesh`].1100#[derive(Error, Debug)]1101pub enum MeshToMeshletMeshConversionError {1102#[error("Mesh primitive topology is not TriangleList")]1103WrongMeshPrimitiveTopology,1104#[error("Mesh vertex attributes are not {{POSITION, NORMAL, UV_0}}: {0:?}")]1105WrongMeshVertexAttributes(Vec<String>),1106#[error("Mesh has no indices")]1107MeshMissingIndices,1108}110911101111