Skip to content

Commit

Permalink
Meshlet fix software rasterization (#16049)
Browse files Browse the repository at this point in the history
# Objective
1. Prevent weird glitches with stray pixels scattered around the scene

![image](https://github.com/user-attachments/assets/f12adb38-5996-4dc7-bea6-bd326b7317e1)
2. Prevent weird glitchy full-screen triangles that pop up and destroy
perf (SW rasterizing huge triangles is slow)

![image](https://github.com/user-attachments/assets/d3705427-13a5-47bc-a54b-756f0409da0b)

## Solution
1. Use floating point math in the SW rasterizer bounding box calculation
to handle negative vertex coordinates, and add backface culling
2. Force hardware raster for clusters that clip the near plane, and let
the hardware rasterizer handle the clipping

I also adjusted the SW rasterizer threshold to < 64 pixels (a little bit
better perf in my test scene, but I still need to do a more comprehensive
test), and enabled backface culling for the hardware raster pipeline.

## Testing

- Did you test these changes? If so, how?
  - Yes, on an example scene. Issues no longer occur.
- Are there any parts that need more testing?
  - No.
- How can other people (reviewers) test your changes? Is there anything
specific they need to know?
  - Run the meshlet example.
  • Loading branch information
JMS55 authored and mockersf committed Oct 22, 2024
1 parent e514295 commit 730e85b
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 39 deletions.
8 changes: 5 additions & 3 deletions crates/bevy_pbr/src/meshlet/cull_clusters.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,13 @@ fn cull_clusters(
aabb_width_pixels = (aabb.z - aabb.x) * view.viewport.z;
aabb_height_pixels = (aabb.w - aabb.y) * view.viewport.w;
#endif
let cluster_is_small = all(vec2(aabb_width_pixels, aabb_height_pixels) < vec2(32.0)); // TODO: Nanite does something different. Come up with my own heuristic.
let cluster_is_small = all(vec2(aabb_width_pixels, aabb_height_pixels) < vec2(64.0));

// Let the hardware rasterizer handle near-plane clipping
let not_intersects_near_plane = dot(view.frustum[4u], culling_bounding_sphere_center) > culling_bounding_sphere_radius;

// TODO: Also check if needs depth clipping
var buffer_slot: u32;
if cluster_is_small {
if cluster_is_small && not_intersects_near_plane {
// Append this cluster to the list for software rasterization
buffer_slot = atomicAdd(&meshlet_software_raster_indirect_args.x, 1u);
} else {
Expand Down
6 changes: 3 additions & 3 deletions crates/bevy_pbr/src/meshlet/pipelines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ impl FromWorld for MeshletPipelines {
topology: PrimitiveTopology::TriangleList,
strip_index_format: None,
front_face: FrontFace::Ccw,
cull_mode: None,
cull_mode: Some(Face::Back),
unclipped_depth: false,
polygon_mode: PolygonMode::Fill,
conservative: false,
Expand Down Expand Up @@ -292,7 +292,7 @@ impl FromWorld for MeshletPipelines {
topology: PrimitiveTopology::TriangleList,
strip_index_format: None,
front_face: FrontFace::Ccw,
cull_mode: None,
cull_mode: Some(Face::Back),
unclipped_depth: false,
polygon_mode: PolygonMode::Fill,
conservative: false,
Expand Down Expand Up @@ -336,7 +336,7 @@ impl FromWorld for MeshletPipelines {
topology: PrimitiveTopology::TriangleList,
strip_index_format: None,
front_face: FrontFace::Ccw,
cull_mode: None,
cull_mode: Some(Face::Back),
unclipped_depth: false,
polygon_mode: PolygonMode::Fill,
conservative: false,
Expand Down
67 changes: 34 additions & 33 deletions crates/bevy_pbr/src/meshlet/visibility_buffer_software_raster.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

/// Compute shader for rasterizing small clusters into a visibility buffer.

// TODO: Subpixel precision and top-left rule
// TODO: Fixed-point math and top-left rule

var<workgroup> viewport_vertices: array<vec3f, 255>;

Expand Down Expand Up @@ -79,98 +79,99 @@ fn rasterize_cluster(
let vertex_2 = viewport_vertices[vertex_ids[0]];
let packed_ids = (cluster_id << 7u) | triangle_id;

// Compute triangle bounding box
let min_x = u32(min3(vertex_0.x, vertex_1.x, vertex_2.x));
let min_y = u32(min3(vertex_0.y, vertex_1.y, vertex_2.y));
var max_x = u32(ceil(max3(vertex_0.x, vertex_1.x, vertex_2.x)));
var max_y = u32(ceil(max3(vertex_0.y, vertex_1.y, vertex_2.y)));
max_x = min(max_x, u32(view.viewport.z) - 1u);
max_y = min(max_y, u32(view.viewport.w) - 1u);
if any(vec2(min_x, min_y) > vec2(max_x, max_y)) { return; }
// Backface culling
let triangle_double_area = edge_function(vertex_0.xy, vertex_1.xy, vertex_2.xy);
if triangle_double_area <= 0.0 { return; }

// Setup triangle gradients
let w_x = vec3(vertex_1.y - vertex_2.y, vertex_2.y - vertex_0.y, vertex_0.y - vertex_1.y);
let w_y = vec3(vertex_2.x - vertex_1.x, vertex_0.x - vertex_2.x, vertex_1.x - vertex_0.x);
let triangle_double_area = edge_function(vertex_0.xy, vertex_1.xy, vertex_2.xy); // TODO: Reuse earlier calculations and take advantage of summing to 1
let vertices_z = vec3(vertex_0.z, vertex_1.z, vertex_2.z) / triangle_double_area;
let z_x = dot(vertices_z, w_x);
let z_y = dot(vertices_z, w_y);

// Compute triangle bounding box
var min_x = floor(min3(vertex_0.x, vertex_1.x, vertex_2.x));
var min_y = floor(min3(vertex_0.y, vertex_1.y, vertex_2.y));
var max_x = ceil(max3(vertex_0.x, vertex_1.x, vertex_2.x));
var max_y = ceil(max3(vertex_0.y, vertex_1.y, vertex_2.y));
min_x = max(min_x, 0.0);
min_y = max(min_y, 0.0);
max_x = min(max_x, view.viewport.z - 1.0);
max_y = min(max_y, view.viewport.w - 1.0);

// Setup initial triangle equations
let starting_pixel = vec2(f32(min_x), f32(min_y)) + 0.5;
let starting_pixel = vec2(min_x, min_y) + 0.5;
var w_row = vec3(
// TODO: Reuse earlier calculations and take advantage of summing to 1
edge_function(vertex_1.xy, vertex_2.xy, starting_pixel),
edge_function(vertex_2.xy, vertex_0.xy, starting_pixel),
edge_function(vertex_0.xy, vertex_1.xy, starting_pixel),
);
var z_row = dot(vertices_z, w_row);
let view_width = u32(view.viewport.z);
var frag_coord_1d_row = min_y * view_width;

// Rasterize triangle
if subgroupAny(max_x - min_x > 4u) {
if subgroupAny(max_x - min_x > 4.0) {
// Scanline setup
let edge_012 = -w_x;
let open_edge = edge_012 < vec3(0.0);
let inverse_edge_012 = select(1.0 / edge_012, vec3(1e8), edge_012 == vec3(0.0));
let max_x_diff = vec3<f32>(max_x - min_x);
for (var y = min_y; y <= max_y; y++) {
let max_x_diff = vec3(max_x - min_x);
for (var y = min_y; y <= max_y; y += 1.0) {
// Calculate start and end X interval for pixels in this row within the triangle
let cross_x = w_row * inverse_edge_012;
let min_x2 = select(vec3(0.0), cross_x, open_edge);
let max_x2 = select(cross_x, max_x_diff, open_edge);
var x0 = u32(ceil(max3(min_x2[0], min_x2[1], min_x2[2])));
var x1 = u32(min3(max_x2[0], max_x2[1], max_x2[2]));
var x0 = ceil(max3(min_x2[0], min_x2[1], min_x2[2]));
var x1 = min3(max_x2[0], max_x2[1], max_x2[2]);

var w = w_row + w_x * f32(x0);
var z = z_row + z_x * f32(x0);
var w = w_row + w_x * x0;
var z = z_row + z_x * x0;
x0 += min_x;
x1 += min_x;

// Iterate scanline X interval
for (var x = x0; x <= x1; x++) {
for (var x = x0; x <= x1; x += 1.0) {
// Check if point at pixel is within triangle (TODO: this shouldn't be needed, but there's bugs without it)
if min3(w[0], w[1], w[2]) >= 0.0 {
write_visibility_buffer_pixel(frag_coord_1d_row + x, z, packed_ids);
write_visibility_buffer_pixel(x, y, z, packed_ids);
}

// Increment edge functions along the X-axis
// Increment triangle equations along the X-axis
w += w_x;
z += z_x;
}

// Increment edge functions along the Y-axis
// Increment triangle equations along the Y-axis
w_row += w_y;
z_row += z_y;
frag_coord_1d_row += view_width;
}
} else {
// Iterate over every pixel in the triangle's bounding box
for (var y = min_y; y <= max_y; y++) {
for (var y = min_y; y <= max_y; y += 1.0) {
var w = w_row;
var z = z_row;

for (var x = min_x; x <= max_x; x++) {
for (var x = min_x; x <= max_x; x += 1.0) {
// Check if point at pixel is within triangle
if min3(w[0], w[1], w[2]) >= 0.0 {
write_visibility_buffer_pixel(frag_coord_1d_row + x, z, packed_ids);
write_visibility_buffer_pixel(x, y, z, packed_ids);
}

// Increment edge functions along the X-axis
// Increment triangle equations along the X-axis
w += w_x;
z += z_x;
}

// Increment edge functions along the Y-axis
// Increment triangle equations along the Y-axis
w_row += w_y;
z_row += z_y;
frag_coord_1d_row += view_width;
}
}
}

fn write_visibility_buffer_pixel(frag_coord_1d: u32, z: f32, packed_ids: u32) {
fn write_visibility_buffer_pixel(x: f32, y: f32, z: f32, packed_ids: u32) {
let frag_coord_1d = u32(y * view.viewport.z + x);

#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
let depth = bitcast<u32>(z);
let visibility = (u64(depth) << 32u) | u64(packed_ids);
Expand Down

0 comments on commit 730e85b

Please sign in to comment.