Skip to content

Commit

Permalink
Optimize clustered lighting shaders compilation time on Windows platf…
Browse files Browse the repository at this point in the history
…orm (#5012)

Co-authored-by: Martin Valigursky <mvaligursky@snapchat.com>
  • Loading branch information
mvaligursky and Martin Valigursky authored Jan 27, 2023
1 parent df84913 commit 2a39488
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 83 deletions.
4 changes: 2 additions & 2 deletions examples/src/examples/graphics/shader-compile.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ class ShaderCompileExample {
camera.lookAt(0, 0, 0);

// generate a grid of spheres, each with a unique material / shader
for (let x = -10; x <= 10; x += 4) {
for (let y = -10; y <= 10; y += 4) {
for (let x = -10; x <= 10; x += 6) {
for (let y = -10; y <= 10; y += 6) {
const pos = new pc.Vec3(x, 0.6, y);
const color = new pc.Color(0.3 + Math.random() * 0.7, 0.3 + Math.random() * 0.7, 0.3 + Math.random() * 0.7);
createPrimitive("sphere", pos, new pc.Vec3(1, 1, 1), color, assets, true);
Expand Down
21 changes: 21 additions & 0 deletions src/platform/graphics/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -1207,6 +1207,27 @@ export const typedArrayToType = {
export const typedArrayIndexFormats = [Uint8Array, Uint16Array, Uint32Array];
export const typedArrayIndexFormatsByteSize = [1, 2, 4];

// Map of engine PIXELFORMAT_*** enums to the size of a single pixel in bytes. The array is indexed
// directly by the format constant; formats that are not assigned below have no entry and read back
// as undefined, so look-ups need to supply their own fallback.
// Note that the three-channel formats (RGB8, RGB16F, RGB32F, SRGB) are listed with the same size as
// their four-channel counterparts.
// NOTE(review): presumably three-channel data is padded to four channels when stored - confirm
// before using these sizes to measure tightly packed CPU-side data.
export const pixelFormatByteSizes = [];
pixelFormatByteSizes[PIXELFORMAT_A8] = 1;
pixelFormatByteSizes[PIXELFORMAT_L8] = 1;
pixelFormatByteSizes[PIXELFORMAT_LA8] = 2;
pixelFormatByteSizes[PIXELFORMAT_RGB565] = 2;
pixelFormatByteSizes[PIXELFORMAT_RGBA5551] = 2;
pixelFormatByteSizes[PIXELFORMAT_RGBA4] = 2;
pixelFormatByteSizes[PIXELFORMAT_RGB8] = 4;
pixelFormatByteSizes[PIXELFORMAT_RGBA8] = 4;
pixelFormatByteSizes[PIXELFORMAT_RGB16F] = 8;
pixelFormatByteSizes[PIXELFORMAT_RGBA16F] = 8;
pixelFormatByteSizes[PIXELFORMAT_RGB32F] = 16;
pixelFormatByteSizes[PIXELFORMAT_RGBA32F] = 16;
pixelFormatByteSizes[PIXELFORMAT_R32F] = 4;
pixelFormatByteSizes[PIXELFORMAT_DEPTH] = 4; // can be smaller using WebGL1 extension?
pixelFormatByteSizes[PIXELFORMAT_DEPTHSTENCIL] = 4;
pixelFormatByteSizes[PIXELFORMAT_111110F] = 4;
pixelFormatByteSizes[PIXELFORMAT_SRGB] = 4;
pixelFormatByteSizes[PIXELFORMAT_SRGBA] = 4;

/**
* Map of engine semantics into location on device in range 0..15 (note - semantics mapping to the
* same location cannot be used at the same time) organized in a way that ATTR0-ATTR7 do not
Expand Down
29 changes: 3 additions & 26 deletions src/platform/graphics/texture.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ import { math } from '../../core/math/math.js';

import {
isCompressedPixelFormat,
pixelFormatByteSizes,
ADDRESS_REPEAT,
FILTER_LINEAR, FILTER_LINEAR_MIPMAP_LINEAR,
FUNC_LESS,
PIXELFORMAT_A8, PIXELFORMAT_L8, PIXELFORMAT_LA8, PIXELFORMAT_RGB565, PIXELFORMAT_RGBA5551, PIXELFORMAT_RGBA4,
PIXELFORMAT_RGB8, PIXELFORMAT_RGBA8, PIXELFORMAT_DXT1, PIXELFORMAT_DXT3, PIXELFORMAT_DXT5,
PIXELFORMAT_RGB16F, PIXELFORMAT_RGBA16F, PIXELFORMAT_RGB32F, PIXELFORMAT_RGBA32F, PIXELFORMAT_R32F, PIXELFORMAT_DEPTH,
PIXELFORMAT_DEPTHSTENCIL, PIXELFORMAT_111110F, PIXELFORMAT_SRGB, PIXELFORMAT_SRGBA, PIXELFORMAT_ETC1,
PIXELFORMAT_RGB16F, PIXELFORMAT_RGBA16F, PIXELFORMAT_RGB32F, PIXELFORMAT_RGBA32F, PIXELFORMAT_ETC1,
PIXELFORMAT_ETC2_RGB, PIXELFORMAT_ETC2_RGBA, PIXELFORMAT_PVRTC_2BPP_RGB_1, PIXELFORMAT_PVRTC_2BPP_RGBA_1,
PIXELFORMAT_PVRTC_4BPP_RGB_1, PIXELFORMAT_PVRTC_4BPP_RGBA_1, PIXELFORMAT_ASTC_4x4, PIXELFORMAT_ATC_RGB,
PIXELFORMAT_ATC_RGBA,
Expand All @@ -20,7 +20,6 @@ import {
TEXTURETYPE_DEFAULT, TEXTURETYPE_RGBM, TEXTURETYPE_RGBE, TEXTURETYPE_RGBP, TEXTURETYPE_SWIZZLEGGGR
} from './constants.js';

let _pixelSizeTable = null;
let _blockSizeTable = null;

let id = 0;
Expand Down Expand Up @@ -679,28 +678,6 @@ class Texture {
* @ignore
*/
static calcGpuSize(width, height, depth, format, mipmaps, cubemap) {
if (!_pixelSizeTable) {
_pixelSizeTable = [];
_pixelSizeTable[PIXELFORMAT_A8] = 1;
_pixelSizeTable[PIXELFORMAT_L8] = 1;
_pixelSizeTable[PIXELFORMAT_LA8] = 2;
_pixelSizeTable[PIXELFORMAT_RGB565] = 2;
_pixelSizeTable[PIXELFORMAT_RGBA5551] = 2;
_pixelSizeTable[PIXELFORMAT_RGBA4] = 2;
_pixelSizeTable[PIXELFORMAT_RGB8] = 4;
_pixelSizeTable[PIXELFORMAT_RGBA8] = 4;
_pixelSizeTable[PIXELFORMAT_RGB16F] = 8;
_pixelSizeTable[PIXELFORMAT_RGBA16F] = 8;
_pixelSizeTable[PIXELFORMAT_RGB32F] = 16;
_pixelSizeTable[PIXELFORMAT_RGBA32F] = 16;
_pixelSizeTable[PIXELFORMAT_R32F] = 4;
_pixelSizeTable[PIXELFORMAT_DEPTH] = 4; // can be smaller using WebGL1 extension?
_pixelSizeTable[PIXELFORMAT_DEPTHSTENCIL] = 4;
_pixelSizeTable[PIXELFORMAT_111110F] = 4;
_pixelSizeTable[PIXELFORMAT_SRGB] = 4;
_pixelSizeTable[PIXELFORMAT_SRGBA] = 4;
}

if (!_blockSizeTable) {
_blockSizeTable = [];
_blockSizeTable[PIXELFORMAT_ETC1] = 8;
Expand All @@ -718,7 +695,7 @@ class Texture {
_blockSizeTable[PIXELFORMAT_ATC_RGBA] = 16;
}

const pixelSize = _pixelSizeTable.hasOwnProperty(format) ? _pixelSizeTable[format] : 0;
const pixelSize = pixelFormatByteSizes[format] ?? 0;
const blockSize = _blockSizeTable.hasOwnProperty(format) ? _blockSizeTable[format] : 0;
let result = 0;

Expand Down
19 changes: 13 additions & 6 deletions src/platform/graphics/webgpu/webgpu-texture.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { Debug, DebugHelper } from '../../../core/debug.js';

import {
pixelFormatByteSizes,
ADDRESS_REPEAT, ADDRESS_CLAMP_TO_EDGE, ADDRESS_MIRRORED_REPEAT,
PIXELFORMAT_A8, PIXELFORMAT_L8, PIXELFORMAT_LA8, PIXELFORMAT_RGB565, PIXELFORMAT_RGBA5551, PIXELFORMAT_RGBA4,
PIXELFORMAT_RGB8, PIXELFORMAT_RGBA8, PIXELFORMAT_DXT1, PIXELFORMAT_DXT3, PIXELFORMAT_DXT5,
Expand All @@ -13,9 +14,9 @@ import {

// map of PIXELFORMAT_*** to GPUTextureFormat
const gpuTextureFormats = [];
gpuTextureFormats[PIXELFORMAT_A8] = '';
gpuTextureFormats[PIXELFORMAT_L8] = '';
gpuTextureFormats[PIXELFORMAT_LA8] = '';
gpuTextureFormats[PIXELFORMAT_A8] = 'r8unorm';
gpuTextureFormats[PIXELFORMAT_L8] = 'r8unorm';
gpuTextureFormats[PIXELFORMAT_LA8] = 'rg8unorm';
gpuTextureFormats[PIXELFORMAT_RGB565] = '';
gpuTextureFormats[PIXELFORMAT_RGBA5551] = '';
gpuTextureFormats[PIXELFORMAT_RGBA4] = '';
Expand Down Expand Up @@ -265,13 +266,19 @@ class WebgpuTexture {
mipLevel: 0
};

// TODO: RGBA only for now, needs to be more generic
const numElementsPerPixel = 4;
// TODO: handle update to mipmap levels other than 0
const pixelSize = pixelFormatByteSizes[texture.format] ?? 0;
Debug.assert(pixelSize);
const bytesPerRow = texture.width * pixelSize;
const byteSize = bytesPerRow * texture.height;

Debug.assert(byteSize === data.byteLength,
`Error uploading data to texture, the data byte size of ${data.byteLength} does not match required ${byteSize}`, texture);

// type {GPUImageDataLayout}
const dataLayout = {
offset: 0,
bytesPerRow: texture.width * data.BYTES_PER_ELEMENT * numElementsPerPixel,
bytesPerRow: bytesPerRow,
rowsPerImage: texture.height
};

Expand Down
41 changes: 18 additions & 23 deletions src/scene/lighting/world-clusters.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Vec3 } from '../../core/math/vec3.js';
import { math } from '../../core/math/math.js';
import { BoundingBox } from '../../core/shape/bounding-box.js';
import { PIXELFORMAT_RGBA8 } from '../../platform/graphics/constants.js';
import { PIXELFORMAT_A8 } from '../../platform/graphics/constants.js';
import { LIGHTTYPE_DIRECTIONAL, LIGHTTYPE_SPOT, MASK_AFFECT_DYNAMIC, MASK_AFFECT_LIGHTMAPPED } from '../constants.js';
import { LightsBuffer } from './lights-buffer.js';
import { Debug } from '../../core/debug.js';
Expand Down Expand Up @@ -29,6 +29,9 @@ class ClusterLight {
// Main class implementing clustered lighting. Internally it organizes the placement of omni / spot lights in a world-space 3D cell structure,
// and also uses the LightsBuffer class to store light properties in textures
class WorldClusters {
/** @type {import('../../platform/graphics/texture.js').Texture} */
clusterTexture;

constructor(device) {
this.device = device;
this.name = 'Untitled';
Expand All @@ -46,9 +49,7 @@ class WorldClusters {
this._cellsLimit = new Vec3(); // number of cells minus one
this.cells = this._cells;

// number of lights each cell can store, and number of pixels this takes (4 lights per pixel)
this._maxCellLightCount = 0;
this._pixelsPerCellCount = 0;
// number of lights each cell can store
this.maxCellLightCount = 4;

// limits on some light properties, used for compression to 8bit texture
Expand All @@ -70,11 +71,8 @@ class WorldClusters {

set maxCellLightCount(count) {

// each cell stores 4 lights (xyzw), so round up the count
const maxCellLightCount = math.roundUp(count, 4);
if (maxCellLightCount !== this._maxCellLightCount) {
this._maxCellLightCount = maxCellLightCount;
this._pixelsPerCellCount = this._maxCellLightCount / 4;
if (count !== this._maxCellLightCount) {
this._maxCellLightCount = count;
this._cellsDirty = true;
}
}
Expand Down Expand Up @@ -118,7 +116,6 @@ class WorldClusters {
this._clusterMaxCellsId = device.scope.resolve('clusterMaxCells');

this._clusterWorldTextureId = device.scope.resolve('clusterWorldTexture');
this._clusterPixelsPerCellId = device.scope.resolve('clusterPixelsPerCell');

this._clusterTextureSizeId = device.scope.resolve('clusterTextureSize');
this._clusterTextureSizeData = new Float32Array(3);
Expand Down Expand Up @@ -165,13 +162,13 @@ class WorldClusters {
const cy = this._cells.y;
const cz = this._cells.z;

// storing 4 lights per pixels
// storing 1 light per pixel
const numCells = cx * cy * cz;
const totalPixels = this._pixelsPerCellCount * numCells;
const totalPixels = this.maxCellLightCount * numCells;

// cluster texture size - roughly square that fits all cells. The width is a multiple of numPixels to simplify shader math
let width = Math.ceil(Math.sqrt(totalPixels));
width = math.roundUp(width, this._pixelsPerCellCount);
width = math.roundUp(width, this.maxCellLightCount);
const height = Math.ceil(totalPixels / width);

// if the texture is allowed size
Expand All @@ -184,20 +181,20 @@ class WorldClusters {
this._clusterCellsMaxData[2] = cz;

// vector to allow single dot product to convert from world coordinates to cluster index
this._clusterCellsDotData[0] = this._pixelsPerCellCount;
this._clusterCellsDotData[1] = cx * cz * this._pixelsPerCellCount;
this._clusterCellsDotData[2] = cx * this._pixelsPerCellCount;
this._clusterCellsDotData[0] = this.maxCellLightCount;
this._clusterCellsDotData[1] = cx * cz * this.maxCellLightCount;
this._clusterCellsDotData[2] = cx * this.maxCellLightCount;

// cluster data and number of lights per cell
this.clusters = new Uint8ClampedArray(4 * totalPixels);
this.clusters = new Uint8ClampedArray(totalPixels);
this.counts = new Int32Array(numCells);

this._clusterTextureSizeData[0] = width;
this._clusterTextureSizeData[1] = 1.0 / width;
this._clusterTextureSizeData[2] = 1.0 / height;

this.releaseClusterTexture();
this.clusterTexture = LightsBuffer.createTexture(this.device, width, height, PIXELFORMAT_RGBA8, 'ClusterTexture');
this.clusterTexture = LightsBuffer.createTexture(this.device, width, height, PIXELFORMAT_A8, 'ClusterTexture');
}
}

Expand All @@ -217,7 +214,7 @@ class WorldClusters {
this._clusterWorldTextureId.setValue(this.clusterTexture);

// uniform values
this._clusterMaxCellsId.setValue(this._pixelsPerCellCount);
this._clusterMaxCellsId.setValue(this.maxCellLightCount);

const boundsDelta = this.boundsDelta;
this._clusterCellsCountByBoundsSizeData[0] = this._cells.x / boundsDelta.x;
Expand All @@ -237,7 +234,6 @@ class WorldClusters {
this._clusterCompressionLimit0Data[1] = this._maxColorValue;

// assign values
this._clusterPixelsPerCellId.setValue(this._pixelsPerCellCount);
this._clusterTextureSizeId.setValue(this._clusterTextureSizeData);
this._clusterBoundsMinId.setValue(this._clusterBoundsMinData);
this._clusterBoundsDeltaId.setValue(this._clusterBoundsDeltaData);
Expand Down Expand Up @@ -381,7 +377,7 @@ class WorldClusters {
const counts = this.counts;
const limit = this._maxCellLightCount;
const clusters = this.clusters;
const pixelsPerCellCount = this._pixelsPerCellCount;
const pixelsPerCellCount = this.maxCellLightCount;
let tooManyLights = false;

// started from index 1, zero is "no-light" index
Expand Down Expand Up @@ -411,13 +407,12 @@ class WorldClusters {
const clusterIndex = x + divX * (z + y * divZ);
const count = counts[clusterIndex];
if (count < limit) {
clusters[pixelsPerCellCount * clusterIndex * 4 + count] = i;
clusters[pixelsPerCellCount * clusterIndex + count] = i;
counts[clusterIndex] = count + 1;

} else {
tooManyLights = true;
}

}
}
}
Expand Down
37 changes: 11 additions & 26 deletions src/scene/shader-lib/chunks/lit/frag/clusteredLight.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ uniform highp sampler2D lightsTextureFloat;
#ifdef GL2
uniform int clusterMaxCells;
#else
uniform float clusterMaxCells;
uniform vec4 lightsTextureInvSize;
#endif
uniform float clusterPixelsPerCell;
uniform vec3 clusterCellsCountByBoundsSize;
uniform vec3 clusterTextureSize;
uniform vec3 clusterBoundsMin;
Expand Down Expand Up @@ -592,43 +592,28 @@ void addClusteredLights() {
// loop over maximum number of light cells
for (int lightCellIndex = 0; lightCellIndex < clusterMaxCells; lightCellIndex++) {
vec4 lightIndices = texelFetch(clusterWorldTexture, ivec2(int(clusterU) + lightCellIndex, clusterV), 0);
// evaluate up to 4 lights. This is written using a loop instead of manually unrolling to keep shader compile time smaller
vec4 indices = lightIndices * 255.0;
for (int i = 0; i < 4; i++) {
if (indices.x <= 0.0)
return;
evaluateClusterLight(indices.x);
indices = indices.yzwx;
}
// using a single channel texture with data in alpha channel
float lightIndex = texelFetch(clusterWorldTexture, ivec2(int(clusterU) + lightCellIndex, clusterV), 0).a;
evaluateClusterLight(lightIndex * 255.0);
}
#else
clusterV = (clusterV + 0.5) * clusterTextureSize.z;
// loop over maximum possible number of supported light cells
const float maxLightCells = 256.0 / 4.0; // 8 bit index, each stores 4 lights
const float maxLightCells = 256.0;
for (float lightCellIndex = 0.5; lightCellIndex < maxLightCells; lightCellIndex++) {
vec4 lightIndices = texture2DLodEXT(clusterWorldTexture, vec2(clusterTextureSize.y * (clusterU + lightCellIndex), clusterV), 0.0);
vec4 indices = lightIndices * 255.0;
float lightIndex = texture2DLodEXT(clusterWorldTexture, vec2(clusterTextureSize.y * (clusterU + lightCellIndex), clusterV), 0.0).a;
// evaluate up to 4 lights. This is written using a loop instead of manually unrolling to keep shader compile time smaller
for (int i = 0; i < 4; i++) {
if (indices.x <= 0.0)
return;
evaluateClusterLight(indices.x);
indices = indices.yzwx;
}
if (lightIndex <= 0.0)
return;
evaluateClusterLight(lightIndex * 255.0);
// end of the cell array
if (lightCellIndex > clusterPixelsPerCell) {
if (lightCellIndex >= clusterMaxCells) {
break;
}
}
Expand Down

0 comments on commit 2a39488

Please sign in to comment.