Skip to content

Commit

Permalink
sokol_gfx.h: start implementing storage buffer support
Browse files Browse the repository at this point in the history
  • Loading branch information
floooh committed Mar 5, 2024
1 parent 55dff3d commit cf2b5e3
Showing 1 changed file with 126 additions and 5 deletions.
131 changes: 126 additions & 5 deletions sokol_gfx.h
Original file line number Diff line number Diff line change
Expand Up @@ -1516,6 +1516,7 @@ enum {
SG_MAX_SHADERSTAGE_IMAGES = 12,
SG_MAX_SHADERSTAGE_SAMPLERS = 8,
SG_MAX_SHADERSTAGE_IMAGESAMPLERPAIRS = 12,
SG_MAX_SHADERSTAGE_STORAGE_BUFFERS = 4, // FIXME: bump to 8?
SG_MAX_SHADERSTAGE_UBS = 4,
SG_MAX_UB_MEMBERS = 16,
SG_MAX_VERTEX_ATTRIBUTES = 16,
Expand Down Expand Up @@ -1700,6 +1701,7 @@ typedef struct sg_features {
bool image_clamp_to_border; // border color and clamp-to-border UV-wrap mode is supported
bool mrt_independent_blend_state; // multiple-render-target rendering can use per-render-target blend state
bool mrt_independent_write_mask; // multiple-render-target rendering can use per-render-target color write masks
bool storage_buffer; // storage buffers are supported
} sg_features;

/*
Expand Down Expand Up @@ -1798,6 +1800,7 @@ typedef enum sg_buffer_type {
_SG_BUFFERTYPE_DEFAULT, // value 0 reserved for default-init
SG_BUFFERTYPE_VERTEXBUFFER,
SG_BUFFERTYPE_INDEXBUFFER,
SG_BUFFERTYPE_STORAGEBUFFER,
_SG_BUFFERTYPE_NUM,
_SG_BUFFERTYPE_FORCE_U32 = 0x7FFFFFFF
} sg_buffer_type;
Expand Down Expand Up @@ -2575,6 +2578,7 @@ typedef struct sg_pass {
// Per-shader-stage resource bindings passed to sg_apply_bindings().
// Unused slots must be zero-initialized (SG_INVALID_ID handles).
typedef struct sg_stage_bindings {
sg_image images[SG_MAX_SHADERSTAGE_IMAGES];        // texture bindings for this stage
sg_sampler samplers[SG_MAX_SHADERSTAGE_SAMPLERS];  // sampler bindings for this stage
sg_buffer storage_buffers[SG_MAX_SHADERSTAGE_STORAGE_BUFFERS]; // storage buffer bindings (buffers must be SG_BUFFERTYPE_STORAGEBUFFER)
} sg_stage_bindings;

typedef struct sg_bindings {
Expand Down Expand Up @@ -2846,6 +2850,12 @@ typedef struct sg_shader_uniform_block_desc {
sg_shader_uniform_desc uniforms[SG_MAX_UB_MEMBERS];
} sg_shader_uniform_block_desc;

// Describes one storage buffer slot expected by a shader stage
// (part of sg_shader_stage_desc). A slot is considered 'used' when
// item_size is non-zero; slot scanning stops at the first zero item_size.
typedef struct sg_shader_storage_buffer_desc {
// FIXME: this should probably be '.used', because storage buffers
// are not limited to arrays (like for instance D3D's StructuredBuffer)
size_t item_size;  // size in bytes of one array item in the storage buffer
} sg_shader_storage_buffer_desc;

typedef struct sg_shader_image_desc {
bool used;
bool multisampled;
Expand All @@ -2871,6 +2881,7 @@ typedef struct sg_shader_stage_desc {
const char* entry;
const char* d3d11_target;
sg_shader_uniform_block_desc uniform_blocks[SG_MAX_SHADERSTAGE_UBS];
sg_shader_storage_buffer_desc storage_buffers[SG_MAX_SHADERSTAGE_STORAGE_BUFFERS];
sg_shader_image_desc images[SG_MAX_SHADERSTAGE_IMAGES];
sg_shader_sampler_desc samplers[SG_MAX_SHADERSTAGE_SAMPLERS];
sg_shader_image_sampler_pair_desc image_sampler_pairs[SG_MAX_SHADERSTAGE_IMAGESAMPLERPAIRS];
Expand Down Expand Up @@ -3634,6 +3645,10 @@ typedef struct sg_frame_stats {
_SG_LOGITEM_XMACRO(VALIDATE_ABND_VS_EXPECTED_NONFILTERING_SAMPLER, "sg_apply_bindings: shader expected SG_SAMPLERTYPE_NONFILTERING on vertex stage, but sampler has SG_FILTER_LINEAR filters") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_VS_UNEXPECTED_SAMPLER_BINDING, "sg_apply_bindings: unexpected sampler binding on vertex stage") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_VS_SMP_EXISTS, "sg_apply_bindings: sampler bound to vertex stage no longer alive") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_VS_EXPECTED_STORAGEBUFFER_BINDING, "sg_apply_bindings: storage buffer binding on vertex stage is missing or the buffer handle is invalid") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_VS_STORAGEBUFFER_EXISTS, "sg_apply_bindings: storage buffer bound to vertex stage no longer alive") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_VS_STORAGEBUFFER_BINDING_BUFFERTYPE, "sg_apply_bindings: buffer bound to vertex stage storage buffer slot is not of type storage buffer") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_VS_UNEXPECTED_STORAGEBUFFER_BINDING, "sg_apply_bindings: unexpected storage buffer binding on vertex stage") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_FS_EXPECTED_IMAGE_BINDING, "sg_apply_bindings: image binding on fragment stage is missing or the image handle is invalid") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_FS_IMG_EXISTS, "sg_apply_bindings: image bound to fragment stage no longer alive") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_FS_IMAGE_TYPE_MISMATCH, "sg_apply_bindings: type of image bound to fragment stage doesn't match shader desc") \
Expand All @@ -3647,6 +3662,10 @@ typedef struct sg_frame_stats {
_SG_LOGITEM_XMACRO(VALIDATE_ABND_FS_EXPECTED_NONFILTERING_SAMPLER, "sg_apply_bindings: shader expected SG_SAMPLERTYPE_NONFILTERING on fragment stage, but sampler has SG_FILTER_LINEAR filters") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_FS_UNEXPECTED_SAMPLER_BINDING, "sg_apply_bindings: unexpected sampler binding on fragment stage") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_FS_SMP_EXISTS, "sg_apply_bindings: sampler bound to fragment stage no longer alive") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_FS_EXPECTED_STORAGEBUFFER_BINDING, "sg_apply_bindings: storage buffer binding on fragment stage is missing or the buffer handle is invalid") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_FS_STORAGEBUFFER_EXISTS, "sg_apply_bindings: storage buffer bound to fragment stage no longer alive") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_FS_STORAGEBUFFER_BINDING_BUFFERTYPE, "sg_apply_bindings: buffer bound to fragment stage storage buffer slot is not of type storage buffer") \
_SG_LOGITEM_XMACRO(VALIDATE_ABND_FS_UNEXPECTED_STORAGEBUFFER_BINDING, "sg_apply_bindings: unexpected storage buffer binding on fragment stage") \
_SG_LOGITEM_XMACRO(VALIDATE_AUB_NO_PIPELINE, "sg_apply_uniforms: must be called after sg_apply_pipeline()") \
_SG_LOGITEM_XMACRO(VALIDATE_AUB_NO_UB_AT_SLOT, "sg_apply_uniforms: no uniform block declaration at this shader stage UB slot") \
_SG_LOGITEM_XMACRO(VALIDATE_AUB_SIZE, "sg_apply_uniforms: data size doesn't match declared uniform block size") \
Expand Down Expand Up @@ -4876,6 +4895,10 @@ typedef struct {
size_t size;
} _sg_shader_uniform_block_t;

// Internal mirror of sg_shader_storage_buffer_desc, stored per stage in
// _sg_shader_stage_t after shader creation (see _sg_shader_common_init).
typedef struct {
size_t item_size;  // non-zero marks the slot as used (matches the public desc's convention)
} _sg_shader_storage_buffer_t;

typedef struct {
sg_image_type image_type;
sg_image_sample_type sample_type;
Expand All @@ -4897,7 +4920,9 @@ typedef struct {
int num_images;
int num_samplers;
int num_image_samplers;
int num_storage_buffers;
_sg_shader_uniform_block_t uniform_blocks[SG_MAX_SHADERSTAGE_UBS];
_sg_shader_storage_buffer_t storage_buffers[SG_MAX_SHADERSTAGE_STORAGE_BUFFERS];
_sg_shader_image_t images[SG_MAX_SHADERSTAGE_IMAGES];
_sg_shader_sampler_t samplers[SG_MAX_SHADERSTAGE_SAMPLERS];
_sg_shader_image_sampler_t image_samplers[SG_MAX_SHADERSTAGE_IMAGESAMPLERPAIRS];
Expand Down Expand Up @@ -4952,6 +4977,15 @@ _SOKOL_PRIVATE void _sg_shader_common_init(_sg_shader_common_t* cmn, const sg_sh
stage->image_samplers[img_smp_index].sampler_slot = img_smp_desc->sampler_slot;
stage->num_image_samplers++;
}
SOKOL_ASSERT(stage->num_storage_buffers == 0);
for (int sbuf_index = 0; sbuf_index < SG_MAX_SHADERSTAGE_STORAGE_BUFFERS; sbuf_index++) {
const sg_shader_storage_buffer_desc* sbuf_desc = &stage_desc->storage_buffers[sbuf_index];
if (0 == sbuf_desc->item_size) {
break;
}
stage->storage_buffers[sbuf_index].item_size = sbuf_desc->item_size;
stage->num_storage_buffers++;
}
}
}

Expand Down Expand Up @@ -5672,16 +5706,20 @@ typedef struct {
int num_vbs;
int num_vs_imgs;
int num_vs_smps;
int num_vs_sbufs;
int num_fs_imgs;
int num_fs_smps;
int num_fs_sbufs;
int vb_offsets[SG_MAX_VERTEX_BUFFERS];
int ib_offset;
_sg_buffer_t* vbs[SG_MAX_VERTEX_BUFFERS];
_sg_buffer_t* ib;
_sg_image_t* vs_imgs[SG_MAX_SHADERSTAGE_IMAGES];
_sg_sampler_t* vs_smps[SG_MAX_SHADERSTAGE_SAMPLERS];
_sg_buffer_t* vs_sbufs[SG_MAX_SHADERSTAGE_STORAGE_BUFFERS];
_sg_image_t* fs_imgs[SG_MAX_SHADERSTAGE_IMAGES];
_sg_sampler_t* fs_smps[SG_MAX_SHADERSTAGE_SAMPLERS];
_sg_buffer_t* fs_sbufs[SG_MAX_SHADERSTAGE_STORAGE_BUFFERS];
} _sg_bindings_t;

typedef struct {
Expand Down Expand Up @@ -7409,6 +7447,7 @@ _SOKOL_PRIVATE void _sg_gl_init_caps_glcore33(void) {
_sg.features.image_clamp_to_border = true;
_sg.features.mrt_independent_blend_state = false;
_sg.features.mrt_independent_write_mask = true;
_sg.features.storage_buffer = false;

// scan extensions
bool has_s3tc = false; // BC1..BC3
Expand Down Expand Up @@ -7481,6 +7520,7 @@ _SOKOL_PRIVATE void _sg_gl_init_caps_gles3(void) {
_sg.features.image_clamp_to_border = false;
_sg.features.mrt_independent_blend_state = false;
_sg.features.mrt_independent_write_mask = false;
_sg.features.storage_buffer = false;

bool has_s3tc = false; // BC1..BC3
bool has_rgtc = false; // BC4 and BC5
Expand Down Expand Up @@ -9913,6 +9953,7 @@ _SOKOL_PRIVATE void _sg_d3d11_init_caps(void) {
_sg.features.image_clamp_to_border = true;
_sg.features.mrt_independent_blend_state = true;
_sg.features.mrt_independent_write_mask = true;
_sg.features.storage_buffer = false;

_sg.limits.max_image_size_2d = 16 * 1024;
_sg.limits.max_image_size_cube = 16 * 1024;
Expand Down Expand Up @@ -11579,6 +11620,7 @@ _SOKOL_PRIVATE void _sg_mtl_init_caps(void) {
_sg.features.origin_top_left = true;
_sg.features.mrt_independent_blend_state = true;
_sg.features.mrt_independent_write_mask = true;
_sg.features.storage_buffer = true;

_sg.features.image_clamp_to_border = false;
#if (MAC_OS_X_VERSION_MAX_ALLOWED >= 120000) || (__IPHONE_OS_VERSION_MAX_ALLOWED >= 140000)
Expand Down Expand Up @@ -12683,7 +12725,7 @@ _SOKOL_PRIVATE bool _sg_mtl_apply_bindings(_sg_bindings_t* bnd) {
}
}

// apply vertex shader images
// apply vertex stage images
for (NSUInteger slot = 0; slot < (NSUInteger)bnd->num_vs_imgs; slot++) {
const _sg_image_t* img = bnd->vs_imgs[slot];
if (_sg.mtl.state_cache.cur_vs_image_ids[slot].id != img->slot.id) {
Expand All @@ -12694,7 +12736,7 @@ _SOKOL_PRIVATE bool _sg_mtl_apply_bindings(_sg_bindings_t* bnd) {
}
}

// apply vertex shader samplers
// apply vertex stage samplers
for (NSUInteger slot = 0; slot < (NSUInteger)bnd->num_vs_smps; slot++) {
const _sg_sampler_t* smp = bnd->vs_smps[slot];
if (_sg.mtl.state_cache.cur_vs_sampler_ids[slot].id != smp->slot.id) {
Expand All @@ -12705,7 +12747,17 @@ _SOKOL_PRIVATE bool _sg_mtl_apply_bindings(_sg_bindings_t* bnd) {
}
}

// apply fragment shader images
// apply vertex stage storage buffers
// FIXME: move start slot after UBs (?)
// FIXME: caching
for (NSUInteger slot = 0; slot < (NSUInteger)bnd->num_vs_sbufs; slot++) {
const _sg_buffer_t* sbuf = bnd->vs_sbufs[slot];
const NSUInteger mtl_slot = SG_MAX_SHADERSTAGE_UBS + SG_MAX_VERTEX_BUFFERS + slot;
[_sg.mtl.cmd_encoder setVertexBuffer:_sg_mtl_id(sbuf->mtl.buf[sbuf->cmn.active_slot]) offset:0 atIndex:mtl_slot];
_sg_stats_add(metal.bindings.num_set_vertex_buffer, 1);
}

// apply fragment stage images
for (NSUInteger slot = 0; slot < (NSUInteger)bnd->num_fs_imgs; slot++) {
const _sg_image_t* img = bnd->fs_imgs[slot];
if (_sg.mtl.state_cache.cur_fs_image_ids[slot].id != img->slot.id) {
Expand All @@ -12716,7 +12768,7 @@ _SOKOL_PRIVATE bool _sg_mtl_apply_bindings(_sg_bindings_t* bnd) {
}
}

// apply fragment shader samplers
// apply fragment stage samplers
for (NSUInteger slot = 0; slot < (NSUInteger)bnd->num_fs_smps; slot++) {
const _sg_sampler_t* smp = bnd->fs_smps[slot];
if (_sg.mtl.state_cache.cur_fs_sampler_ids[slot].id != smp->slot.id) {
Expand All @@ -12726,6 +12778,15 @@ _SOKOL_PRIVATE bool _sg_mtl_apply_bindings(_sg_bindings_t* bnd) {
_sg_stats_add(metal.bindings.num_set_fragment_sampler_state, 1);
}
}

// apply fragment stage storage buffers
for (NSUInteger slot = 0; slot < (NSUInteger)bnd->num_fs_sbufs; slot++) {
const _sg_buffer_t* sbuf = bnd->fs_sbufs[slot];
const NSUInteger mtl_slot = SG_MAX_SHADERSTAGE_UBS + slot;
[_sg.mtl.cmd_encoder setFragmentBuffer:_sg_mtl_id(sbuf->mtl.buf[sbuf->cmn.active_slot]) offset:0 atIndex:mtl_slot];
// FIXME: _sg_stats_add(metal.bindings.num_set_fragment_buffer, 1);
}

return true;
}

Expand Down Expand Up @@ -13238,6 +13299,7 @@ _SOKOL_PRIVATE void _sg_wgpu_init_caps(void) {
_sg.features.image_clamp_to_border = false;
_sg.features.mrt_independent_blend_state = true;
_sg.features.mrt_independent_write_mask = true;
_sg.features.storage_buffer = false;

wgpuDeviceGetLimits(_sg.wgpu.dev, &_sg.wgpu.limits);

Expand Down Expand Up @@ -15892,7 +15954,6 @@ _SOKOL_PRIVATE bool _sg_validate_pipeline_desc(const sg_pipeline_desc* desc) {
}
_SG_VALIDATE(_sg_multiple_u64((uint64_t)l_state->stride, 4), VALIDATE_PIPELINEDESC_LAYOUT_STRIDE4);
}
_SG_VALIDATE(desc->layout.attrs[0].format != SG_VERTEXFORMAT_INVALID, VALIDATE_PIPELINEDESC_NO_ATTRS);
const _sg_shader_t* shd = _sg_lookup_shader(&_sg.pools, desc->shader.id);
_SG_VALIDATE(0 != shd, VALIDATE_PIPELINEDESC_SHADER);
if (shd) {
Expand Down Expand Up @@ -16291,6 +16352,23 @@ _SOKOL_PRIVATE bool _sg_validate_apply_bindings(const sg_bindings* bindings) {
}
}

// has expected vertex shader storage buffers
for (int i = 0; i < SG_MAX_SHADERSTAGE_STORAGE_BUFFERS; i++) {
const _sg_shader_stage_t* stage = &pip->shader->cmn.stage[SG_SHADERSTAGE_VS];
if (stage->storage_buffers[i].item_size != 0) {
_SG_VALIDATE(bindings->vs.storage_buffers[i].id != SG_INVALID_ID, VALIDATE_ABND_VS_EXPECTED_STORAGEBUFFER_BINDING);
if (bindings->vs.storage_buffers[i].id != SG_INVALID_ID) {
const _sg_buffer_t* sbuf = _sg_lookup_buffer(&_sg.pools, bindings->vs.storage_buffers[i].id);
_SG_VALIDATE(sbuf != 0, VALIDATE_ABND_VS_STORAGEBUFFER_EXISTS);
if (sbuf) {
_SG_VALIDATE(sbuf->cmn.type == SG_BUFFERTYPE_STORAGEBUFFER, VALIDATE_ABND_VS_STORAGEBUFFER_BINDING_BUFFERTYPE);
}
}
} else {
_SG_VALIDATE(bindings->vs.storage_buffers[i].id == SG_INVALID_ID, VALIDATE_ABND_VS_UNEXPECTED_STORAGEBUFFER_BINDING);
}
}

// has expected fragment shader images
for (int i = 0; i < SG_MAX_SHADERSTAGE_IMAGES; i++) {
const _sg_shader_stage_t* stage = &pip->shader->cmn.stage[SG_SHADERSTAGE_FS];
Expand Down Expand Up @@ -16346,6 +16424,24 @@ _SOKOL_PRIVATE bool _sg_validate_apply_bindings(const sg_bindings* bindings) {
_SG_VALIDATE(bindings->fs.samplers[i].id == SG_INVALID_ID, VALIDATE_ABND_FS_UNEXPECTED_SAMPLER_BINDING);
}
}

// has expected fragment shader storage buffers
for (int i = 0; i < SG_MAX_SHADERSTAGE_STORAGE_BUFFERS; i++) {
const _sg_shader_stage_t* stage = &pip->shader->cmn.stage[SG_SHADERSTAGE_FS];
if (stage->storage_buffers[i].item_size != 0) {
_SG_VALIDATE(bindings->fs.storage_buffers[i].id != SG_INVALID_ID, VALIDATE_ABND_FS_EXPECTED_STORAGEBUFFER_BINDING);
if (bindings->fs.storage_buffers[i].id != SG_INVALID_ID) {
const _sg_buffer_t* sbuf = _sg_lookup_buffer(&_sg.pools, bindings->fs.storage_buffers[i].id);
_SG_VALIDATE(sbuf != 0, VALIDATE_ABND_FS_STORAGEBUFFER_EXISTS);
if (sbuf) {
_SG_VALIDATE(sbuf->cmn.type == SG_BUFFERTYPE_STORAGEBUFFER, VALIDATE_ABND_FS_STORAGEBUFFER_BINDING_BUFFERTYPE);
}
}
} else {
_SG_VALIDATE(bindings->fs.storage_buffers[i].id == SG_INVALID_ID, VALIDATE_ABND_FS_UNEXPECTED_STORAGEBUFFER_BINDING);
}
}

return _sg_validate_end();
#endif
}
Expand Down Expand Up @@ -17876,6 +17972,19 @@ SOKOL_API_IMPL void sg_apply_bindings(const sg_bindings* bindings) {
}
}

for (int i = 0; i < SG_MAX_SHADERSTAGE_STORAGE_BUFFERS; i++, bnd.num_vs_sbufs++) {
if (bindings->vs.storage_buffers[i].id) {
bnd.vs_sbufs[i] = _sg_lookup_buffer(&_sg.pools, bindings->vs.storage_buffers[i].id);
if (bnd.vs_sbufs[i]) {
_sg.next_draw_valid &= (SG_RESOURCESTATE_VALID == bnd.vs_sbufs[i]->slot.state);
} else {
_sg.next_draw_valid = false;
}
} else {
break;
}
}

for (int i = 0; i < SG_MAX_SHADERSTAGE_IMAGES; i++, bnd.num_fs_imgs++) {
if (bindings->fs.images[i].id) {
bnd.fs_imgs[i] = _sg_lookup_image(&_sg.pools, bindings->fs.images[i].id);
Expand All @@ -17902,6 +18011,18 @@ SOKOL_API_IMPL void sg_apply_bindings(const sg_bindings* bindings) {
}
}

for (int i = 0; i < SG_MAX_SHADERSTAGE_STORAGE_BUFFERS; i++, bnd.num_fs_sbufs++) {
if (bindings->fs.storage_buffers[i].id) {
bnd.fs_sbufs[i] = _sg_lookup_buffer(&_sg.pools, bindings->fs.storage_buffers[i].id);
if (bnd.fs_sbufs[i]) {
_sg.next_draw_valid &= (SG_RESOURCESTATE_VALID == bnd.fs_sbufs[i]->slot.state);
} else {
_sg.next_draw_valid = false;
}
} else {
break;
}
}
if (_sg.next_draw_valid) {
_sg.next_draw_valid &= _sg_apply_bindings(&bnd);
_SG_TRACE_ARGS(apply_bindings, bindings);
Expand Down

2 comments on commit cf2b5e3

@kariem2k
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for doing this! I had a feeling that you are doing this and maybe Compute Shaders afterwards! I was reading this #219 and I said I am pretty sure @floooh started that and checked the branches and voila! I have been a big admirer of your work since the early days of Nebula Device 2. Thank you very much again for all of your contributions.

@floooh
Copy link
Owner Author

@floooh floooh commented on cf2b5e3 Mar 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's the plan! I'm not sure yet what restrictions there will be, but I'll use WebGPU as guidance. One problem will be that OpenGL will need to be bumped to 4.3 for compute shader support, but macOS only goes up to 4.1, so for GL I'll need to make all that stuff optional.

Please sign in to comment.