From a3bc86d748a85f92d5927609e132e59a4f962373 Mon Sep 17 00:00:00 2001
From: Jim Blandy <jimb@red-bean.com>
Date: Fri, 12 Apr 2024 13:03:19 -0700
Subject: [PATCH] [core] Document command encoding and command buffers.

Flesh out the documentation for `wgpu_core`'s `CommandBuffer`,
`CommandEncoder`, and associated types.

Allow doc links to private items. `wgpu-core` isn't entirely
user-facing, so it's useful to document internal items.
---
 wgpu-core/src/command/mod.rs  | 160 +++++++++++++++++++++++++++++++++-
 wgpu-core/src/device/life.rs  |  10 +++
 wgpu-core/src/device/mod.rs   |  17 ++++
 wgpu-core/src/device/queue.rs |   7 +-
 wgpu-core/src/lib.rs          |   2 +
 wgpu-hal/src/lib.rs           |  50 +++++++++--
 wgpu-hal/src/vulkan/mod.rs    |  21 +++++
 7 files changed, 257 insertions(+), 10 deletions(-)

diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index 133c84021b..e3f5e3a4d3 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -38,23 +38,115 @@ use crate::device::trace::Command as TraceCommand;
 
 const PUSH_CONSTANT_CLEAR_ARRAY: &[u32] = &[0_u32; 64];
 
+/// The current state of a [`CommandBuffer`].
 #[derive(Debug)]
 pub(crate) enum CommandEncoderStatus {
+    /// Ready to record commands. An encoder's initial state.
+    ///
+    /// Command building methods like [`command_encoder_clear_buffer`] and
+    /// [`command_encoder_run_compute_pass`] require the encoder to be in this
+    /// state.
+    ///
+    /// [`command_encoder_clear_buffer`]: Global::command_encoder_clear_buffer
+    /// [`command_encoder_run_compute_pass`]: Global::command_encoder_run_compute_pass
     Recording,
+
+    /// Command recording is complete, and the buffer is ready for submission.
+    ///
+    /// [`Global::command_encoder_finish`] transitions a
+    /// `CommandBuffer` from the `Recording` state into this state.
+    ///
+    /// [`Global::queue_submit`] drops command buffers unless they are
+    /// in this state.
     Finished,
+
+    /// An error occurred while recording a compute or render pass.
+    ///
+    /// When a `CommandEncoder` is left in this state, we have also
+    /// returned an error result from the function that encountered
+    /// the problem. Future attempts to use the encoder (that is,
+    /// calls to [`CommandBuffer::get_encoder`]) will also return
+    /// errors.
+    ///
+    /// Calling [`Global::command_encoder_finish`] in this state
+    /// discards the command buffer under construction.
     Error,
 }
 
+/// A raw [`CommandEncoder`][rce], and the raw [`CommandBuffer`][rcb]s built from it.
+///
+/// Each wgpu-core [`CommandBuffer`] owns an instance of this type, which is
+/// where the commands are actually stored.
+///
+/// This holds a `Vec` of raw [`CommandBuffer`][rcb]s, not just one. We are not
+/// always able to record commands in the order in which they must ultimately be
+/// submitted to the queue, but raw command buffers don't permit inserting new
+/// commands into the middle of a recorded stream. However, hal queue submission
+/// accepts a series of command buffers at once, so we can simply break the
+/// stream up into multiple buffers, and then reorder the buffers. See
+/// [`CommandEncoder::close_and_swap`] for a specific example of this.
+///
+/// Note that a [`CommandEncoderId`] actually refers to a [`CommandBuffer`].
+/// Methods that take a command encoder id actually look up the command buffer,
+/// and then use its encoder.
+///
+/// [rce]: wgpu_hal::Api::CommandEncoder
+/// [rcb]: wgpu_hal::Api::CommandBuffer
 pub(crate) struct CommandEncoder<A: HalApi> {
+    /// The underlying `wgpu_hal` [`CommandEncoder`].
+    ///
+    /// Successfully executed command buffers' encoders are saved in a
+    /// [`wgpu_hal::device::CommandAllocator`] for recycling.
+    ///
+    /// [`CommandEncoder`]: wgpu_hal::Api::CommandEncoder
     raw: A::CommandEncoder,
+
+    /// All the raw command buffers for our owning [`CommandBuffer`], in
+    /// submission order.
+    ///
+    /// These command buffers were all constructed with `raw`. The
+    /// [`wgpu_hal::CommandEncoder`] trait forbids these from outliving `raw`,
+    /// and requires that we provide all of these when we call
+    /// [`raw.reset_all()`][CE::ra], so the encoder and its buffers travel
+    /// together.
+    ///
+    /// [CE::ra]: wgpu_hal::CommandEncoder::reset_all
     list: Vec<A::CommandBuffer>,
+
+    /// True if `raw` is in the "recording" state.
+    ///
+    /// See the documentation for [`wgpu_hal::CommandEncoder`] for
+    /// details on the states `raw` can be in.
     is_open: bool,
+
     label: Option<String>,
 }
 
 //TODO: handle errors better
 impl<A: HalApi> CommandEncoder<A> {
-    /// Closes the live encoder
+    /// Finish the current command buffer, if any, and place it
+    /// at the second-to-last position in our list.
+    ///
+    /// If we have opened this command encoder, finish its current
+    /// command buffer, and insert it just before the last element in
+    /// [`self.list`][l]. If this command buffer is closed, do nothing.
+    ///
+    /// On return, the underlying hal encoder is closed.
+    ///
+    /// What is this for?
+    ///
+    /// The `wgpu_hal` contract requires that each render or compute pass's
+    /// commands be preceded by calls to [`transition_buffers`] and
+    /// [`transition_textures`], to put the resources the pass operates on in
+    /// the appropriate state. Unfortunately, we don't know which transitions
+    /// are needed until we're done recording the pass itself. Rather than
+    /// iterating over the pass twice, we note the necessary transitions as we
+    /// record its commands, finish the raw command buffer for the actual pass,
+    /// record a new raw command buffer for the transitions, and jam that buffer
+    /// in just before the pass's. This is the function that jams in the
+    /// transitions' command buffer.
+    ///
+    /// [l]: CommandEncoder::list
     fn close_and_swap(&mut self) -> Result<(), DeviceError> {
         if self.is_open {
             self.is_open = false;
@@ -65,6 +157,16 @@ impl<A: HalApi> CommandEncoder<A> {
         Ok(())
     }
 
+    /// Finish the current command buffer, if any, and add it to the
+    /// end of [`self.list`][l].
+    ///
+    /// If we have opened this command encoder, finish its current
+    /// command buffer, and push it onto the end of [`self.list`][l].
+    /// If this command buffer is closed, do nothing.
+    ///
+    /// On return, the underlying hal encoder is closed.
+    ///
+    /// [l]: CommandEncoder::list
     fn close(&mut self) -> Result<(), DeviceError> {
         if self.is_open {
             self.is_open = false;
@@ -75,6 +177,9 @@ impl<A: HalApi> CommandEncoder<A> {
         Ok(())
     }
 
+    /// Discard the command buffer under construction, if any.
+    ///
+    /// The underlying hal encoder is closed, if it was recording.
     pub(crate) fn discard(&mut self) {
         if self.is_open {
             self.is_open = false;
@@ -82,6 +187,9 @@ impl<A: HalApi> CommandEncoder<A> {
         }
     }
 
+    /// Begin recording a new command buffer, if we haven't already.
+    ///
+    /// The underlying hal encoder is put in the "recording" state.
     pub(crate) fn open(&mut self) -> Result<&mut A::CommandEncoder, DeviceError> {
         if !self.is_open {
             self.is_open = true;
@@ -92,6 +200,10 @@ impl<A: HalApi> CommandEncoder<A> {
         Ok(&mut self.raw)
     }
 
+    /// Begin recording a new command buffer for a render pass, with
+    /// its own label.
+    ///
+    /// The underlying hal encoder is put in the "recording" state.
     fn open_pass(&mut self, label: Option<&str>) -> Result<(), DeviceError> {
         self.is_open = true;
         unsafe { self.raw.begin_encoding(label)? };
@@ -111,12 +223,27 @@ pub(crate) struct BakedCommands<A: HalApi> {
 pub(crate) struct DestroyedBufferError(pub id::BufferId);
 pub(crate) struct DestroyedTextureError(pub id::TextureId);
 
+/// The mutable state of a [`CommandBuffer`].
 pub struct CommandBufferMutable<A: HalApi> {
+    /// The [`wgpu_hal::Api::CommandBuffer`]s we've built so far, and the encoder
+    /// they belong to.
     pub(crate) encoder: CommandEncoder<A>,
+
+    /// The current state of this command buffer's encoder.
     status: CommandEncoderStatus,
+
+    /// All the resources that the commands recorded so far have referred to.
     pub(crate) trackers: Tracker<A>,
+
+    /// The regions of buffers and textures these commands will read and write.
+    ///
+    /// This is used to determine which portions of which
+    /// buffers/textures we actually need to initialize. If we're
+    /// definitely going to write to something before we read from it,
+    /// we don't need to clear its contents.
     buffer_memory_init_actions: Vec<BufferInitTrackerAction<A>>,
     texture_memory_actions: CommandBufferTextureMemoryActions<A>,
+
     pub(crate) pending_query_resets: QueryResetMap<A>,
     #[cfg(feature = "trace")]
     pub(crate) commands: Option<Vec<TraceCommand>>,
@@ -133,11 +260,36 @@ impl<A: HalApi> CommandBufferMutable<A> {
     }
 }
 
+/// A buffer of commands to be submitted to the GPU for execution.
+///
+/// Whereas the WebGPU API uses two separate types for command buffers and
+/// encoders, this type is a fusion of the two:
+///
+/// - During command recording, this holds a [`CommandEncoder`] accepting this
+///   buffer's commands. In this state, the [`CommandBuffer`] type behaves like
+///   a WebGPU `GPUCommandEncoder`.
+///
+/// - Once command recording is finished by calling
+///   [`Global::command_encoder_finish`], no further recording is allowed. The
+///   internal [`CommandEncoder`] is retained solely as a storage pool for the
+///   raw command buffers. In this state, the value behaves like a WebGPU
+///   `GPUCommandBuffer`.
+///
+/// - Once a command buffer is submitted to the queue, it is removed from the id
+///   registry, and its contents are taken to construct a [`BakedCommands`],
+///   whose contents eventually become the property of the submission queue.
 pub struct CommandBuffer<A: HalApi> {
     pub(crate) device: Arc<Device<A>>,
     limits: wgt::Limits,
     support_clear_texture: bool,
     pub(crate) info: ResourceInfo<CommandBuffer<A>>,
+
+    /// The mutable state of this command buffer.
+    ///
+    /// This `Option` is populated when the command buffer is first created.
+    /// When this is submitted, dropped, or destroyed, its contents are
+    /// extracted into a [`BakedCommands`] by
+    /// [`CommandBuffer::extract_baked_commands`].
     pub(crate) data: Mutex<Option<CommandBufferMutable<A>>>,
 }
 
@@ -248,6 +400,12 @@ impl<A: HalApi> CommandBuffer<A> {
 }
 
 impl<A: HalApi> CommandBuffer<A> {
+    /// Return the [`CommandBuffer`] for `id`, for recording new commands.
+    ///
+    /// In `wgpu_core`, the [`CommandBuffer`] type serves both as encoder and
+    /// buffer, which is why this function takes an [`id::CommandEncoderId`] but
+    /// returns a [`CommandBuffer`]. The returned command buffer must be in the
+    /// "recording" state. Otherwise, an error is returned.
     fn get_encoder(
         hub: &Hub<A>,
         id: id::CommandEncoderId,
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index af345015df..e631479614 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -150,6 +150,16 @@ struct ActiveSubmission<A: HalApi> {
     /// Buffers to be mapped once this submission has completed.
     mapped: Vec<Arc<Buffer<A>>>,
 
+    /// Command buffers used by this submission, and the encoder that owns them.
+    ///
+    /// [`wgpu_hal::Queue::submit`] requires the submitted command buffers to
+    /// remain alive until the submission has completed execution. Command
+    /// encoders double as allocation pools for command buffers, so holding them
+    /// here and cleaning them up in [`LifetimeTracker::triage_submissions`]
+    /// satisfies that requirement.
+    ///
+    /// Once this submission has completed, the command buffers are reset and
+    /// the command encoder is recycled.
     encoders: Vec<EncoderInFlight<A>>,
 
     /// List of queue "on_submitted_work_done" closures to be called once this
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index e2ab6c2690..951ddcc90b 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -377,11 +377,24 @@ fn map_buffer<A: HalApi>(
     Ok(mapping.ptr)
 }
 
+/// A pool of free [`wgpu_hal::CommandEncoder`]s, owned by a `Device`.
+///
+/// Each encoder in this list is in the "closed" state.
+///
+/// Since a raw [`CommandEncoder`][ce] is itself a pool for allocating
+/// raw [`CommandBuffer`][cb]s, this is a pool of pools.
+///
+/// [ce]: wgpu_hal::CommandEncoder
+/// [cb]: wgpu_hal::Api::CommandBuffer
 pub(crate) struct CommandAllocator<A: HalApi> {
     free_encoders: Vec<A::CommandEncoder>,
 }
 
 impl<A: HalApi> CommandAllocator<A> {
+    /// Return a fresh [`wgpu_hal::CommandEncoder`] in the "closed" state.
+    ///
+    /// If we have free encoders in the pool, take one of those. Otherwise,
+    /// create a new one on `device`.
     fn acquire_encoder(
         &mut self,
         device: &A::Device,
@@ -396,10 +409,14 @@ impl<A: HalApi> CommandAllocator<A> {
         }
     }
 
+    /// Add `encoder` back to the free pool.
     fn release_encoder(&mut self, encoder: A::CommandEncoder) {
         self.free_encoders.push(encoder);
     }
 
+    /// Free the pool of command encoders.
+    ///
+    /// This is only called when the `Device` is dropped.
     fn dispose(self, device: &A::Device) {
         resource_log!(
             "CommandAllocator::dispose encoders {}",
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 075cd30e60..e663a3efff 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -152,13 +152,18 @@ pub enum TempResource<A: HalApi> {
     Texture(Arc<Texture<A>>),
 }
 
-/// A queue execution for a particular command encoder.
+/// A series of [`CommandBuffers`] that have been submitted to a
+/// queue, and the [`wgpu_hal::CommandEncoder`] that built them.
 pub(crate) struct EncoderInFlight<A: HalApi> {
     raw: A::CommandEncoder,
     cmd_buffers: Vec<A::CommandBuffer>,
 }
 
 impl<A: HalApi> EncoderInFlight<A> {
+    /// Free all of our command buffers.
+    ///
+    /// Return the command encoder, fully reset and ready to be
+    /// reused.
     pub(crate) unsafe fn land(mut self) -> A::CommandEncoder {
         unsafe { self.raw.reset_all(self.cmd_buffers.into_iter()) };
         self.raw
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index 5454f0d682..b00c51825a 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -39,6 +39,8 @@
     unused_braces,
     // It gets in the way a lot and does not prevent bugs in practice.
     clippy::pattern_type_mismatch,
+    // `wgpu-core` isn't entirely user-facing, so it's useful to document internal items.
+    rustdoc::private_intra_doc_links
 )]
 #![warn(
     trivial_casts,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index ddcb0634fe..0b8e8d1e13 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -303,6 +303,15 @@ pub trait Api: Clone + fmt::Debug + Sized {
 
     type Queue: Queue<A = Self>;
     type CommandEncoder: CommandEncoder<A = Self>;
+
+    /// This API's command buffer type.
+    ///
+    /// The only thing you can do with `CommandBuffer`s is build them
+    /// with a [`CommandEncoder`] and then pass them to
+    /// [`Queue::submit`] for execution, or destroy them by passing
+    /// them to [`CommandEncoder::reset_all`].
+    ///
+    /// [`CommandEncoder`]: Api::CommandEncoder
     type CommandBuffer: WasmNotSendSync + fmt::Debug;
 
     type Buffer: fmt::Debug + WasmNotSendSync + 'static;
@@ -545,11 +554,21 @@ pub trait Queue: WasmNotSendSync {
     /// Submits the command buffers for execution on GPU.
     ///
     /// Valid usage:
-    /// - all of the command buffers were created from command pools
-    ///   that are associated with this queue.
-    /// - all of the command buffers had `CommandBuffer::finish()` called.
-    /// - all surface textures that the command buffers write to must be
-    ///   passed to the surface_textures argument.
+    ///
+    /// - All of the [`CommandBuffer`][cb]s were created from
+    ///   [`CommandEncoder`][ce]s that are associated with this queue.
+    ///
+    /// - All of those [`CommandBuffer`][cb]s must remain alive until
+    ///   the submitted commands have finished execution. (Since
+    ///   command buffers must not outlive their encoders, this
+    ///   implies that the encoders must remain alive as well.)
+    ///
+    /// - All of the [`SurfaceTexture`][st]s that the command buffers
+    ///   write to appear in the `surface_textures` argument.
+    ///
+    /// [cb]: Api::CommandBuffer
+    /// [ce]: Api::CommandEncoder
+    /// [st]: Api::SurfaceTexture
     unsafe fn submit(
         &self,
         command_buffers: &[&<Self::A as Api>::CommandBuffer],
@@ -564,7 +583,12 @@ pub trait Queue: WasmNotSendSync {
     unsafe fn get_timestamp_period(&self) -> f32;
 }
 
-/// Encoder and allocation pool for `CommandBuffer`.
+/// Encoder and allocation pool for `CommandBuffer`s.
+///
+/// A `CommandEncoder` not only constructs `CommandBuffer`s but also
+/// acts as the allocation pool that owns the buffers' underlying
+/// storage. Thus, `CommandBuffer`s must not outlive the
+/// `CommandEncoder` that created them.
 ///
 /// The life cycle of a `CommandBuffer` is as follows:
 ///
@@ -577,14 +601,17 @@ pub trait Queue: WasmNotSendSync {
 ///
 /// - Call methods like `copy_buffer_to_buffer`, `begin_render_pass`,
 ///   etc. on a "recording" `CommandEncoder` to add commands to the
-///   list.
+///   list. (If an error occurs, you must call `discard_encoding`; see
+///   below.)
 ///
 /// - Call `end_encoding` on a recording `CommandEncoder` to close the
 ///   encoder and construct a fresh `CommandBuffer` consisting of the
 ///   list of commands recorded up to that point.
 ///
 /// - Call `discard_encoding` on a recording `CommandEncoder` to drop
-///   the commands recorded thus far and close the encoder.
+///   the commands recorded thus far and close the encoder. This is
+///   the only safe thing to do on a `CommandEncoder` if an error has
+///   occurred while recording commands.
 ///
 /// - Call `reset_all` on a closed `CommandEncoder`, passing all the
 ///   live `CommandBuffers` built from it. All the `CommandBuffer`s
@@ -602,6 +629,10 @@ pub trait Queue: WasmNotSendSync {
 ///   built it.
 ///
 /// - A `CommandEncoder` must not outlive its `Device`.
+///
+/// It is the user's responsibility to meet this requirements. This
+/// allows `CommandEncoder` implementations to keep their state
+/// tracking to a minimum.
 pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
     type A: Api;
 
@@ -616,6 +647,9 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
 
     /// Discard the command list under construction, if any.
     ///
+    /// If an error has occurred while recording commands, this
+    /// is the only safe thing to do with the encoder.
+    ///
     /// This puts this `CommandEncoder` in the "closed" state.
     ///
     /// # Safety
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index d969c887d5..32443eff7e 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -447,6 +447,7 @@ pub struct BindGroup {
     set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
 }
 
+/// Miscellaneous allocation recycling pool for `CommandAllocator`.
 #[derive(Default)]
 struct Temp {
     marker: Vec<u8>,
@@ -476,11 +477,31 @@ impl Temp {
 pub struct CommandEncoder {
     raw: vk::CommandPool,
     device: Arc<DeviceShared>,
+
+    /// The current command buffer, if `self` is in the ["recording"]
+    /// state.
+    ///
+    /// ["recording"]: crate::CommandEncoder
+    ///
+    /// If non-`null`, the buffer is in the Vulkan "recording" state.
     active: vk::CommandBuffer,
+
+    /// What kind of pass we are currently within: compute or render.
     bind_point: vk::PipelineBindPoint,
+
+    /// Allocation recycling pool for this encoder.
     temp: Temp,
+
+    /// A pool of available command buffers.
+    ///
+    /// These are all in the Vulkan "initial" state.
     free: Vec<vk::CommandBuffer>,
+
+    /// A pool of discarded command buffers.
+    ///
+    /// These could be in any Vulkan state except "pending".
     discarded: Vec<vk::CommandBuffer>,
+
     /// If this is true, the active renderpass enabled a debug span,
     /// and needs to be disabled on renderpass close.
     rpass_debug_marker_active: bool,