Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dx12: Add dynamically sized CPU descriptor heaps #2110

Merged
merged 4 commits into from
Jun 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 14 additions & 16 deletions src/backend/dx12/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use winapi::shared::{dxgiformat, winerror};

use wio::com::ComPtr;

use {conv, device, internal, native as n, Backend, Device, Shared, MAX_VERTEX_BUFFERS, validate_line_width};
use {conv, device, descriptors_cpu, internal, native as n, Backend, Device, Shared, MAX_VERTEX_BUFFERS, validate_line_width};
use device::ViewInfo;
use root_constants::RootConstant;
use smallvec::SmallVec;
Expand Down Expand Up @@ -1249,19 +1249,11 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
pass_cache.framebuffer.attachments[rtv_id]
};

let mut rtv_pool = n::DescriptorCpuPool {
heap: Device::create_descriptor_heap_impl(
&mut device.clone(),
d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
false,
clear_rects.len()
),
offset: 0,
size: 0,
max_size: clear_rects.len() as _
};

self.rtv_pools.push(rtv_pool.heap.raw.clone());
let mut rtv_pool = descriptors_cpu::HeapLinear::new(
&device,
d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
clear_rects.len()
);

for clear_rect in &clear_rects {
let rect = [get_rect(&clear_rect.rect)];
Expand All @@ -1278,9 +1270,10 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
layers: clear_rect.layers.clone()
}
};
let rtv = Device::view_image_as_render_target_impl(
let rtv = rtv_pool.alloc_handle();
Device::view_image_as_render_target_impl(
&mut device,
&mut rtv_pool,
rtv,
view_info
).unwrap();

Expand Down Expand Up @@ -1841,6 +1834,10 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
);
unsafe { self.raw.ResourceBarrier(1, &pre_barrier) };

warn!("fill_buffer currently unimplemented");
// TODO: GPU handle must be in the current heap. Atm we use a CPU descriptor heap for allocation
// which is not shader visible.
/*
let handle = buffer.clear_uav.unwrap();
unsafe {
self.raw.ClearUnorderedAccessViewUint(
Expand All @@ -1852,6 +1849,7 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
&rect as *const _,
);
}
*/

let post_barrier = Self::transition_barrier(
d3d12::D3D12_RESOURCE_TRANSITION_BARRIER {
Expand Down
171 changes: 171 additions & 0 deletions src/backend/dx12/src/descriptors_cpu.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@

use std::collections::HashSet;
use std::ptr;
use winapi::Interface;
use winapi::um::d3d12;
use wio::com::ComPtr;

// Linear stack allocator for CPU descriptor heaps.
//
// Handles are handed out one after another from a single descriptor heap;
// the only way to reclaim them is to reset the whole allocator with `clear`.
pub struct HeapLinear {
// Byte distance between two consecutive descriptors, as reported by
// `GetDescriptorHandleIncrementSize` for the heap type.
handle_size: usize,
// Number of handles allocated so far; doubles as the index of the next slot.
num: usize,
// Number of descriptors the heap was created with.
size: usize,
// CPU handle of the first descriptor in the heap.
start: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE,
// Pointer to the underlying heap. Never read in this file; presumably held
// only so the heap outlives the handles derived from `start`.
_raw: ComPtr<d3d12::ID3D12DescriptorHeap>,
}

impl HeapLinear {
    // Creates a descriptor heap of type `ty` (flags: `D3D12_DESCRIPTOR_HEAP_FLAG_NONE`)
    // with room for `size` descriptors and wraps it in an empty linear allocator.
    //
    // Panics if `size` does not fit into the `u32` descriptor count or if the
    // device fails to create the heap.
    pub fn new(
        device: &ComPtr<d3d12::ID3D12Device>,
        ty: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE,
        size: usize,
    ) -> Self {
        // `NumDescriptors` is a `u32`; a silent truncation would leave `self.size`
        // larger than the real heap and let `alloc_handle` run past its end.
        assert!(
            size <= u32::max_value() as usize,
            "Descriptor heap size {} exceeds the u32 range",
            size
        );

        let desc = d3d12::D3D12_DESCRIPTOR_HEAP_DESC {
            Type: ty,
            NumDescriptors: size as u32,
            Flags: d3d12::D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
            NodeMask: 0,
        };

        let mut heap: *mut d3d12::ID3D12DescriptorHeap = ptr::null_mut();
        let hr = unsafe {
            device.CreateDescriptorHeap(
                &desc,
                &d3d12::ID3D12DescriptorHeap::uuidof(),
                &mut heap as *mut *mut _ as *mut *mut _,
            )
        };
        // A negative HRESULT signals failure; in that case `heap` is still null
        // and must not be dereferenced below.
        assert!(
            hr >= 0 && !heap.is_null(),
            "Failed to create CPU descriptor heap (HRESULT: {:#x})",
            hr
        );

        let handle_size = unsafe { device.GetDescriptorHandleIncrementSize(ty) as usize };
        let start = unsafe { (*heap).GetCPUDescriptorHandleForHeapStart() };

        HeapLinear {
            handle_size,
            num: 0,
            size,
            start,
            _raw: unsafe { ComPtr::from_raw(heap) },
        }
    }

    // Returns the handle of the next unused descriptor slot.
    //
    // Panics if every slot has already been handed out (see `is_full`).
    pub fn alloc_handle(&mut self) -> d3d12::D3D12_CPU_DESCRIPTOR_HANDLE {
        assert!(!self.is_full(), "Linear CPU descriptor heap is exhausted");

        let slot = self.num;
        self.num += 1;

        d3d12::D3D12_CPU_DESCRIPTOR_HANDLE {
            ptr: self.start.ptr + self.handle_size * slot,
        }
    }

    // Returns `true` once all `size` slots have been allocated.
    pub fn is_full(&self) -> bool {
        self.num >= self.size
    }

    // Resets the allocator so the next allocation starts at slot 0 again.
    // Only the counter is reset; the descriptors themselves are not touched.
    pub fn clear(&mut self) {
        self.num = 0;
    }
}

// Number of descriptors in every fixed-size `Heap`. `Heap` tracks its free
// slots in a `u64` bitmask (one bit per descriptor), so this value matches
// the bit width of that mask.
const HEAP_SIZE_FIXED: usize = 64;

// Fixed-size free-list allocator for CPU descriptors.
//
// Each heap holds `HEAP_SIZE_FIXED` descriptors.
struct Heap {
// Bit flag representation of available handles in the heap.
//
// 0 - Occupied
// 1 - free
availability: u64,
// Byte distance between two consecutive descriptors, as reported by
// `GetDescriptorHandleIncrementSize` for the heap type.
handle_size: usize,
// CPU handle of the first descriptor in the heap.
start: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE,
// Pointer to the underlying heap. Never read in this file; presumably held
// only so the heap outlives the handles derived from `start`.
_raw: ComPtr<d3d12::ID3D12DescriptorHeap>,
}

impl Heap {
    // Creates a descriptor heap of type `ty` (flags: `D3D12_DESCRIPTOR_HEAP_FLAG_NONE`)
    // holding `HEAP_SIZE_FIXED` descriptors, all of them initially free.
    //
    // Panics if the device fails to create the heap.
    pub fn new(device: &ComPtr<d3d12::ID3D12Device>, ty: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE) -> Self {
        let desc = d3d12::D3D12_DESCRIPTOR_HEAP_DESC {
            Type: ty,
            NumDescriptors: HEAP_SIZE_FIXED as _,
            Flags: d3d12::D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
            NodeMask: 0,
        };

        let mut heap: *mut d3d12::ID3D12DescriptorHeap = ptr::null_mut();
        let hr = unsafe {
            device.CreateDescriptorHeap(
                &desc,
                &d3d12::ID3D12DescriptorHeap::uuidof(),
                &mut heap as *mut *mut _ as *mut *mut _,
            )
        };
        // A negative HRESULT signals failure; in that case `heap` is still null
        // and must not be dereferenced below.
        assert!(
            hr >= 0 && !heap.is_null(),
            "Failed to create CPU descriptor heap (HRESULT: {:#x})",
            hr
        );

        let handle_size = unsafe { device.GetDescriptorHandleIncrementSize(ty) as usize };
        let start = unsafe { (*heap).GetCPUDescriptorHandleForHeapStart() };

        Heap {
            handle_size,
            availability: !0, // all free!
            start,
            _raw: unsafe { ComPtr::from_raw(heap) },
        }
    }

    // Claims the lowest-indexed free slot and returns its descriptor handle.
    //
    // Panics if the heap has no free slot left (see `is_full`).
    pub fn alloc_handle(&mut self) -> d3d12::D3D12_CPU_DESCRIPTOR_HANDLE {
        // The lowest set bit marks the first free slot. With no bit set,
        // `trailing_zeros` yields 64, which trips the assertion below.
        let slot = self.availability.trailing_zeros() as usize;
        assert!(slot < HEAP_SIZE_FIXED, "Fixed-size CPU descriptor heap is exhausted");
        // Set the slot as occupied by clearing its bit.
        self.availability &= !(1u64 << slot);

        d3d12::D3D12_CPU_DESCRIPTOR_HANDLE {
            ptr: self.start.ptr + self.handle_size * slot,
        }
    }

    // Returns `true` when every slot of the heap is occupied.
    pub fn is_full(&self) -> bool {
        self.availability == 0
    }
}

// Growable pool of CPU descriptors built from fixed-size `Heap`s.
//
// A new heap is created whenever no existing heap has a free slot.
pub struct DescriptorCpuPool {
// Device used to create additional heaps on demand.
device: ComPtr<d3d12::ID3D12Device>,
// Descriptor heap type shared by every heap in the pool.
ty: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE,
// All heaps created so far; positions in this vector serve as heap ids.
heaps: Vec<Heap>,
// Ids (indices into `heaps`) of the heaps that still have a free slot.
free_list: HashSet<usize>,
}

impl DescriptorCpuPool {
    // Builds an empty pool for descriptors of type `ty`. No heap is created
    // up front; the first one is created by the first `alloc_handle` call.
    pub fn new(device: &ComPtr<d3d12::ID3D12Device>, ty: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE) -> Self {
        let device = device.clone();
        let heaps = Vec::new();
        let free_list = HashSet::new();

        DescriptorCpuPool { device, ty, heaps, free_list }
    }

    // Hands out one descriptor handle, taken from any heap that still has a
    // free slot; a fresh heap is appended when no such heap exists.
    pub fn alloc_handle(&mut self) -> d3d12::D3D12_CPU_DESCRIPTOR_HANDLE {
        // Pick whichever non-full heap the set yields first.
        let candidate = self.free_list.iter().next().cloned();
        let index = match candidate {
            Some(index) => index,
            None => {
                // Allocate a new heap
                let index = self.heaps.len();
                self.heaps.push(Heap::new(&self.device, self.ty));
                self.free_list.insert(index);
                index
            }
        };

        let (handle, exhausted) = {
            let target = &mut self.heaps[index];
            let handle = target.alloc_handle();
            (handle, target.is_full())
        };

        // A heap without free slots must no longer be offered for allocation.
        if exhausted {
            self.free_list.remove(&index);
        }

        handle
    }

    // TODO: free handles
}
54 changes: 26 additions & 28 deletions src/backend/dx12/src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use hal::queue::{RawCommandQueue, QueueFamilyId};
use hal::range::RangeArg;

use {
conv, native as n, root_constants, window as w,
conv, descriptors_cpu, native as n, root_constants, window as w,
Backend as B, Device, MemoryGroup, QUEUE_FAMILIES, MAX_VERTEX_BUFFERS, NUM_HEAP_PROPERTIES,
};
use pool::RawCommandPool;
Expand Down Expand Up @@ -560,8 +560,10 @@ impl Device {
}

pub(crate) fn view_image_as_render_target_impl(
device: &mut ComPtr<d3d12::ID3D12Device>, pool: &mut n::DescriptorCpuPool, info: ViewInfo
) -> Result<d3d12::D3D12_CPU_DESCRIPTOR_HANDLE, image::ViewError> {
device: &mut ComPtr<d3d12::ID3D12Device>,
handle: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE,
info: ViewInfo,
) -> Result<(), image::ViewError> {
#![allow(non_snake_case)]

let mut desc = d3d12::D3D12_RENDER_TARGET_VIEW_DESC {
Expand Down Expand Up @@ -642,21 +644,21 @@ impl Device {
}
};

let handle = pool.alloc_handles(1).cpu;

unsafe {
device.CreateRenderTargetView(info.resource, &desc, handle);
}

Ok(handle)
Ok(())
}

fn view_image_as_render_target(
&self, info: ViewInfo
) -> Result<d3d12::D3D12_CPU_DESCRIPTOR_HANDLE, image::ViewError> {
let mut pool = self.rtv_pool.lock().unwrap();
let handle = pool.alloc_handle();

Self::view_image_as_render_target_impl(&mut self.raw.clone(), &mut *pool, info)
Self::view_image_as_render_target_impl(&mut self.raw.clone(), handle, info)
.map(|_| handle)
}

fn view_image_as_depth_stencil(
Expand Down Expand Up @@ -728,7 +730,7 @@ impl Device {
}
};

let handle = self.dsv_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.dsv_pool.lock().unwrap().alloc_handle();
unsafe {
self.raw.clone().CreateDepthStencilView(info.resource, &desc, handle);
}
Expand Down Expand Up @@ -857,7 +859,7 @@ impl Device {
};

let desc = Self::build_image_as_shader_resource_desc(&info)?;
let handle = self.srv_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.srv_uav_pool.lock().unwrap().alloc_handle();
unsafe {
self.raw.clone().CreateShaderResourceView(info.resource, &desc, handle);
}
Expand Down Expand Up @@ -933,7 +935,7 @@ impl Device {
}
}

let handle = self.uav_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.srv_uav_pool.lock().unwrap().alloc_handle();
unsafe {
self.raw.clone().CreateUnorderedAccessView(info.resource, ptr::null_mut(), &desc, handle);
}
Expand Down Expand Up @@ -1863,7 +1865,7 @@ impl d::Device<B> for Device {
});

let clear_uav = if buffer.usage.contains(buffer::Usage::TRANSFER_DST) {
let handles = self.uav_pool.lock().unwrap().alloc_handles(1);
let handle = self.srv_uav_pool.lock().unwrap().alloc_handle();
let mut view_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC {
Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS,
ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER,
Expand All @@ -1883,10 +1885,10 @@ impl d::Device<B> for Device {
resource as *mut _,
ptr::null_mut(),
&view_desc,
handles.cpu,
handle,
);
}
Some(handles)
Some(handle)
} else {
None
};
Expand Down Expand Up @@ -1937,7 +1939,7 @@ impl d::Device<B> for Device {
Flags: d3d12::D3D12_BUFFER_SRV_FLAG_NONE,
};

let handle = self.srv_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.srv_uav_pool.lock().unwrap().alloc_handle();
unsafe {
self.raw.clone().CreateShaderResourceView(buffer.resource, &desc, handle);
}
Expand All @@ -1961,7 +1963,7 @@ impl d::Device<B> for Device {
CounterOffsetInBytes: 0,
};

let handle = self.uav_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.srv_uav_pool.lock().unwrap().alloc_handle();
unsafe {
self.raw.clone().CreateUnorderedAccessView(buffer.resource, ptr::null_mut(), &desc, handle);
}
Expand Down Expand Up @@ -2278,7 +2280,7 @@ impl d::Device<B> for Device {
}

fn create_sampler(&self, info: image::SamplerInfo) -> n::Sampler {
let handle = self.sampler_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.sampler_pool.lock().unwrap().alloc_handle();

let op = match info.comparison {
Some(_) => d3d12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON,
Expand Down Expand Up @@ -2428,21 +2430,17 @@ impl d::Device<B> for Device {
pso::Descriptor::Buffer(buffer, ref range) => {
if update_pool_index == descriptor_update_pools.len() {
let max_size = 1u64<<12; //arbitrary
descriptor_update_pools.push(n::DescriptorCpuPool {
heap: Self::create_descriptor_heap_impl(
&mut self.raw.clone(),
descriptor_update_pools.push(
descriptors_cpu::HeapLinear::new(
&self.raw,
d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
false,
max_size as _,
),
offset: 0,
size: 0,
max_size,
});
)
);
}
let heap = descriptor_update_pools.last_mut().unwrap();
let handle = heap.alloc_handles(1).cpu;
if heap.size == heap.max_size {
let handle = heap.alloc_handle();
if heap.is_full() {
// pool is full, move to the next one
update_pool_index += 1;
}
Expand Down Expand Up @@ -2561,7 +2559,7 @@ impl d::Device<B> for Device {

// reset the temporary CPU-size descriptor pools
for buffer_desc_pool in descriptor_update_pools.iter_mut() {
buffer_desc_pool.size = 0;
buffer_desc_pool.clear();
}
}

Expand Down
Loading