diff --git a/CHANGELOG.md b/CHANGELOG.md index a5b3ec0..a0a9c51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - ReleaseDate ### Added ⭐ +- [PR#24](https://github.com/EmbarkStudios/spirt/pull/24) added `qptr` ("quasi-pointer") type + and associated passes to destroy and recreate pointer-related type information + (see [PR#24](https://github.com/EmbarkStudios/spirt/pull/24) for a much more detailed overview) - [PR#22](https://github.com/EmbarkStudios/spirt/pull/22) added `Diag` and `Attr::Diagnostics`, for embedding diagnostics (errors or warnings) in SPIR-T itself - [PR#18](https://github.com/EmbarkStudios/spirt/pull/18) added anchor-based alignment diff --git a/examples/spv-lower-link-qptr-lift.rs b/examples/spv-lower-link-qptr-lift.rs new file mode 100644 index 0000000..86b2cf4 --- /dev/null +++ b/examples/spv-lower-link-qptr-lift.rs @@ -0,0 +1,138 @@ +use std::fs; +use std::path::Path; +use std::rc::Rc; + +fn main() -> std::io::Result<()> { + match &std::env::args().collect::>()[..] { + [_, in_file] => { + let in_file_path = Path::new(in_file); + + let save_print_plan = |suffix: &str, plan: spirt::print::Plan| { + let pretty = plan.pretty_print(); + let ext = format!("{suffix}.spirt"); + + // FIXME(eddyb) don't allocate whole `String`s here. + fs::write(in_file_path.with_extension(&ext), pretty.to_string())?; + fs::write( + in_file_path.with_extension(ext + ".html"), + pretty + .render_to_html() + .with_dark_mode_support() + .to_html_doc(), + ) + }; + + // FIXME(eddyb) adapt the other examples to this style. 
+ + fn eprint_duration(f: impl FnOnce() -> R) -> R { + let start = std::time::Instant::now(); + let r = f(); + eprint!("[{:8.3}ms] ", start.elapsed().as_secs_f64() * 1000.0); + r + } + + eprint_duration(|| { + let _ = spirt::spv::spec::Spec::get(); + }); + eprintln!("spv::spec::Spec::get"); + + let cx = Rc::new(spirt::Context::new()); + + let multi_version_printing = true; + let mut per_pass_module = vec![]; + let mut after_pass = |pass, module: &spirt::Module| { + if multi_version_printing { + per_pass_module.push((pass, module.clone())); + Ok(()) + } else { + save_print_plan( + &format!("after.{pass}"), + spirt::print::Plan::for_module(module), + ) + } + }; + + let mut module = + eprint_duration(|| spirt::Module::lower_from_spv_file(cx.clone(), in_file_path))?; + eprintln!("Module::lower_from_spv_file({})", in_file_path.display()); + + let original_export_count = module.exports.len(); + eprint_duration(|| { + spirt::passes::link::minimize_exports(&mut module, |export_key| { + matches!(export_key, spirt::ExportKey::SpvEntryPoint { .. }) + }) + }); + eprintln!( + "link::minimize_exports: {} -> {} exports", + original_export_count, + module.exports.len() + ); + //after_pass("minimize_exports", &module)?; + + // HACK(eddyb) do this late enough to avoid spending time on unused + // functions, which `link::minimize_exports` makes unreachable. + eprint_duration(|| spirt::passes::legalize::structurize_func_cfgs(&mut module)); + eprintln!("legalize::structurize_func_cfgs"); + //after_pass("structurize_func_cfgs", &module)?; + + eprint_duration(|| spirt::passes::link::resolve_imports(&mut module)); + eprintln!("link::resolve_imports"); + //after_pass("resolve_imports", &module)?; + + // HACK(eddyb) + after_pass("", &module)?; + + // HACK(eddyb) this is roughly what Rust-GPU would need. 
+ let layout_config = &spirt::qptr::LayoutConfig { + abstract_bool_size_align: (1, 1), + logical_ptr_size_align: (4, 4), + ..spirt::qptr::LayoutConfig::VULKAN_SCALAR_LAYOUT + }; + + eprint_duration(|| { + spirt::passes::qptr::lower_from_spv_ptrs(&mut module, layout_config) + }); + eprintln!("qptr::lower_from_spv_ptrs"); + after_pass("qptr::lower_from_spv_ptrs", &module)?; + + eprint_duration(|| spirt::passes::qptr::analyze_uses(&mut module, layout_config)); + eprintln!("qptr::analyze_uses"); + after_pass("qptr::analyze_uses", &module)?; + + eprint_duration(|| spirt::passes::qptr::lift_to_spv_ptrs(&mut module, layout_config)); + eprintln!("qptr::lift_to_spv_ptrs"); + after_pass("qptr::lift_to_spv_ptrs", &module)?; + + if multi_version_printing { + // FIXME(eddyb) use a better suffix than `qptr` (or none). + save_print_plan( + "qptr", + spirt::print::Plan::for_versions( + &cx, + per_pass_module.iter().map(|(pass, module)| { + ( + // HACK(eddyb) + if pass.is_empty() { + "initial".into() + } else { + format!("after {pass}") + }, + module, + ) + }), + ), + )?; + } + + //let out_file_path = in_file_path.with_extension("qptr.spv"); + //eprint_duration(|| module.lift_to_spv_file(&out_file_path))?; + //eprintln!("Module::lift_to_spv_file({})", out_file_path.display()); + + Ok(()) + } + args => { + eprintln!("Usage: {} IN", args[0]); + std::process::exit(1); + } + } +} diff --git a/src/context.rs b/src/context.rs index 35aef36..26d0dc8 100644 --- a/src/context.rs +++ b/src/context.rs @@ -403,7 +403,9 @@ impl, V> EntityOrientedDenseMap { Self::default() } - pub fn insert(&mut self, key: K, value: V) -> Option { + // FIXME(eddyb) this should not allocate space unconditionally, but offer an + // API where "vacant entry" may or may not have a `&mut Option` in it. 
+ pub fn entry(&mut self, key: K) -> &mut Option { let entity = K::to_entity(key); let (chunk_start, intra_chunk_idx) = entity.to_chunk_start_and_intra_chunk_idx(); let chunk_value_slots = self @@ -417,7 +419,11 @@ impl, V> EntityOrientedDenseMap { } let value_slots = &mut chunk_value_slots[intra_chunk_idx]; - K::get_dense_value_slot_mut(key, value_slots).replace(value) + K::get_dense_value_slot_mut(key, value_slots) + } + + pub fn insert(&mut self, key: K, value: V) -> Option { + self.entry(key).replace(value) } pub fn get(&self, key: K) -> Option<&V> { @@ -438,6 +444,7 @@ impl, V> EntityOrientedDenseMap { self.get_slot_mut(key)?.take() } + // FIXME(eddyb) deduplicate with `entry`. fn get_slot_mut(&mut self, key: K) -> Option<&mut Option> { let entity = K::to_entity(key); let (chunk_start, intra_chunk_idx) = entity.to_chunk_start_and_intra_chunk_idx(); @@ -514,7 +521,9 @@ impl>, D> EntityList { let old_first_def = &mut defs[old_first]; // FIXME(eddyb) this situation should be impossible anyway, as it - // involves the `EntityListNode`s links, which should be unforgeable. + // involves the `EntityListNode`s links, which should be unforgeable, + // but it's still possible to keep around outdated `EntityList`s + // (should `EntityList` not implement `Copy`/`Clone` *at all*?) assert!( old_first_def.prev.is_none(), "invalid EntityList: `first->prev != None`" @@ -543,7 +552,9 @@ impl>, D> EntityList { let old_last_def = &mut defs[old_last]; // FIXME(eddyb) this situation should be impossible anyway, as it - // involves the `EntityListNode`s links, which should be unforgeable. + // involves the `EntityListNode`s links, which should be unforgeable, + // but it's still possible to keep around outdated `EntityList`s + // (should `EntityList` not implement `Copy`/`Clone` *at all*?) 
assert!( old_last_def.next.is_none(), "invalid EntityList: `last->next != None`" @@ -558,6 +569,49 @@ impl>, D> EntityList { }); } + /// Insert `new_node` (defined in `defs`) into `self`, before `next`. + // + // FIXME(eddyb) unify this with the other insert methods, maybe with a new + // "insert position" type? + #[track_caller] + pub fn insert_before(&mut self, new_node: E, next: E, defs: &mut EntityDefs) { + let prev = defs[next].prev.replace(new_node); + + let new_node_def = &mut defs[new_node]; + assert!( + new_node_def.prev.is_none() && new_node_def.next.is_none(), + "EntityList::insert_before: new node already linked into a (different?) list" + ); + + new_node_def.prev = prev; + new_node_def.next = Some(next); + + match prev { + Some(prev) => { + let old_prev_next = defs[prev].next.replace(new_node); + + // FIXME(eddyb) this situation should be impossible anyway, as it + // involves the `EntityListNode`s links, which should be unforgeable. + assert!( + old_prev_next == Some(next), + "invalid EntityListNode: `node->prev->next != node`" + ); + } + None => { + // FIXME(eddyb) this situation should be impossible anyway, as it + // involves the `EntityListNode`s links, which should be unforgeable, + // but it's still possible to keep around outdated `EntityList`s + // (should `EntityList` not implement `Copy`/`Clone` *at all*?) + assert!( + self.0.map(|this| this.first) == Some(next), + "invalid EntityList: `node->prev == None` but `node != first`" + ); + + self.0.as_mut().unwrap().first = new_node; + } + } + } + /// Insert all of `list_to_prepend`'s nodes at the start of `self`. #[track_caller] pub fn prepend(&mut self, list_to_prepend: Self, defs: &mut EntityDefs) { @@ -582,7 +636,9 @@ impl>, D> EntityList { let a_last_def = &mut defs[a.last]; // FIXME(eddyb) this situation should be impossible anyway, as it - // involves the `EntityListNode`s links, which should be unforgeable. 
+ // involves the `EntityListNode`s links, which should be unforgeable, + // but it's still possible to keep around outdated `EntityList`s + // (should `EntityList` not implement `Copy`/`Clone` *at all*?) assert!( a_last_def.next.is_none(), "invalid EntityList: `last->next != None`" @@ -594,7 +650,9 @@ impl>, D> EntityList { let b_first_def = &mut defs[b.first]; // FIXME(eddyb) this situation should be impossible anyway, as it - // involves the `EntityListNode`s links, which should be unforgeable. + // involves the `EntityListNode`s links, which should be unforgeable, + // but it's still possible to keep around outdated `EntityList`s + // (should `EntityList` not implement `Copy`/`Clone` *at all*?) assert!( b_first_def.prev.is_none(), "invalid EntityList: `first->prev != None`" @@ -608,6 +666,72 @@ impl>, D> EntityList { last: b.last, })) } + + /// Remove `node` (defined in `defs`) from `self`. + #[track_caller] + pub fn remove(&mut self, node: E, defs: &mut EntityDefs) { + // Unlink `node->{prev,next}` first (also allowing re-insertion elsewhere). + let (prev, next) = { + let node_def = &mut defs[node]; + (node_def.prev.take(), node_def.next.take()) + }; + + // Unlink `prev->next = node` (or validate `first = node`). + match prev { + Some(prev) => { + let old_prev_next = mem::replace(&mut defs[prev].next, next); + + // FIXME(eddyb) this situation should be impossible anyway, as it + // involves the `EntityListNode`s links, which should be unforgeable. + assert!( + old_prev_next == Some(node), + "invalid EntityListNode: `node->prev->next != node`" + ); + } + None => { + // FIXME(eddyb) this situation should be impossible anyway, as it + // involves the `EntityListNode`s links, which should be unforgeable, + // but it's still possible to keep around outdated `EntityList`s + // (should `EntityList` not implement `Copy`/`Clone` *at all*?) 
+ assert!( + self.0.map(|this| this.first) == Some(node), + "invalid EntityList: `node->prev == None` but `node != first`" + ); + } + } + + // Unlink `next->prev = node` (or validate `last = node`). + match next { + Some(next) => { + let old_next_prev = mem::replace(&mut defs[next].prev, prev); + + // FIXME(eddyb) this situation should be impossible anyway, as it + // involves the `EntityListNode`s links, which should be unforgeable. + assert!( + old_next_prev == Some(node), + "invalid EntityListNode: `node->next->prev != node`" + ); + } + None => { + // FIXME(eddyb) this situation should be impossible anyway, as it + // involves the `EntityListNode`s links, which should be unforgeable, + // but it's still possible to keep around outdated `EntityList`s + // (should `EntityList` not implement `Copy`/`Clone` *at all*?) + assert!( + self.0.map(|this| this.last) == Some(node), + "invalid EntityList: `node->next == None` but `node != last`" + ); + } + } + + // Update list end-points (overwritten `first`/`last` validated above). + match (prev, next) { + (Some(_), Some(_)) => {} + (None, Some(next)) => self.0.as_mut().unwrap().first = next, + (Some(prev), None) => self.0.as_mut().unwrap().last = prev, + (None, None) => self.0 = None, + } + } } /// [`EntityList`] iterator, but with a different API than [`Iterator`]. diff --git a/src/func_at.rs b/src/func_at.rs index f5e1e62..fba3eea 100644 --- a/src/func_at.rs +++ b/src/func_at.rs @@ -91,6 +91,14 @@ impl<'a> Iterator for FuncAt<'a, EntityListIter> { } } +impl<'a> DoubleEndedIterator for FuncAt<'a, EntityListIter> { + fn next_back(&mut self) -> Option { + let (prev, rest) = self.position.split_last(self.data_insts)?; + self.position = rest; + Some(self.at(prev)) + } +} + impl<'a> FuncAt<'a, DataInst> { pub fn def(self) -> &'a DataInstDef { &self.data_insts[self.position] @@ -146,6 +154,24 @@ impl<'a, P: Copy> FuncAtMut<'a, P> { position: new_position, } } + + /// Demote to a `FuncAt`, with the same `position`. 
+ // + // FIXME(eddyb) maybe find a better name for this? + pub fn freeze(self) -> FuncAt<'a, P> { + let FuncAtMut { + control_regions, + control_nodes, + data_insts, + position, + } = self; + FuncAt { + control_regions, + control_nodes, + data_insts, + position, + } + } } impl<'a> FuncAtMut<'a, ControlRegion> { diff --git a/src/lib.rs b/src/lib.rs index aa4639c..195054c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -165,7 +165,9 @@ pub mod passes { pub mod legalize; pub mod link; + pub mod qptr; } +pub mod qptr; pub mod spv; use smallvec::SmallVec; @@ -338,6 +340,9 @@ impl AttrSet { // FIXME(eddyb) consider interning individual attrs, not just `AttrSet`s. #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Attr { + /// `QPtr`-specific attributes (see [`qptr::QPtrAttr`]). + QPtr(qptr::QPtrAttr), + SpvAnnotation(spv::Inst), SpvDebugLine { @@ -348,6 +353,7 @@ pub enum Attr { /// Some SPIR-V instructions, like `OpFunction`, take a bitflags operand /// that is effectively an optimization over using `OpDecorate`. + // // FIXME(eddyb) handle flags having further operands as parameters. SpvBitflagsOperand(spv::Imm), @@ -412,6 +418,7 @@ pub enum DiagMsgPart { Attrs(AttrSet), Type(Type), Const(Const), + QPtrUsage(qptr::QPtrUsage), } // FIXME(eddyb) move this out of `lib.rs` and/or define with a macro. @@ -445,6 +452,12 @@ impl From for DiagMsgPart { } } +impl From for DiagMsgPart { + fn from(usage: qptr::QPtrUsage) -> Self { + Self::QPtrUsage(usage) + } +} + /// Wrapper to limit `Ord` for interned index types (e.g. [`InternedStr`]) /// to only situations where the interned index reflects contents (i.e. equality). // @@ -488,6 +501,22 @@ pub struct TypeDef { /// [`Type`] "constructor": a [`TypeDef`] wiithout any [`TypeCtorArg`]s ([`Type`]s/[`Const`]s). 
#[derive(Clone, PartialEq, Eq, Hash)] pub enum TypeCtor { + /// "Quasi-pointer", an untyped pointer-like abstract scalar that can represent + /// both memory locations (in any address space) and other kinds of locations + /// (e.g. SPIR-V `OpVariable`s in non-memory "storage classes"). + /// + /// This flexibility can be used to represent pointers from source languages + /// that expect/are defined to operate on untyped memory (C, C++, Rust, etc.), + /// that can then be legalized away (e.g. via inlining) or even emulated. + /// + /// Information narrowing down how values of the type may be created/used + /// (e.g. "points to variable `x`" or "accessed at offset `y`") can be found + /// attached as `Attr`s on those `Value`s (see [`Attr::QPtr`]). + // + // FIXME(eddyb) a "refinement system" that's orthogonal from types, and kept + // separately in e.g. `ControlRegionInputDecl`, might be a better approach? + QPtr, + SpvInst(spv::Inst), /// The type of a [`ConstCtor::SpvStringLiteralForExtInst`] constant, i.e. @@ -560,14 +589,24 @@ pub struct GlobalVarDecl { // FIXME(eddyb) try to replace with value type (or at least have that too). pub type_of_ptr_to: Type, + /// When `type_of_ptr_to` is `QPtr`, `shape` must be used to describe the + /// global variable (see `GlobalVarShape`'s documentation for more details). + pub shape: Option, + /// The address space the global variable will be allocated into. pub addr_space: AddrSpace, pub def: DeclDef, } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq, Eq, Hash)] pub enum AddrSpace { + /// Placeholder for `GlobalVar`s with `GlobalVarShape::Handles`. + /// + /// In SPIR-V, this corresponds to `UniformConstant` for `Handle::Opaque`, + /// or the buffer's storage class for `Handle::Buffer`. + Handles, + SpvStorageClass(u32), } @@ -845,8 +884,14 @@ pub enum DataInstKind { // to avoid needing special handling for recursion where it's impossible. FuncCall(Func), + /// `QPtr`-specific operations (see [`qptr::QPtrOp`]). 
+ QPtr(qptr::QPtrOp), + SpvInst(spv::Inst), - SpvExtInst { ext_set: InternedStr, inst: u32 }, + SpvExtInst { + ext_set: InternedStr, + inst: u32, + }, } #[derive(Copy, Clone, PartialEq, Eq)] diff --git a/src/passes/legalize.rs b/src/passes/legalize.rs index 386b416..c7cb430 100644 --- a/src/passes/legalize.rs +++ b/src/passes/legalize.rs @@ -15,7 +15,8 @@ pub fn structurize_func_cfgs(module: &mut Module) { seen_global_vars: FxIndexSet::default(), seen_funcs: FxIndexSet::default(), }; - for &exportee in module.exports.values() { + for (export_key, &exportee) in &module.exports { + export_key.inner_visit_with(&mut collector); exportee.inner_visit_with(&mut collector); } diff --git a/src/passes/qptr.rs b/src/passes/qptr.rs new file mode 100644 index 0000000..0342f9f --- /dev/null +++ b/src/passes/qptr.rs @@ -0,0 +1,104 @@ +//! [`QPtr`](crate::TypeCtor::QPtr) transforms. + +use crate::qptr; +use crate::visit::{InnerVisit, Visitor}; +use crate::{AttrSet, Const, Context, Func, FxIndexSet, GlobalVar, Module, Type}; + +pub fn lower_from_spv_ptrs(module: &mut Module, layout_config: &qptr::LayoutConfig) { + let cx = &module.cx(); + + let (seen_global_vars, seen_funcs) = { + // FIXME(eddyb) reuse this collection work in some kind of "pass manager". 
+ let mut collector = ReachableUseCollector { + cx, + module, + + seen_types: FxIndexSet::default(), + seen_consts: FxIndexSet::default(), + seen_global_vars: FxIndexSet::default(), + seen_funcs: FxIndexSet::default(), + }; + for (export_key, &exportee) in &module.exports { + export_key.inner_visit_with(&mut collector); + exportee.inner_visit_with(&mut collector); + } + (collector.seen_global_vars, collector.seen_funcs) + }; + + let lowerer = qptr::lower::LowerFromSpvPtrs::new(cx.clone(), layout_config); + for &global_var in &seen_global_vars { + lowerer.lower_global_var(&mut module.global_vars[global_var]); + } + for &func in &seen_funcs { + lowerer.lower_func(&mut module.funcs[func]); + } +} + +pub fn analyze_uses(module: &mut Module, layout_config: &qptr::LayoutConfig) { + qptr::analyze::InferUsage::new(module.cx(), layout_config).infer_usage_in_module(module); +} + +pub fn lift_to_spv_ptrs(module: &mut Module, layout_config: &qptr::LayoutConfig) { + let cx = &module.cx(); + + let (seen_global_vars, seen_funcs) = { + // FIXME(eddyb) reuse this collection work in some kind of "pass manager". + let mut collector = ReachableUseCollector { + cx, + module, + + seen_types: FxIndexSet::default(), + seen_consts: FxIndexSet::default(), + seen_global_vars: FxIndexSet::default(), + seen_funcs: FxIndexSet::default(), + }; + for (export_key, &exportee) in &module.exports { + export_key.inner_visit_with(&mut collector); + exportee.inner_visit_with(&mut collector); + } + (collector.seen_global_vars, collector.seen_funcs) + }; + + let lifter = qptr::lift::LiftToSpvPtrs::new(cx.clone(), layout_config); + for &global_var in &seen_global_vars { + lifter.lift_global_var(&mut module.global_vars[global_var]); + } + lifter.lift_all_funcs(module, seen_funcs); +} + +struct ReachableUseCollector<'a> { + cx: &'a Context, + module: &'a Module, + + // FIXME(eddyb) build some automation to avoid ever repeating these. 
+ seen_types: FxIndexSet, + seen_consts: FxIndexSet, + seen_global_vars: FxIndexSet, + seen_funcs: FxIndexSet, +} + +impl Visitor<'_> for ReachableUseCollector<'_> { + // FIXME(eddyb) build some automation to avoid ever repeating these. + fn visit_attr_set_use(&mut self, _attrs: AttrSet) {} + fn visit_type_use(&mut self, ty: Type) { + if self.seen_types.insert(ty) { + self.visit_type_def(&self.cx[ty]); + } + } + fn visit_const_use(&mut self, ct: Const) { + if self.seen_consts.insert(ct) { + self.visit_const_def(&self.cx[ct]); + } + } + + fn visit_global_var_use(&mut self, gv: GlobalVar) { + if self.seen_global_vars.insert(gv) { + self.visit_global_var_decl(&self.module.global_vars[gv]); + } + } + fn visit_func_use(&mut self, func: Func) { + if self.seen_funcs.insert(func) { + self.visit_func_decl(&self.module.funcs[func]); + } + } +} diff --git a/src/print/mod.rs b/src/print/mod.rs index f1a7cd2..d4f4607 100644 --- a/src/print/mod.rs +++ b/src/print/mod.rs @@ -21,6 +21,7 @@ use itertools::Itertools as _; use crate::func_at::FuncAt; use crate::print::multiversion::Versions; +use crate::qptr::{self, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage}; use crate::visit::{DynVisit, InnerVisit, Visit, Visitor}; use crate::{ cfg, spv, AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstCtor, ConstDef, Context, @@ -420,6 +421,8 @@ impl<'a> Visitor<'a> for Plan<'a> { } fn visit_attr(&mut self, attr: &'a Attr) { + attr.inner_visit_with(self); + // HACK(eddyb) the interpolated parts aren't visited by default // (as they're "inert data"). 
if let Attr::Diagnostics(OrdAssertEq(diags)) = attr { @@ -613,7 +616,9 @@ impl<'a> Printer<'a> { ] .contains(&inst.opcode), - TypeCtor::SpvStringLiteralForExtInst => true, + TypeCtor::QPtr | TypeCtor::SpvStringLiteralForExtInst => { + true + } }; ty_def.attrs == AttrSet::default() @@ -1844,6 +1849,53 @@ impl Print for Attr { ), ), + Attr::QPtr(attr) => { + let (name, params_inputs) = match attr { + QPtrAttr::ToSpvPtrInput { input_idx, pointee } => ( + "to_spv_ptr_input", + pretty::Fragment::new([ + // FIXME(eddyb) is using angle brackets like this consistent styling? + pretty::join_comma_sep( + "<", + [printer + .numeric_literal_style() + .apply(format!("{input_idx}"))], + ">", + ), + pretty::join_comma_sep("(", [pointee.0.print(printer)], ")"), + ]), + ), + + QPtrAttr::FromSpvPtrOutput { + addr_space, + pointee, + } => ( + "from_spv_ptr_output", + pretty::Fragment::new([ + // FIXME(eddyb) is using angle brackets like this consistent styling? + pretty::join_comma_sep("<", [addr_space.0.print(printer)], ">"), + pretty::join_comma_sep("(", [pointee.0.print(printer)], ")"), + ]), + ), + + QPtrAttr::Usage(usage) => ( + "usage", + pretty::join_comma_sep("(", [usage.0.print(printer)], ")"), + ), + }; + ( + AttrStyle::NonComment, + pretty::Fragment::new([ + printer + .demote_style_for_namespace_prefix(printer.attr_style()) + .apply("qptr.") + .into(), + printer.attr_style().apply(name).into(), + params_inputs, + ]), + ) + } + Attr::SpvAnnotation(spv::Inst { opcode, imms }) => { struct ImplicitTargetId; @@ -1903,6 +1955,7 @@ impl Print for Vec { DiagMsgPart::Attrs(attrs) => attrs.print(printer), DiagMsgPart::Type(ty) => ty.print(printer), DiagMsgPart::Const(ct) => ct.print(printer), + DiagMsgPart::QPtrUsage(usage) => usage.print(printer), })); AttrsAndDef { attrs: pretty::Fragment::default(), @@ -1911,6 +1964,78 @@ impl Print for Vec { } } +impl Print for QPtrUsage { + type Output = pretty::Fragment; + + fn print(&self, printer: &Printer<'_>) -> pretty::Fragment { + match 
self { + QPtrUsage::Handles(qptr::shapes::Handle::Opaque(ty)) => ty.print(printer), + QPtrUsage::Handles(qptr::shapes::Handle::Buffer(_, data_usage)) => { + pretty::Fragment::new([ + printer + .declarative_keyword_style() + .apply("buffer_data") + .into(), + pretty::join_comma_sep("(", [data_usage.print(printer)], ")"), + ]) + } + QPtrUsage::Memory(usage) => usage.print(printer), + } + } +} + +impl Print for QPtrMemUsage { + type Output = pretty::Fragment; + + fn print(&self, printer: &Printer<'_>) -> pretty::Fragment { + // FIXME(eddyb) should this be a helper on `Printer`? + let num_lit = |x: u32| printer.numeric_literal_style().apply(format!("{x}")).into(); + + match &self.kind { + QPtrMemUsageKind::Unused => "_".into(), + // FIXME(eddyb) should the distinction be noted? + &QPtrMemUsageKind::StrictlyTyped(ty) | &QPtrMemUsageKind::DirectAccess(ty) => { + ty.print(printer) + } + QPtrMemUsageKind::OffsetBase(entries) => pretty::join_comma_sep( + "{", + entries + .iter() + .map(|(&offset, sub_usage)| { + pretty::Fragment::new([ + num_lit(offset), + "..".into(), + sub_usage + .max_size + .and_then(|max_size| offset.checked_add(max_size)) + .map(num_lit) + .unwrap_or_default(), + " => ".into(), + sub_usage.print(printer), + ]) + }) + .map(|entry| { + pretty::Fragment::new([pretty::Node::ForceLineSeparation.into(), entry]) + }), + "}", + ), + QPtrMemUsageKind::DynOffsetBase { element, stride } => pretty::Fragment::new([ + "(".into(), + num_lit(0), + "..".into(), + self.max_size + .map(|max_size| max_size / stride.get()) + .map(num_lit) + .unwrap_or_default(), + ") × ".into(), + num_lit(stride.get()), + " => ".into(), + element.print(printer), + ]), + } + } +} + impl Print for TypeDef { type Output = AttrsAndDef; fn print(&self, printer: &Printer<'_>) -> AttrsAndDef { @@ -1977,6 +2102,9 @@ impl Print for TypeDef { def } else { match *ctor { + // FIXME(eddyb) should this be shortened to `qtr`? 
+ TypeCtor::QPtr => printer.declarative_keyword_style().apply("qptr").into(), + TypeCtor::SpvInst(spv::Inst { opcode, ref imms }) => printer.pretty_spv_inst( printer.spv_op_style(), opcode, @@ -2175,6 +2303,7 @@ impl Print for GlobalVarDecl { let Self { attrs, type_of_ptr_to, + shape, addr_space, def, } = self; @@ -2187,6 +2316,109 @@ impl Print for GlobalVarDecl { let type_of_ptr_to_def = &printer.cx[*type_of_ptr_to]; match &type_of_ptr_to_def.ctor { + TypeCtor::QPtr if shape.is_some() => match shape.unwrap() { + qptr::shapes::GlobalVarShape::Handles { + handle, + fixed_count, + } => { + let handle = match handle { + qptr::shapes::Handle::Opaque(ty) => ty.print(printer), + qptr::shapes::Handle::Buffer(addr_space, buf) => { + pretty::Fragment::new([ + printer.declarative_keyword_style().apply("buffer").into(), + // FIXME(eddyb) is using angle brackets like this consistent styling? + pretty::join_comma_sep("<", [addr_space.print(printer)], ">"), + pretty::join_comma_sep( + "(", + [ + pretty::Fragment::new([ + "size: ".into(), + pretty::Fragment::new( + Some(buf.fixed_base.size) + .filter(|&base_size| { + base_size > 0 + || buf.dyn_unit_stride.is_none() + }) + .map(|base_size| { + printer + .numeric_literal_style() + .apply(base_size.to_string()) + .into() + }) + .into_iter() + .chain(buf.dyn_unit_stride.map(|stride| { + pretty::Fragment::new([ + "N × ".into(), + printer + .numeric_literal_style() + .apply(stride.to_string()), + ]) + })) + .intersperse_with(|| " + ".into()), + ), + ]), + pretty::Fragment::new([ + "align: ".into(), + printer + .numeric_literal_style() + .apply(buf.fixed_base.align.to_string()), + ]), + ], + ")", + ), + ]) + } + }; + + let handles = if fixed_count.map_or(0, |c| c.get()) == 1 { + handle + } else { + pretty::Fragment::new([ + "[".into(), + fixed_count + .map(|count| { + pretty::Fragment::new([ + printer + .numeric_literal_style() + .apply(count.to_string()), + " × ".into(), + ]) + }) + .unwrap_or_default(), + handle, + "]".into(), + ]) + 
}; + pretty::join_space(":", [handles]) + } + qptr::shapes::GlobalVarShape::UntypedData(mem_layout) => { + pretty::Fragment::new([ + " ".into(), + printer.declarative_keyword_style().apply("layout").into(), + pretty::join_comma_sep( + "(", + [ + pretty::Fragment::new([ + "size: ".into(), + printer + .numeric_literal_style() + .apply(mem_layout.size.to_string()), + ]), + pretty::Fragment::new([ + "align: ".into(), + printer + .numeric_literal_style() + .apply(mem_layout.align.to_string()), + ]), + ], + ")", + ), + ]) + } + qptr::shapes::GlobalVarShape::TypedInterface(ty) => { + printer.pretty_type_ascription_suffix(ty) + } + }, TypeCtor::SpvInst(inst) if inst.opcode == wk.OpTypePointer => { match type_of_ptr_to_def.ctor_args[..] { [TypeCtorArg::Type(ty)] => printer.pretty_type_ascription_suffix(ty), @@ -2202,22 +2434,23 @@ impl Print for GlobalVarDecl { ]), } }; - let addr_space = match *addr_space { - AddrSpace::SpvStorageClass(sc) => printer.pretty_spv_imm(wk.StorageClass, sc), + let addr_space_suffix = match addr_space { + AddrSpace::Handles => pretty::Fragment::default(), + AddrSpace::SpvStorageClass(_) => { + pretty::Fragment::new([" in ".into(), addr_space.print(printer)]) + } }; - let header = pretty::Fragment::new([" in ".into(), addr_space, type_ascription_suffix]); + let header = pretty::Fragment::new([addr_space_suffix, type_ascription_suffix]); - let body = match def { - DeclDef::Imported(import) => { - Some(pretty::Fragment::new(["= ".into(), import.print(printer)])) - } + let maybe_rhs = match def { + DeclDef::Imported(import) => Some(import.print(printer)), DeclDef::Present(GlobalVarDefBody { initializer }) => { - initializer.map(|initializer| { - // FIXME(eddyb) find a better syntax for this. - pretty::Fragment::new(["init=".into(), initializer.print(printer)]) - }) + // FIXME(eddyb) `global_varX in AS: T = Y` feels a bit wonky for + // the initializer, but it's cleaner than obvious alternatives. 
+ initializer.map(|initializer| initializer.print(printer)) } }; + let body = maybe_rhs.map(|rhs| pretty::Fragment::new(["= ".into(), rhs])); let def_without_name = pretty::Fragment::new([header, pretty::join_space("", body)]); @@ -2228,6 +2461,19 @@ impl Print for GlobalVarDecl { } } +impl Print for AddrSpace { + type Output = pretty::Fragment; + fn print(&self, printer: &Printer<'_>) -> pretty::Fragment { + match *self { + AddrSpace::Handles => printer.declarative_keyword_style().apply("handles").into(), + AddrSpace::SpvStorageClass(sc) => { + let wk = &spv::spec::Spec::get().well_known; + printer.pretty_spv_imm(wk.StorageClass, sc) + } + } + } +} + impl Print for FuncDecl { type Output = AttrsAndDef; fn print(&self, printer: &Printer<'_>) -> AttrsAndDef { @@ -2248,7 +2494,7 @@ impl Print for FuncDecl { region: def.body, input_idx: i.try_into().unwrap(), } - .print(printer), + .print_as_def(printer), }; param.print(printer).insert_name_before_def(param_name) }), @@ -2591,26 +2837,171 @@ impl Print for DataInstDef { let attrs = attrs.print(printer); - let header = match *kind { - DataInstKind::FuncCall(func) => pretty::Fragment::new([ + let header = match kind { + &DataInstKind::FuncCall(func) => pretty::Fragment::new([ printer.declarative_keyword_style().apply("call").into(), " ".into(), func.print(printer), ]), - DataInstKind::SpvInst(spv::Inst { opcode, ref imms }) => { + + DataInstKind::QPtr(op) => { + let (qptr_input, extra_inputs) = match op { + // HACK(eddyb) `FuncLocalVar` should probably not even be in `QPtrOp`. 
+ QPtrOp::FuncLocalVar(_) => (None, &inputs[..]), + _ => (Some(inputs[0]), &inputs[1..]), + }; + let (name, extra_inputs): (_, SmallVec<[_; 1]>) = match op { + QPtrOp::FuncLocalVar(mem_layout) => { + assert!(extra_inputs.len() <= 1); + ( + "func_local_var", + [ + pretty::Fragment::new([ + "size: ".into(), + printer + .numeric_literal_style() + .apply(mem_layout.size.to_string()), + ]), + pretty::Fragment::new([ + "align: ".into(), + printer + .numeric_literal_style() + .apply(mem_layout.align.to_string()), + ]), + ] + .into_iter() + .chain(extra_inputs.get(0).map(|&init| { + pretty::Fragment::new(["init: ".into(), init.print(printer)]) + })) + .collect(), + ) + } + + QPtrOp::HandleArrayIndex => { + assert_eq!(extra_inputs.len(), 1); + ( + "handle_array_index", + [extra_inputs[0].print(printer)].into_iter().collect(), + ) + } + QPtrOp::BufferData => { + assert_eq!(extra_inputs.len(), 0); + ("buffer_data", [].into_iter().collect()) + } + &QPtrOp::BufferDynLen { + fixed_base_size, + dyn_unit_stride, + } => { + assert_eq!(extra_inputs.len(), 0); + + // FIXME(eddyb) this isn't very nice, but without mapping + // to actual integer ops, there's not a lot of options. 
+ ( + "buffer_dyn_len", + [ + pretty::Fragment::new([ + "fixed_base_size: ".into(), + printer + .numeric_literal_style() + .apply(fixed_base_size.to_string()), + ]), + pretty::Fragment::new([ + "dyn_unit_stride: ".into(), + printer + .numeric_literal_style() + .apply(dyn_unit_stride.to_string()), + ]), + ] + .into_iter() + .collect(), + ) + } + + QPtrOp::Offset(offset) => { + assert_eq!(extra_inputs.len(), 0); + ( + "offset", + [printer + .numeric_literal_style() + .apply(format!("{offset}")) + .into()] + .into_iter() + .collect(), + ) + } + &QPtrOp::DynOffset { + stride, + index_bounds: _, + } => { + assert_eq!(extra_inputs.len(), 1); + ( + "dyn_offset", + [pretty::Fragment::new([ + extra_inputs[0].print(printer), + " × ".into(), + printer + .numeric_literal_style() + .apply(format!("{stride}")) + .into(), + ])] + .into_iter() + .collect(), + ) + } + + QPtrOp::Load => { + assert_eq!(extra_inputs.len(), 0); + ("load", [].into_iter().collect()) + } + QPtrOp::Store => { + assert_eq!(extra_inputs.len(), 1); + ( + "store", + [extra_inputs[0].print(printer)].into_iter().collect(), + ) + } + }; + + // HACK(eddyb) this duplicates and intentionally breaks away from + // the style of SPIR-V instructions and how they handle immediates. + // FIXME(eddyb) pick a consistent style, this is getting annoying. 
+ return AttrsAndDef { + attrs, + def_without_name: pretty::Fragment::new([ + printer + .demote_style_for_namespace_prefix(printer.declarative_keyword_style()) + .apply("qptr.") + .into(), + printer.declarative_keyword_style().apply(name).into(), + pretty::join_comma_sep( + "(", + qptr_input + .map(|v| v.print(printer)) + .into_iter() + .chain(extra_inputs), + ")", + ), + output_type + .map(|ty| printer.pretty_type_ascription_suffix(ty)) + .unwrap_or_default(), + ]), + }; + } + + DataInstKind::SpvInst(inst) => { return AttrsAndDef { attrs, def_without_name: printer.pretty_spv_inst( printer.spv_op_style(), - opcode, - imms, + inst.opcode, + &inst.imms, inputs, Print::print, *output_type, ), }; } - DataInstKind::SpvExtInst { ext_set, inst } => { + &DataInstKind::SpvExtInst { ext_set, inst } => { let wk = &spv::spec::Spec::get().well_known; // FIXME(eddyb) should this be rendered more compactly? diff --git a/src/qptr/analyze.rs b/src/qptr/analyze.rs new file mode 100644 index 0000000..8c9bfbf --- /dev/null +++ b/src/qptr/analyze.rs @@ -0,0 +1,1314 @@ +//! [`QPtr`](crate::TypeCtor::QPtr) usage analysis (for legalizing/lifting). + +// HACK(eddyb) sharing layout code with other modules. 
+use super::{layout::*, QPtrMemUsageKind}; + +use super::{shapes, QPtrAttr, QPtrMemUsage, QPtrOp, QPtrUsage}; +use crate::func_at::FuncAt; +use crate::visit::{InnerVisit, Visitor}; +use crate::{ + AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstCtor, Context, ControlNode, ControlNodeKind, + DataInst, DataInstKind, DeclDef, Diag, EntityList, ExportKey, Exportee, Func, FxIndexMap, + GlobalVar, Module, OrdAssertEq, Type, TypeCtor, Value, +}; +use itertools::Either; +use rustc_hash::FxHashMap; +use smallvec::SmallVec; +use std::mem; +use std::num::NonZeroU32; +use std::ops::Bound; +use std::rc::Rc; + +#[derive(Clone)] +struct AnalysisError(Diag); + +struct UsageMerger<'a> { + layout_cache: &'a LayoutCache<'a>, +} + +/// Result type for `UsageMerger` methods - unlike `Result`, +/// this always keeps the `T` value, even in the case of an error. +struct MergeResult { + merged: T, + error: Option, +} + +impl MergeResult { + fn ok(merged: T) -> Self { + Self { + merged, + error: None, + } + } + + fn into_result(self) -> Result { + let Self { merged, error } = self; + match error { + None => Ok(merged), + Some(e) => Err(e), + } + } + + fn map(self, f: impl FnOnce(T) -> U) -> MergeResult { + let Self { merged, error } = self; + let merged = f(merged); + MergeResult { merged, error } + } +} + +impl UsageMerger<'_> { + fn merge(&self, a: QPtrUsage, b: QPtrUsage) -> MergeResult { + match (a, b) { + ( + QPtrUsage::Handles(shapes::Handle::Opaque(a)), + QPtrUsage::Handles(shapes::Handle::Opaque(b)), + ) if a == b => MergeResult::ok(QPtrUsage::Handles(shapes::Handle::Opaque(a))), + + ( + QPtrUsage::Handles(shapes::Handle::Buffer(a_as, a)), + QPtrUsage::Handles(shapes::Handle::Buffer(b_as, b)), + ) => { + // HACK(eddyb) the `AddrSpace` field is entirely redundant. 
+ assert!(a_as == AddrSpace::Handles && b_as == AddrSpace::Handles); + + self.merge_mem(a, b).map(|usage| { + QPtrUsage::Handles(shapes::Handle::Buffer(AddrSpace::Handles, usage)) + }) + } + + (QPtrUsage::Memory(a), QPtrUsage::Memory(b)) => { + self.merge_mem(a, b).map(QPtrUsage::Memory) + } + + (a, b) => { + MergeResult { + // FIXME(eddyb) there may be a better choice here, but it + // generally doesn't matter, as this method only has one + // caller, and it just calls `.into_result()` right away. + merged: a.clone(), + error: Some(AnalysisError(Diag::bug([ + "merge: ".into(), + a.into(), + " vs ".into(), + b.into(), + ]))), + } + } + } + } + + fn merge_mem(&self, a: QPtrMemUsage, b: QPtrMemUsage) -> MergeResult { + // NOTE(eddyb) this is possible because it's currently impossible for + // the merged usage to be outside the bounds of *both* `a` and `b`. + let max_size = match (a.max_size, b.max_size) { + (Some(a), Some(b)) => Some(a.max(b)), + (None, _) | (_, None) => None, + }; + + // Ensure that `a` is "larger" than `b`, or at least the same size + // (when either they're identical, or one is a "newtype" of the other), + // to make it easier to handle all the possible interactions below, + // by skipping (or deprioritizing, if supported) the "wrong direction". + let mut sorted = [a, b]; + sorted.sort_by_key(|usage| { + #[derive(PartialEq, Eq, PartialOrd, Ord)] + enum MaxSize { + Fixed(T), + // FIXME(eddyb) this probably needs to track "min size"? + Dynamic, + } + let max_size = usage.max_size.map_or(MaxSize::Dynamic, MaxSize::Fixed); + + // When sizes are equal, pick the more restrictive side. + #[derive(PartialEq, Eq, PartialOrd, Ord)] + enum TypeStrictness { + Any, + Array, + Exact, + } + #[allow(clippy::match_same_arms)] + let type_strictness = match usage.kind { + QPtrMemUsageKind::Unused | QPtrMemUsageKind::OffsetBase(_) => TypeStrictness::Any, + + QPtrMemUsageKind::DynOffsetBase { .. 
} => TypeStrictness::Array, + + // FIXME(eddyb) this should be `Any`, even if in theory it + // could contain arrays or structs that need decomposition + // (note that, for typed reads/write, arrays do not need to be + // *indexed* to work, i.e. they *do not* require `DynOffset`s, + // `Offset`s suffice, and for them `DynOffsetBase` is at most + // a "run-length"/deduplication optimization over `OffsetBase`). + // NOTE(eddyb) this should still prefer `OpTypeVector` over `DynOffsetBase`! + QPtrMemUsageKind::DirectAccess(_) => TypeStrictness::Exact, + + QPtrMemUsageKind::StrictlyTyped(_) => TypeStrictness::Exact, + }; + + (max_size, type_strictness) + }); + let [b, a] = sorted; + assert_eq!(max_size, a.max_size); + + self.merge_mem_at(a, 0, b) + } + + // FIXME(eddyb) make the name of this clarify the asymmetric effect, something + // like "make `a` compatible with `offset => b`". + fn merge_mem_at( + &self, + a: QPtrMemUsage, + b_offset_in_a: u32, + b: QPtrMemUsage, + ) -> MergeResult { + // NOTE(eddyb) this is possible because it's currently impossible for + // the merged usage to be outside the bounds of *both* `a` and `b`. + let max_size = match (a.max_size, b.max_size) { + (Some(a), Some(b)) => Some(a.max(b.checked_add(b_offset_in_a).unwrap())), + (None, _) | (_, None) => None, + }; + + // HACK(eddyb) we require biased `a` vs `b` (see `merge_mem` method above). + assert_eq!(max_size, a.max_size); + + // Decompose the "smaller" and/or "less strict" side (`b`) first. + match b.kind { + // `Unused`s are always ignored. + QPtrMemUsageKind::Unused => return MergeResult::ok(a), + + QPtrMemUsageKind::OffsetBase(b_entries) + if { + // HACK(eddyb) this check was added later, after it turned out + // that *deep* flattening of arbitrary offsets in `b` would've + // required constant-folding of `qptr.offset` in `qptr::lift`, + // to not need all the type nesting levels for `OpAccessChain`. 
+ b_offset_in_a == 0 + } => + { + // FIXME(eddyb) this whole dance only needed due to `Rc`. + let b_entries = Rc::try_unwrap(b_entries); + let b_entries = match b_entries { + Ok(entries) => Either::Left(entries.into_iter()), + Err(ref entries) => Either::Right(entries.iter().map(|(&k, v)| (k, v.clone()))), + }; + + let mut ab = a; + let mut all_errors = None; + for (b_offset, b_sub_usage) in b_entries { + let MergeResult { + merged, + error: new_error, + } = self.merge_mem_at( + ab, + b_offset.checked_add(b_offset_in_a).unwrap(), + b_sub_usage, + ); + ab = merged; + + // FIXME(eddyb) move some of this into `MergeResult`! + if let Some(AnalysisError(e)) = new_error { + let all_errors = &mut all_errors + .get_or_insert(AnalysisError(Diag::bug([]))) + .0 + .message; + // FIXME(eddyb) should this mean `MergeResult` should + // use `errors: Vec` instead of `Option`? + if !all_errors.is_empty() { + all_errors.push("\n".into()); + } + // FIXME(eddyb) this is scuffed because the error might + // (or really *should*) already refer to the right offset! + all_errors.push(format!("+{b_offset} => ").into()); + all_errors.extend(e.message); + } + } + return MergeResult { + merged: ab, + // FIXME(eddyb) should this mean `MergeResult` should + // use `errors: Vec` instead of `Option`? + error: all_errors.map(|AnalysisError(mut e)| { + e.message.insert(0, "merge_mem: conflicts:\n".into()); + AnalysisError(e) + }), + }; + } + + _ => {} + } + + let kind = match a.kind { + // `Unused`s are always ignored. + QPtrMemUsageKind::Unused => MergeResult::ok(b.kind), + + // Typed leaves must support any possible usage applied to them + // (when they match, or overtake, that usage, in size, like here), + // with their inherent hierarchy (i.e. their array/struct nesting). 
+ QPtrMemUsageKind::StrictlyTyped(a_type) | QPtrMemUsageKind::DirectAccess(a_type) => { + let b_type_at_offset_0 = match b.kind { + QPtrMemUsageKind::StrictlyTyped(b_type) + | QPtrMemUsageKind::DirectAccess(b_type) + if b_offset_in_a == 0 => + { + Some(b_type) + } + _ => None, + }; + let ty = if Some(a_type) == b_type_at_offset_0 { + MergeResult::ok(a_type) + } else { + // Returns `Some(MergeResult::ok(ty))` iff `usage` is valid + // for type `ty`, and `None` iff invalid w/o layout errors + // (see `mem_layout_supports_usage_at_offset` for more details). + let type_supporting_usage_at_offset = |ty, usage_offset, usage| { + let supports_usage = match self.layout_of(ty) { + // FIXME(eddyb) should this be `unreachable!()`? also, is + // it possible to end up with `ty` being an `OpTypeStruct` + // decorated with `Block`, showing up as a `Buffer` handle? + // + // NOTE(eddyb) `Block`-annotated buffer types are *not* + // usable anywhere inside buffer data, since they would + // conflict with our own `Block`-annotated wrapper. + Ok(TypeLayout::Handle(_) | TypeLayout::HandleArray(..)) => { + Err(AnalysisError(Diag::bug([ + "merge_mem: impossible handle type for QPtrMemUsage".into(), + ]))) + } + Ok(TypeLayout::Concrete(concrete)) => { + Ok(concrete.supports_usage_at_offset(usage_offset, usage)) + } + + Err(e) => Err(e), + }; + match supports_usage { + Ok(false) => None, + Ok(true) | Err(_) => Some(MergeResult { + merged: ty, + error: supports_usage.err(), + }), + } + }; + + type_supporting_usage_at_offset(a_type, b_offset_in_a, &b) + .or_else(|| { + b_type_at_offset_0.and_then(|b_type_at_offset_0| { + type_supporting_usage_at_offset(b_type_at_offset_0, 0, &a) + }) + }) + .unwrap_or_else(|| { + MergeResult { + merged: a_type, + // FIXME(eddyb) this should ideally embed the types in the + // error somehow. 
+ error: Some(AnalysisError(Diag::bug([ + "merge_mem: type subcomponents incompatible with usage (" + .into(), + QPtrUsage::Memory(a.clone()).into(), + " vs ".into(), + QPtrUsage::Memory(b.clone()).into(), + ")".into(), + ]))), + } + }) + }; + + // FIXME(eddyb) if the chosen (maybe-larger) side isn't strict, + // it should also be possible to expand it into its components, + // with the other (maybe-smaller) side becoming a leaf. + + // FIXME(eddyb) this might not enough because the + // strict leaf could be *nested* inside `b`!!! + let is_strict = |kind| matches!(kind, &QPtrMemUsageKind::StrictlyTyped(_)); + if is_strict(&a.kind) || is_strict(&b.kind) { + ty.map(QPtrMemUsageKind::StrictlyTyped) + } else { + ty.map(QPtrMemUsageKind::DirectAccess) + } + } + + QPtrMemUsageKind::DynOffsetBase { + element: mut a_element, + stride: a_stride, + } => { + let b_offset_in_a_element = b_offset_in_a % a_stride; + + // Array-like dynamic offsetting needs to always merge any usage that + // fits inside the stride, with its "element" usage, no matter how + // complex it may be (notably, this is needed for nested arrays). + if b.max_size + .and_then(|b_max_size| b_max_size.checked_add(b_offset_in_a_element)) + .map_or(false, |b_in_a_max_size| b_in_a_max_size <= a_stride.get()) + { + // FIXME(eddyb) this in-place merging dance only needed due to `Rc`. + ({ + let a_element_mut = Rc::make_mut(&mut a_element); + let a_element = mem::replace(a_element_mut, QPtrMemUsage::UNUSED); + // FIXME(eddyb) remove this silliness by making `merge_mem_at` do symmetrical sorting. 
+ if b_offset_in_a_element == 0 { + self.merge_mem(a_element, b) + } else { + self.merge_mem_at(a_element, b_offset_in_a_element, b) + } + .map(|merged| *a_element_mut = merged) + }) + .map(|()| QPtrMemUsageKind::DynOffsetBase { + element: a_element, + stride: a_stride, + }) + } else { + match b.kind { + QPtrMemUsageKind::DynOffsetBase { + element: b_element, + stride: b_stride, + } if b_offset_in_a_element == 0 && a_stride == b_stride => { + // FIXME(eddyb) this in-place merging dance only needed due to `Rc`. + ({ + let a_element_mut = Rc::make_mut(&mut a_element); + let a_element = mem::replace(a_element_mut, QPtrMemUsage::UNUSED); + let b_element = + Rc::try_unwrap(b_element).unwrap_or_else(|e| (*e).clone()); + self.merge_mem(a_element, b_element) + .map(|merged| *a_element_mut = merged) + }) + .map(|()| { + QPtrMemUsageKind::DynOffsetBase { + element: a_element, + stride: a_stride, + } + }) + } + _ => { + // FIXME(eddyb) implement somehow (by adjusting stride?). + // NOTE(eddyb) with `b` as an `DynOffsetBase`/`OffsetBase`, it could + // also be possible to superimpose its offset patterns onto `a`, + // though that's easier for `OffsetBase` than `DynOffsetBase`. + // HACK(eddyb) needed due to `a` being moved out of. 
+ let a = QPtrMemUsage { + max_size: a.max_size, + kind: QPtrMemUsageKind::DynOffsetBase { + element: a_element, + stride: a_stride, + }, + }; + MergeResult { + merged: a.kind.clone(), + error: Some(AnalysisError(Diag::bug([ + format!("merge_mem: unimplemented non-intra-element merging into stride={a_stride} (") + .into(), + QPtrUsage::Memory(a).into(), + " vs ".into(), + QPtrUsage::Memory(b).into(), + ")".into(), + ]))), + } + } + } + } + } + + QPtrMemUsageKind::OffsetBase(mut a_entries) => { + let overlapping_entries = a_entries + .range(( + Bound::Unbounded, + b.max_size.map_or(Bound::Unbounded, |b_max_size| { + Bound::Excluded(b_offset_in_a.checked_add(b_max_size).unwrap()) + }), + )) + .rev() + .take_while(|(a_sub_offset, a_sub_usage)| { + a_sub_usage.max_size.map_or(true, |a_sub_max_size| { + a_sub_offset.checked_add(a_sub_max_size).unwrap() > b_offset_in_a + }) + }); + + // FIXME(eddyb) this is a bit inefficient but we don't have + // cursors, so we have to buffer the `BTreeMap` keys here. + let overlapping_offsets: SmallVec<[u32; 16]> = overlapping_entries + .map(|(&a_sub_offset, _)| a_sub_offset) + .collect(); + let a_entries_mut = Rc::make_mut(&mut a_entries); + let mut all_errors = None; + let (mut b_offset_in_a, mut b) = (b_offset_in_a, b); + for a_sub_offset in overlapping_offsets { + let a_sub_usage = a_entries_mut.remove(&a_sub_offset).unwrap(); + + // HACK(eddyb) this replicates the condition in which + // `merge_mem_at` would fail its similar assert, some of + // the cases denied here might be legal, but they're rare + // enough that we can do this for now. 
+ let is_illegal = a_sub_offset != b_offset_in_a && { + let (a_sub_total_max_size, b_total_max_size) = ( + a_sub_usage + .max_size + .map(|a| a.checked_add(a_sub_offset).unwrap()), + b.max_size.map(|b| b.checked_add(b_offset_in_a).unwrap()), + ); + let total_max_size_merged = match (a_sub_total_max_size, b_total_max_size) { + (Some(a), Some(b)) => Some(a.max(b)), + (None, _) | (_, None) => None, + }; + total_max_size_merged + != if a_sub_offset < b_offset_in_a { + a_sub_total_max_size + } else { + b_total_max_size + } + }; + if is_illegal { + // HACK(eddyb) needed due to `a` being moved out of. + let a = QPtrMemUsage { + max_size: a.max_size, + kind: QPtrMemUsageKind::OffsetBase(a_entries.clone()), + }; + return MergeResult { + merged: QPtrMemUsage { + max_size, + kind: QPtrMemUsageKind::OffsetBase(a_entries), + }, + error: Some(AnalysisError(Diag::bug([ + format!( + "merge_mem: unsupported straddling overlap \ + at offsets {a_sub_offset} vs {b_offset_in_a} (" + ) + .into(), + QPtrUsage::Memory(a).into(), + " vs ".into(), + QPtrUsage::Memory(b).into(), + ")".into(), + ]))), + }; + } + + let new_error; + ( + b_offset_in_a, + MergeResult { + merged: b, + error: new_error, + }, + ) = if a_sub_offset < b_offset_in_a { + ( + a_sub_offset, + self.merge_mem_at(a_sub_usage, b_offset_in_a - a_sub_offset, b), + ) + } else { + // FIXME(eddyb) remove this silliness by making `merge_mem_at` do symmetrical sorting. + if a_sub_offset - b_offset_in_a == 0 { + (b_offset_in_a, self.merge_mem(b, a_sub_usage)) + } else { + ( + b_offset_in_a, + self.merge_mem_at(b, a_sub_offset - b_offset_in_a, a_sub_usage), + ) + } + }; + + // FIXME(eddyb) move some of this into `MergeResult`! + if let Some(AnalysisError(e)) = new_error { + let all_errors = &mut all_errors + .get_or_insert(AnalysisError(Diag::bug([]))) + .0 + .message; + // FIXME(eddyb) should this mean `MergeResult` should + // use `errors: Vec` instead of `Option`? 
+ if !all_errors.is_empty() { + all_errors.push("\n".into()); + } + // FIXME(eddyb) this is scuffed because the error might + // (or really *should*) already refer to the right offset! + all_errors.push(format!("+{a_sub_offset} => ").into()); + all_errors.extend(e.message); + } + } + a_entries_mut.insert(b_offset_in_a, b); + MergeResult { + merged: QPtrMemUsageKind::OffsetBase(a_entries), + // FIXME(eddyb) should this mean `MergeResult` should + // use `errors: Vec` instead of `Option`? + error: all_errors.map(|AnalysisError(mut e)| { + e.message.insert(0, "merge_mem: conflicts:\n".into()); + AnalysisError(e) + }), + } + } + }; + kind.map(|kind| QPtrMemUsage { max_size, kind }) + } + + /// Attempt to compute a `TypeLayout` for a given (SPIR-V) `Type`. + fn layout_of(&self, ty: Type) -> Result { + self.layout_cache + .layout_of(ty) + .map_err(|LayoutError(err)| AnalysisError(err)) + } +} + +impl MemTypeLayout { + /// Determine if this layout is compatible with `usage` at `usage_offset`. + /// + /// That is, all typed leaves of `usage` must be found inside `self`, at + /// their respective offsets, and all [`QPtrMemUsageKind::DynOffsetBase`]s + /// must find a same-stride array inside `self` (to allow dynamic indexing). + // + // FIXME(eddyb) consider using `Result` to make it unambiguous. + fn supports_usage_at_offset(&self, usage_offset: u32, usage: &QPtrMemUsage) -> bool { + if let QPtrMemUsageKind::Unused = usage.kind { + return true; + } + + // "Fast accept" based on type alone (expected as recursion base case). + if let QPtrMemUsageKind::StrictlyTyped(usage_type) + | QPtrMemUsageKind::DirectAccess(usage_type) = usage.kind + { + if usage_offset == 0 && self.original_type == usage_type { + return true; + } + } + + { + // FIXME(eddyb) should `QPtrMemUsage` track a `min_size` as well? + // FIXME(eddyb) duplicated below. 
+ let min_usage_offset_range = + usage_offset..usage_offset.saturating_add(usage.max_size.unwrap_or(0)); + + // "Fast reject" based on size alone (expected w/ multiple attempts). + if self.mem_layout.dyn_unit_stride.is_none() + && (self.mem_layout.fixed_base.size < min_usage_offset_range.end + || usage.max_size.is_none()) + { + return false; + } + } + + let any_component_supports = |usage_offset: u32, usage: &QPtrMemUsage| { + // FIXME(eddyb) should `QPtrMemUsage` track a `min_size` as well? + // FIXME(eddyb) duplicated above. + let min_usage_offset_range = + usage_offset..usage_offset.saturating_add(usage.max_size.unwrap_or(0)); + + // FIXME(eddyb) `find_components_containing` is linear today but + // could be made logarithmic (via binary search). + self.components + .find_components_containing(min_usage_offset_range) + .any(|idx| match &self.components { + Components::Scalar => unreachable!(), + Components::Elements { stride, elem, .. } => { + elem.supports_usage_at_offset(usage_offset % stride.get(), usage) + } + Components::Fields { + offsets, layouts, .. + } => layouts[idx].supports_usage_at_offset(usage_offset - offsets[idx], usage), + }) + }; + match &usage.kind { + _ if any_component_supports(usage_offset, usage) => true, + + QPtrMemUsageKind::Unused => unreachable!(), + + QPtrMemUsageKind::StrictlyTyped(_) | QPtrMemUsageKind::DirectAccess(_) => false, + + QPtrMemUsageKind::OffsetBase(entries) => { + entries.iter().all(|(&sub_offset, sub_usage)| { + // FIXME(eddyb) maybe this overflow should be propagated up, + // as a sign that `usage` is malformed? 
+ usage_offset + .checked_add(sub_offset) + .map_or(false, |combined_offset| { + // NOTE(eddyb) the reason this is only applicable to + // offset `0` is that *in all other cases*, every + // individual `OffsetBase` requires its own type, to + // allow performing offsets *in steps* (even if the + // offsets could easily be constant-folded, they'd + // *have to* be constant-folded *before* analysis, + // to ensure there is no need for the intermediaries). + if combined_offset == 0 { + self.supports_usage_at_offset(0, sub_usage) + } else { + any_component_supports(combined_offset, sub_usage) + } + }) + }) + } + + // Finding an array entirely nested in a component was handled above, + // so here `layout` can only be a matching array (same stride and length). + QPtrMemUsageKind::DynOffsetBase { + element: usage_elem, + stride: usage_stride, + } => { + let usage_fixed_len = usage + .max_size + .map(|size| { + if size % usage_stride.get() != 0 { + // FIXME(eddyb) maybe this should be propagated up, + // as a sign that `usage` is malformed? + return Err(()); + } + NonZeroU32::new(size / usage_stride.get()).ok_or(()) + }) + .transpose(); + + match &self.components { + // Dynamic offsetting into non-arrays is not supported, and it'd + // only make sense for legalization (or small-length arrays where + // selecting elements based on the index may be a practical choice). + Components::Scalar | Components::Fields { .. } => false, + + Components::Elements { + stride: layout_stride, + elem: layout_elem, + fixed_len: layout_fixed_len, + } => { + // HACK(eddyb) extend the max length implied by `usage`, + // such that the array can start at offset `0`. + let ext_usage_offset = usage_offset % usage_stride.get(); + let ext_usage_fixed_len = usage_fixed_len.and_then(|usage_fixed_len| { + usage_fixed_len + .map(|usage_fixed_len| { + NonZeroU32::new( + // FIXME(eddyb) maybe this overflow should be propagated up, + // as a sign that `usage` is malformed? 
+ (usage_offset / usage_stride.get()) + .checked_add(usage_fixed_len.get()) + .ok_or(())?, + ) + .ok_or(()) + }) + .transpose() + }); + + // FIXME(eddyb) this could maybe be allowed if there is still + // some kind of divisibility relation between the strides. + if ext_usage_offset != 0 { + return false; + } + + layout_stride == usage_stride + && Ok(*layout_fixed_len) == ext_usage_fixed_len + && layout_elem.supports_usage_at_offset(0, usage_elem) + } + } + } + } + } +} + +struct FuncInferUsageResults { + param_usages: SmallVec<[Option>; 2]>, + usage_or_err_attrs_to_attach: Vec<(Value, Result)>, +} + +#[derive(Clone)] +enum FuncInferUsageState { + InProgress, + Complete(Rc), +} + +pub struct InferUsage<'a> { + cx: Rc, + layout_cache: LayoutCache<'a>, + + global_var_usages: FxIndexMap>>, + func_states: FxIndexMap, +} + +impl<'a> InferUsage<'a> { + pub fn new(cx: Rc, layout_config: &'a LayoutConfig) -> Self { + Self { + cx: cx.clone(), + layout_cache: LayoutCache::new(cx, layout_config), + + global_var_usages: Default::default(), + func_states: Default::default(), + } + } + + pub fn infer_usage_in_module(mut self, module: &mut Module) { + for (export_key, &exportee) in &module.exports { + if let Exportee::Func(func) = exportee { + self.infer_usage_in_func(module, func); + } + + // Ensure even unused interface variables get their `qptr.usage`. + match export_key { + ExportKey::LinkName(_) => {} + ExportKey::SpvEntryPoint { + imms: _, + interface_global_vars, + } => { + for &gv in interface_global_vars { + self.global_var_usages.entry(gv).or_insert_with(|| { + Some(Ok(match module.global_vars[gv].shape { + Some(shapes::GlobalVarShape::Handles { handle, .. }) => { + QPtrUsage::Handles(match handle { + shapes::Handle::Opaque(ty) => shapes::Handle::Opaque(ty), + shapes::Handle::Buffer(..) 
=> shapes::Handle::Buffer( + AddrSpace::Handles, + QPtrMemUsage::UNUSED, + ), + }) + } + _ => QPtrUsage::Memory(QPtrMemUsage::UNUSED), + })) + }); + } + } + } + } + + // Analysis over, write all attributes back to the module. + for (gv, usage) in self.global_var_usages { + if let Some(usage) = usage { + let global_var_def = &mut module.global_vars[gv]; + match usage { + Ok(usage) => { + // FIXME(eddyb) deduplicate attribute manipulation. + global_var_def.attrs = self.cx.intern(AttrSetDef { + attrs: self.cx[global_var_def.attrs] + .attrs + .iter() + .cloned() + .chain([Attr::QPtr(QPtrAttr::Usage(OrdAssertEq(usage)))]) + .collect(), + }); + } + Err(AnalysisError(e)) => { + global_var_def.attrs.push_diag(&self.cx, e); + } + } + } + } + for (func, state) in self.func_states { + match state { + FuncInferUsageState::InProgress => unreachable!(), + FuncInferUsageState::Complete(func_results) => { + let FuncInferUsageResults { + param_usages, + usage_or_err_attrs_to_attach, + } = Rc::try_unwrap(func_results).ok().unwrap(); + + let func_decl = &mut module.funcs[func]; + for (param_decl, usage) in func_decl.params.iter_mut().zip(param_usages) { + if let Some(usage) = usage { + match usage { + Ok(usage) => { + // FIXME(eddyb) deduplicate attribute manipulation. 
+ param_decl.attrs = self.cx.intern(AttrSetDef { + attrs: self.cx[param_decl.attrs] + .attrs + .iter() + .cloned() + .chain([Attr::QPtr(QPtrAttr::Usage(OrdAssertEq( + usage, + )))]) + .collect(), + }); + } + Err(AnalysisError(e)) => { + param_decl.attrs.push_diag(&self.cx, e); + } + } + } + } + + let func_def_body = match &mut module.funcs[func].def { + DeclDef::Present(func_def_body) => func_def_body, + DeclDef::Imported(_) => continue, + }; + + for (v, usage) in usage_or_err_attrs_to_attach { + let attrs = match v { + Value::Const(_) => unreachable!(), + Value::ControlRegionInput { region, input_idx } => { + &mut func_def_body.at_mut(region).def().inputs[input_idx as usize] + .attrs + } + Value::ControlNodeOutput { + control_node, + output_idx, + } => { + &mut func_def_body.at_mut(control_node).def().outputs + [output_idx as usize] + .attrs + } + Value::DataInstOutput(data_inst) => { + &mut func_def_body.at_mut(data_inst).def().attrs + } + }; + match usage { + Ok(usage) => { + // FIXME(eddyb) deduplicate attribute manipulation. + *attrs = self.cx.intern(AttrSetDef { + attrs: self.cx[*attrs] + .attrs + .iter() + .cloned() + .chain([Attr::QPtr(QPtrAttr::Usage(OrdAssertEq(usage)))]) + .collect(), + }); + } + Err(AnalysisError(e)) => { + attrs.push_diag(&self.cx, e); + } + } + } + } + } + } + } + + // HACK(eddyb) `FuncInferUsageState` also serves to indicate recursion errors. 
+ fn infer_usage_in_func(&mut self, module: &Module, func: Func) -> FuncInferUsageState { + if let Some(cached) = self.func_states.get(&func).cloned() { + return cached; + } + + self.func_states + .insert(func, FuncInferUsageState::InProgress); + + let completed_state = + FuncInferUsageState::Complete(Rc::new(self.infer_usage_in_func_uncached(module, func))); + + self.func_states.insert(func, completed_state.clone()); + completed_state + } + fn infer_usage_in_func_uncached( + &mut self, + module: &Module, + func: Func, + ) -> FuncInferUsageResults { + let cx = self.cx.clone(); + let is_qptr = |ty: Type| matches!(cx[ty].ctor, TypeCtor::QPtr); + + let func_decl = &module.funcs[func]; + let mut param_usages: SmallVec<[_; 2]> = + (0..func_decl.params.len()).map(|_| None).collect(); + let mut usage_or_err_attrs_to_attach = vec![]; + + let func_def_body = match &module.funcs[func].def { + DeclDef::Present(func_def_body) => func_def_body, + DeclDef::Imported(_) => { + for (param, param_usage) in func_decl.params.iter().zip(&mut param_usages) { + if is_qptr(param.ty) { + *param_usage = Some(Err(AnalysisError(Diag::bug([ + "pointer param of imported func".into(), + ])))); + } + } + return FuncInferUsageResults { + param_usages, + usage_or_err_attrs_to_attach, + }; + } + }; + + let mut all_data_insts = CollectAllDataInsts::default(); + func_def_body.inner_visit_with(&mut all_data_insts); + + let mut data_inst_output_usages = FxHashMap::default(); + for insts in all_data_insts.0.into_iter().rev() { + for func_at_inst in func_def_body.at(insts).into_iter().rev() { + let data_inst = func_at_inst.position; + let data_inst_def = func_at_inst.def(); + let output_usage = data_inst_output_usages.remove(&data_inst).flatten(); + + let mut generate_usage = |this: &mut Self, ptr: Value, new_usage| { + let slot = match ptr { + Value::Const(ct) => match cx[ct].ctor { + ConstCtor::PtrToGlobalVar(gv) => { + this.global_var_usages.entry(gv).or_default() + } + // FIXME(eddyb) may be relevant? 
+ _ => unreachable!(), + }, + Value::ControlRegionInput { region, input_idx } + if region == func_def_body.body => + { + &mut param_usages[input_idx as usize] + } + // FIXME(eddyb) implement + Value::ControlRegionInput { .. } | Value::ControlNodeOutput { .. } => { + usage_or_err_attrs_to_attach.push(( + ptr, + Err(AnalysisError(Diag::bug(["unsupported φ".into()]))), + )); + return; + } + Value::DataInstOutput(ptr_inst) => { + data_inst_output_usages.entry(ptr_inst).or_default() + } + }; + *slot = Some(match slot.take() { + Some(old) => old.and_then(|old| { + UsageMerger { + layout_cache: &this.layout_cache, + } + .merge(old, new_usage?) + .into_result() + }), + None => new_usage, + }); + }; + match &data_inst_def.kind { + &DataInstKind::FuncCall(callee) => { + match self.infer_usage_in_func(module, callee) { + FuncInferUsageState::Complete(callee_results) => { + for (&arg, param_usage) in data_inst_def + .inputs + .iter() + .zip(&callee_results.param_usages) + { + if let Some(param_usage) = param_usage { + generate_usage(self, arg, param_usage.clone()); + } + } + } + FuncInferUsageState::InProgress => { + usage_or_err_attrs_to_attach.push(( + Value::DataInstOutput(data_inst), + Err(AnalysisError(Diag::bug([ + "unsupported recursive call".into() + ]))), + )); + } + }; + if data_inst_def.output_type.map_or(false, is_qptr) { + if let Some(usage) = output_usage { + usage_or_err_attrs_to_attach + .push((Value::DataInstOutput(data_inst), usage)); + } + } + } + + DataInstKind::QPtr(QPtrOp::FuncLocalVar(_)) => { + if let Some(usage) = output_usage { + usage_or_err_attrs_to_attach + .push((Value::DataInstOutput(data_inst), usage)); + } + } + DataInstKind::QPtr(QPtrOp::HandleArrayIndex) => { + generate_usage( + self, + data_inst_def.inputs[0], + output_usage + .unwrap_or_else(|| { + Err(AnalysisError(Diag::bug([ + "HandleArrayIndex: unknown element".into(), + ]))) + }) + .and_then(|usage| match usage { + QPtrUsage::Handles(handle) => Ok(QPtrUsage::Handles(handle)), + 
QPtrUsage::Memory(_) => Err(AnalysisError(Diag::bug([ + "HandleArrayIndex: cannot be used as Memory".into(), + ]))), + }), + ); + } + DataInstKind::QPtr(QPtrOp::BufferData) => { + generate_usage( + self, + data_inst_def.inputs[0], + output_usage + .unwrap_or(Ok(QPtrUsage::Memory(QPtrMemUsage::UNUSED))) + .and_then(|usage| { + let usage = match usage { + QPtrUsage::Handles(_) => { + return Err(AnalysisError(Diag::bug([ + "BufferData: cannot be used as Handles".into(), + ]))); + } + QPtrUsage::Memory(usage) => usage, + }; + Ok(QPtrUsage::Handles(shapes::Handle::Buffer( + AddrSpace::Handles, + usage, + ))) + }), + ); + } + &DataInstKind::QPtr(QPtrOp::BufferDynLen { + fixed_base_size, + dyn_unit_stride, + }) => { + let array_usage = QPtrMemUsage { + max_size: None, + kind: QPtrMemUsageKind::DynOffsetBase { + element: Rc::new(QPtrMemUsage::UNUSED), + stride: dyn_unit_stride, + }, + }; + let buf_data_usage = if fixed_base_size == 0 { + array_usage + } else { + QPtrMemUsage { + max_size: None, + kind: QPtrMemUsageKind::OffsetBase(Rc::new( + [(fixed_base_size, array_usage)].into(), + )), + } + }; + generate_usage( + self, + data_inst_def.inputs[0], + Ok(QPtrUsage::Handles(shapes::Handle::Buffer( + AddrSpace::Handles, + buf_data_usage, + ))), + ); + } + &DataInstKind::QPtr(QPtrOp::Offset(offset)) => { + generate_usage( + self, + data_inst_def.inputs[0], + output_usage + .unwrap_or(Ok(QPtrUsage::Memory(QPtrMemUsage::UNUSED))) + .and_then(|usage| { + let usage = match usage { + QPtrUsage::Handles(_) => { + return Err(AnalysisError(Diag::bug([format!( + "Offset({offset}): cannot offset Handles" + ).into()]))); + } + QPtrUsage::Memory(usage) => usage, + }; + let offset = u32::try_from(offset).ok().ok_or_else(|| { + AnalysisError(Diag::bug([format!("Offset({offset}): negative offset").into()])) + })?; + + // FIXME(eddyb) these should be normalized + // (e.g. constant-folded) out of existence, + // but while they exist, they should be noops. 
+ if offset == 0 { + return Ok(QPtrUsage::Memory(usage)); + } + + Ok(QPtrUsage::Memory(QPtrMemUsage { + max_size: usage + .max_size + .map(|max_size| offset.checked_add(max_size).ok_or_else(|| { + AnalysisError(Diag::bug([format!("Offset({offset}): size overflow ({offset}+{max_size})").into()])) + })).transpose()?, + // FIXME(eddyb) allocating `Rc>` + // to represent the one-element case, seems + // quite wasteful when it's likely consumed. + kind: QPtrMemUsageKind::OffsetBase(Rc::new( + [(offset, usage)].into(), + )), + })) + }), + ); + } + DataInstKind::QPtr(QPtrOp::DynOffset { + stride, + index_bounds, + }) => { + generate_usage( + self, + data_inst_def.inputs[0], + output_usage + .unwrap_or(Ok(QPtrUsage::Memory(QPtrMemUsage::UNUSED))) + .and_then(|usage| { + let usage = match usage { + QPtrUsage::Handles(_) => { + return Err(AnalysisError(Diag::bug(["DynOffset: cannot offset Handles".into()]))); + } + QPtrUsage::Memory(usage) => usage, + }; + match usage.max_size { + None => { + return Err(AnalysisError(Diag::bug(["DynOffset: unsized element".into()]))); + } + // FIXME(eddyb) support this by "folding" + // the usage onto itself (i.e. applying + // `%= stride` on all offsets inside). + Some(max_size) if max_size > stride.get() => { + return Err(AnalysisError(Diag::bug(["DynOffset: element max_size exceeds stride".into()]))); + } + Some(_) => {} + } + Ok(QPtrUsage::Memory(QPtrMemUsage { + // FIXME(eddyb) does the `None` case allow + // for negative offsets? 
+ max_size: index_bounds + .as_ref() + .map(|index_bounds| { + if index_bounds.start < 0 || index_bounds.end < 0 { + return Err(AnalysisError( + Diag::bug([ + "DynOffset: potentially negative offset" + .into(), + ]) + )); + } + let index_bounds_end = u32::try_from(index_bounds.end).unwrap(); + index_bounds_end.checked_mul(stride.get()).ok_or_else(|| { + AnalysisError(Diag::bug([ + format!("DynOffset: size overflow ({index_bounds_end}*{stride})").into(), + ])) + }) + }) + .transpose()?, + kind: QPtrMemUsageKind::DynOffsetBase { + element: Rc::new(usage), + stride: *stride, + }, + })) + }), + ); + } + DataInstKind::QPtr(op @ (QPtrOp::Load | QPtrOp::Store)) => { + let (op_name, access_type) = match op { + QPtrOp::Load => ("Load", data_inst_def.output_type.unwrap()), + QPtrOp::Store => ( + "Store", + func_at_inst.at(data_inst_def.inputs[1]).type_of(&cx), + ), + _ => unreachable!(), + }; + generate_usage( + self, + data_inst_def.inputs[0], + self.layout_cache + .layout_of(access_type) + .map_err(|LayoutError(e)| AnalysisError(e)) + .and_then(|layout| match layout { + TypeLayout::Handle(shapes::Handle::Opaque(ty)) => { + Ok(QPtrUsage::Handles(shapes::Handle::Opaque(ty))) + } + TypeLayout::Handle(shapes::Handle::Buffer(..)) => { + Err(AnalysisError(Diag::bug([format!( + "{op_name}: cannot access whole Buffer" + ) + .into()]))) + } + TypeLayout::HandleArray(..) => { + Err(AnalysisError(Diag::bug([format!( + "{op_name}: cannot access whole HandleArray" + ) + .into()]))) + } + TypeLayout::Concrete(concrete) + if concrete.mem_layout.dyn_unit_stride.is_some() => + { + Err(AnalysisError(Diag::bug([format!( + "{op_name}: cannot access unsized type" + ) + .into()]))) + } + TypeLayout::Concrete(concrete) => { + Ok(QPtrUsage::Memory(QPtrMemUsage { + max_size: Some(concrete.mem_layout.fixed_base.size), + kind: QPtrMemUsageKind::DirectAccess(access_type), + })) + } + }), + ); + } + + DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. 
} => { + let mut has_from_spv_ptr_output_attr = false; + for attr in &cx[data_inst_def.attrs].attrs { + match *attr { + Attr::QPtr(QPtrAttr::ToSpvPtrInput { input_idx, pointee }) => { + let ty = pointee.0; + generate_usage( + self, + data_inst_def.inputs[input_idx as usize], + self.layout_cache + .layout_of(ty) + .map_err(|LayoutError(e)| AnalysisError(e)) + .and_then(|layout| match layout { + TypeLayout::Handle(handle) => { + let handle = match handle { + shapes::Handle::Opaque(ty) => { + shapes::Handle::Opaque(ty) + } + // NOTE(eddyb) this error is important, + // as the `Block` annotation on the + // buffer type means the type is *not* + // usable anywhere inside buffer data, + // since it would conflict with our + // own `Block`-annotated wrapper. + shapes::Handle::Buffer(..) => { + return Err(AnalysisError(Diag::bug(["ToSpvPtrInput: whole Buffer ambiguous (handle vs buffer data)".into()]) + )); + } + }; + Ok(QPtrUsage::Handles(handle)) + } + // NOTE(eddyb) because we can't represent + // the original type, in the same way we + // use `QPtrMemUsageKind::StrictlyTyped` + // for non-handles, we can't guarantee + // a generated type that matches the + // desired `pointee` type. + TypeLayout::HandleArray(..) => { + Err(AnalysisError(Diag::bug(["ToSpvPtrInput: whole handle array unrepresentable".into()]) + )) + } + TypeLayout::Concrete(concrete) => { + Ok(QPtrUsage::Memory(QPtrMemUsage { + max_size: if concrete + .mem_layout + .dyn_unit_stride + .is_some() + { + None + } else { + Some( + concrete.mem_layout.fixed_base.size, + ) + }, + kind: QPtrMemUsageKind::StrictlyTyped(ty), + })) + } + }), + ); + } + Attr::QPtr(QPtrAttr::FromSpvPtrOutput { + addr_space: _, + pointee: _, + }) => { + has_from_spv_ptr_output_attr = true; + } + _ => {} + } + } + + if has_from_spv_ptr_output_attr { + // FIXME(eddyb) merge with `FromSpvPtrOutput`'s `pointee`. 
+ if let Some(usage) = output_usage { + usage_or_err_attrs_to_attach + .push((Value::DataInstOutput(data_inst), usage)); + } + } + } + } + } + } + + FuncInferUsageResults { + param_usages, + usage_or_err_attrs_to_attach, + } + } +} + +// HACK(eddyb) this is easier than implementing a proper reverse traversal. +#[derive(Default)] +struct CollectAllDataInsts(Vec>); + +impl Visitor<'_> for CollectAllDataInsts { + // FIXME(eddyb) this is excessive, maybe different kinds of + // visitors should exist for module-level and func-level? + fn visit_attr_set_use(&mut self, _: AttrSet) {} + fn visit_type_use(&mut self, _: Type) {} + fn visit_const_use(&mut self, _: Const) {} + fn visit_global_var_use(&mut self, _: GlobalVar) {} + fn visit_func_use(&mut self, _: Func) {} + + fn visit_control_node_def(&mut self, func_at_control_node: FuncAt<'_, ControlNode>) { + if let ControlNodeKind::Block { insts } = func_at_control_node.def().kind { + self.0.push(insts); + } + func_at_control_node.inner_visit_with(self); + } +} diff --git a/src/qptr/layout.rs b/src/qptr/layout.rs new file mode 100644 index 0000000..3b447c7 --- /dev/null +++ b/src/qptr/layout.rs @@ -0,0 +1,690 @@ +// FIXME(eddyb) layouts are a bit tricky: this recomputes them from several passes. + +use crate::qptr::shapes; +use crate::{ + spv, AddrSpace, Attr, Const, ConstCtor, Context, Diag, FxIndexMap, Type, TypeCtor, TypeCtorArg, +}; +use itertools::Either; +use smallvec::SmallVec; +use std::cell::RefCell; +use std::cmp::Ordering; +use std::num::NonZeroU32; +use std::ops::Range; +use std::rc::Rc; + +/// Various toggles for layout-related behavior that is not unambiguous from the +/// SPIR-V alone, or involves intermediary illegal SPIR-V (during legalization). +// +// FIXME(eddyb) use proper newtypes (and log2 for align!). 
+pub struct LayoutConfig { + pub ignore_legacy_align: bool, + pub min_aggregate_legacy_align: u32, + + /// Assumed size and alignment for `OpTypeBool`, even if unusable + /// with externally-visible concrete memory (i.e. buffers). + /// + /// This is only useful for accurate handling of illegal SPIR-V relying on + /// e.g. pointer casts, and as such defaults to `(1, 1)`, to merely ensure + /// unique offsets and guarantee `qptr::lift` can tell fields apart. + // + // FIXME(eddyb) might be nice to default to an "offsets/sizes are abstract" + // mode, which disallows reinterpretation on the basis that the precise + // offsets/sizes may not match between types (but that's its own nightmare). + pub abstract_bool_size_align: (u32, u32), + + /// Assumed size and alignment for logical `OpTypePointer`s, even if unusable + /// with externally-visible concrete memory (i.e. buffers). + /// + /// This is only useful for accurate handling of illegal SPIR-V relying on + /// e.g. pointer casts, and as such defaults to `(1, 1)`, to merely ensure + /// unique offsets and guarantee `qptr::lift` can tell fields apart. + // + // FIXME(eddyb) might be nice to default to an "offsets/sizes are abstract" + // mode, which disallows reinterpretation on the basis that the precise + // offsets/sizes may not match between types (but that's its own nightmare). + pub logical_ptr_size_align: (u32, u32), +} + +impl LayoutConfig { + pub const VULKAN_SCALAR_LAYOUT: Self = Self { + ignore_legacy_align: true, + min_aggregate_legacy_align: 1, + + abstract_bool_size_align: (1, 1), + logical_ptr_size_align: (1, 1), + }; + pub const VULKAN_STANDARD_LAYOUT: Self = Self { + ignore_legacy_align: false, + ..Self::VULKAN_SCALAR_LAYOUT + }; + // FIXME(eddyb) is this even useful? 
(all the storage classes that have any + // kind of alignment requirements, require explicit offsets) + pub const VULKAN_EXTENDED_ALIGN_UBO_LAYOUT: Self = Self { + min_aggregate_legacy_align: 16, + ..Self::VULKAN_STANDARD_LAYOUT + }; +} + +pub(super) struct LayoutError(pub(super) Diag); + +#[derive(Clone)] +pub(super) enum TypeLayout { + Handle(HandleLayout), + HandleArray(HandleLayout, Option), + + // FIXME(eddyb) unify terminology around "concrete"/"memory"/"untyped (data)". + Concrete(Rc), +} + +// NOTE(eddyb) `Handle` is parameterized over the `Buffer` layout. +pub(super) type HandleLayout = shapes::Handle>; + +pub(super) struct MemTypeLayout { + pub(super) original_type: Type, + pub(super) mem_layout: shapes::MaybeDynMemLayout, + pub(super) components: Components, +} + +// FIXME(eddyb) use proper newtypes for byte sizes. +pub(super) enum Components { + Scalar, + + /// Vector and array elements (all of them having the same `elem` layout). + Elements { + stride: NonZeroU32, + elem: Rc, + fixed_len: Option, + }, + + Fields { + // FIXME(eddyb) should these be fused? (but `u32` is smaller than `Rc`) + offsets: SmallVec<[u32; 4]>, + layouts: SmallVec<[Rc; 4]>, + }, +} + +impl Components { + /// Return all components (by index), which completely contain `offset_range`. + /// + /// If `offset_range` is zero-sized (`offset_range.start == offset_range.end`), + /// this can return multiple components, with at most one ever being non-ZST. + // + // FIXME(eddyb) be more aggressive in pruning ZSTs so this can be simpler. + pub(super) fn find_components_containing( + &self, + // FIXME(eddyb) consider renaming such offset ranges to "extent". 
+ offset_range: Range, + ) -> impl Iterator + '_ { + match self { + Components::Scalar => Either::Left(None.into_iter()), + Components::Elements { + stride, + elem, + fixed_len, + } => { + Either::Left( + Some(offset_range.start / stride.get()) + .and_then(|elem_idx| { + let elem_idx_vs_len = fixed_len + .map_or(Ordering::Less, |fixed_len| elem_idx.cmp(&fixed_len.get())); + let elem_size = match elem_idx_vs_len { + Ordering::Less => elem.mem_layout.fixed_base.size, + + // HACK(eddyb) this allows one-past-the-end pointers. + Ordering::Equal => 0, + + Ordering::Greater => return None, + }; + let elem_start = elem_idx * stride.get(); + Some((elem_idx, elem_start..elem_start.checked_add(elem_size)?)) + }) + .filter(|(_, elem_range)| offset_range.end <= elem_range.end) + .and_then(|(elem_idx, _)| usize::try_from(elem_idx).ok()) + .into_iter(), + ) + } + // FIXME(eddyb) this is inefficient, we should be doing binary search + // on offsets if they're ordered (with an optional `BTreeMap`?) + // - ideally this needs an abstraction tho, some kind of "binary-searchable array"? + Components::Fields { offsets, layouts } => Either::Right( + offsets + .iter() + .zip(layouts) + .map(|(&field_offset, field)| { + // HACK(eddyb) really need a maybe-open-ended range type. + if field.mem_layout.dyn_unit_stride.is_some() { + Err(field_offset..) + } else { + Ok(field_offset + ..field_offset + .checked_add(field.mem_layout.fixed_base.size) + .unwrap()) + } + }) + .enumerate() + .filter(move |(_, field_range)| match field_range { + Ok(field_range) => { + field_range.start <= offset_range.start + && offset_range.end <= field_range.end + } + Err(field_range) => field_range.start <= offset_range.start, + }) + .map(|(field_idx, _)| field_idx), + ), + } + } +} + +/// Context for computing `TypeLayout`s from `Type`s (with caching). 
+pub(super) struct LayoutCache<'a> { + cx: Rc, + wk: &'static spv::spec::WellKnown, + + config: &'a LayoutConfig, + + cache: RefCell>, +} + +impl<'a> LayoutCache<'a> { + pub(super) fn new(cx: Rc, config: &'a LayoutConfig) -> Self { + Self { + cx, + wk: &spv::spec::Spec::get().well_known, + + config, + + cache: Default::default(), + } + } + + // FIXME(eddyb) properly distinguish between zero-extension and sign-extension. + fn const_as_u32(&self, ct: Const) -> Option { + match &self.cx[ct].ctor { + ConstCtor::SpvInst(spv_inst) + if spv_inst.opcode == self.wk.OpConstant && spv_inst.imms.len() == 1 => + { + match spv_inst.imms[..] { + [spv::Imm::Short(_, x)] => Some(x), + _ => unreachable!(), + } + } + _ => None, + } + } + + /// Attempt to compute a `TypeLayout` for a given (SPIR-V) `Type`. + pub(super) fn layout_of(&self, ty: Type) -> Result { + if let Some(cached) = self.cache.borrow().get(&ty).cloned() { + return Ok(cached); + } + + let cx = &self.cx; + let wk = self.wk; + + let ty_def = &cx[ty]; + let spv_inst = match &ty_def.ctor { + // FIXME(eddyb) treat `QPtr`s as scalars. 
+ TypeCtor::QPtr => { + return Err(LayoutError(Diag::bug([ + "`layout_of(qptr)` (already lowered?)".into(), + ]))); + } + TypeCtor::SpvInst(spv_inst) => spv_inst, + TypeCtor::SpvStringLiteralForExtInst => { + return Err(LayoutError(Diag::bug([ + "`layout_of(type_of(OpString<\"...\">))`".into(), + ]))); + } + }; + + let scalar_with_size_and_align = |(size, align)| { + TypeLayout::Concrete(Rc::new(MemTypeLayout { + original_type: ty, + mem_layout: shapes::MaybeDynMemLayout { + fixed_base: shapes::MemLayout { + align, + legacy_align: align, + size, + }, + dyn_unit_stride: None, + }, + components: Components::Scalar, + })) + }; + let scalar = |width: u32| { + assert!(width.is_power_of_two()); + let size = width / 8; + assert_eq!(size * 8, width); + scalar_with_size_and_align((size, size)) + }; + let align_to = |size: u32, align: u32| { + assert!(align.is_power_of_two() && align > 0); + Ok(size.checked_add(align - 1).ok_or_else(|| { + LayoutError(Diag::bug([format!( + "`align_to({size}, {align})` overflowed `u32`" + ) + .into()])) + })? & !(align - 1)) + }; + // HACK(eddyb) named arguments for the `array` closure. + struct ArrayParams { + fixed_len: Option, + known_stride: Option, + min_legacy_align: u32, + legacy_align_multiplier: u32, + } + let array = |elem_type: Type, + ArrayParams { + fixed_len, + known_stride, + min_legacy_align, + legacy_align_multiplier, + }| { + let fixed_len = fixed_len + .map(|x| { + NonZeroU32::new(x).ok_or_else(|| { + LayoutError(Diag::err(["SPIR-V disallows arrays of `0` length".into()])) + }) + }) + .transpose()?; + match self.layout_of(elem_type)? { + TypeLayout::Handle(handle) => Ok(TypeLayout::HandleArray(handle, fixed_len)), + TypeLayout::HandleArray(..) 
=> Err(LayoutError(Diag::err([ + "handle array `".into(), + elem_type.into(), + "` cannot be further wrapped in an array".into(), + ]))), + TypeLayout::Concrete(elem) => { + if elem.mem_layout.dyn_unit_stride.is_some() { + return Err(LayoutError(Diag::err([ + "dynamically sized type `".into(), + elem_type.into(), + "` cannot be further wrapped in an array".into(), + ]))); + } + let stride = match known_stride { + Some(stride) => stride, + None => { + let shapes::MemLayout { + align, + legacy_align, + size, + } = elem.mem_layout.fixed_base; + let (stride, legacy_stride) = + (align_to(size, align)?, align_to(size, legacy_align)?); + + // FIXME(eddyb) this whole ambiguity mechanism is strange and + // maybe unnecessary? (all the storage classes that have any + // kind of alignment requirements, require explicit offsets) + if !self.config.ignore_legacy_align && stride != legacy_stride { + return Err(LayoutError(Diag::bug([format!( + "ambiguous stride: \ + {stride} (scalar) vs {legacy_stride} (legacy), \ + due to alignment differences \ + ({align} scalar vs {legacy_align} legacy)", + ) + .into()]))); + } + stride + } + }; + let stride = NonZeroU32::new(stride).ok_or_else(|| { + LayoutError(Diag::err(["SPIR-V disallows arrays of `0` stride".into()])) + })?; + Ok(TypeLayout::Concrete(Rc::new(MemTypeLayout { + original_type: ty, + mem_layout: shapes::MaybeDynMemLayout { + fixed_base: shapes::MemLayout { + align: elem.mem_layout.fixed_base.align, + legacy_align: elem + .mem_layout + .fixed_base + .legacy_align + .checked_mul(legacy_align_multiplier) + .unwrap() + .max(min_legacy_align), + size: fixed_len + .map(|len| { + stride.checked_mul(len).ok_or_else(|| { + LayoutError(Diag::bug([format!( + "`{stride} * {len}` overflowed `u32`" + ) + .into()])) + }) + }) + .transpose()? 
+ .map_or(0, |size| size.get()), + }, + dyn_unit_stride: if fixed_len.is_none() { + Some(stride) + } else { + None + }, + }, + components: Components::Elements { + stride, + elem, + fixed_len, + }, + }))) + } + } + }; + let short_imm_at = |i| match spv_inst.imms[i] { + spv::Imm::Short(_, x) => x, + _ => unreachable!(), + }; + + // FIXME(eddyb) !!! what if... types had a min/max size and then... + // that would allow surrounding offsets to limit their size... but... ugh... + // ugh this doesn't make any sense. maybe if the front-end specifies + // offsets with "abstract types", it must configure `qptr::layout`? + let layout = if spv_inst.opcode == wk.OpTypeBool { + // FIXME(eddyb) make this properly abstract instead of only configurable. + scalar_with_size_and_align(self.config.abstract_bool_size_align) + } else if spv_inst.opcode == wk.OpTypePointer { + // FIXME(eddyb) make this properly abstract instead of only configurable. + // FIXME(eddyb) categorize `OpTypePointer` by storage class and split on + // logical vs physical here. + scalar_with_size_and_align(self.config.logical_ptr_size_align) + } else if [wk.OpTypeInt, wk.OpTypeFloat].contains(&spv_inst.opcode) { + scalar(short_imm_at(0)) + } else if [wk.OpTypeVector, wk.OpTypeMatrix].contains(&spv_inst.opcode) { + let len = short_imm_at(0); + let (min_legacy_align, legacy_align_multiplier) = if spv_inst.opcode == wk.OpTypeVector + { + // NOTE(eddyb) this is specifically Vulkan "base alignment". + (1, if len <= 2 { 2 } else { 4 }) + } else { + (self.config.min_aggregate_legacy_align, 1) + }; + // NOTE(eddyb) `RowMajor` is disallowed on `OpTypeStruct` members below. + array( + match ty_def.ctor_args[..] { + [TypeCtorArg::Type(elem_type)] => elem_type, + _ => unreachable!(), + }, + ArrayParams { + fixed_len: Some(len), + known_stride: None, + min_legacy_align, + legacy_align_multiplier, + }, + )? 
+ } else if [wk.OpTypeArray, wk.OpTypeRuntimeArray].contains(&spv_inst.opcode) { + let len = ty_def + .ctor_args + .get(1) + .map(|&len| { + let len = match len { + TypeCtorArg::Const(len) => len, + TypeCtorArg::Type(_) => unreachable!(), + }; + self.const_as_u32(len).ok_or_else(|| { + LayoutError(Diag::bug([ + "specialization constants not supported yet".into() + ])) + }) + }) + .transpose()?; + let mut stride_decoration = None; + for attr in &cx[ty_def.attrs].attrs { + match attr { + Attr::SpvAnnotation(attr_spv_inst) + if attr_spv_inst.opcode == wk.OpDecorate + && attr_spv_inst.imms[0] + == spv::Imm::Short(wk.Decoration, wk.ArrayStride) => + { + stride_decoration = Some(match attr_spv_inst.imms[1] { + spv::Imm::Short(_, x) => x, + _ => unreachable!(), + }); + break; + } + _ => {} + } + } + array( + match ty_def.ctor_args[0] { + TypeCtorArg::Type(elem_type) => elem_type, + TypeCtorArg::Const(_) => unreachable!(), + }, + ArrayParams { + fixed_len: len, + known_stride: stride_decoration, + min_legacy_align: self.config.min_aggregate_legacy_align, + legacy_align_multiplier: 1, + }, + )? + } else if spv_inst.opcode == wk.OpTypeStruct { + let field_layouts: SmallVec<[_; 4]> = ty_def + .ctor_args + .iter() + .map(|&arg| match arg { + TypeCtorArg::Type(field_type) => field_type, + TypeCtorArg::Const(_) => unreachable!(), + }) + .map(|field_type| match self.layout_of(field_type)? { + TypeLayout::Handle(_) | TypeLayout::HandleArray(..) 
=> { + Err(LayoutError(Diag::bug([ + "handles cannot be placed in a struct field".into(), + ]))) + } + TypeLayout::Concrete(field_layout) => Ok(field_layout), + }) + .collect::>()?; + + let mut field_offsets: SmallVec<[_; 4]> = SmallVec::with_capacity(field_layouts.len()); + for attr in &cx[ty_def.attrs].attrs { + match attr { + Attr::SpvAnnotation(attr_spv_inst) + if attr_spv_inst.opcode == wk.OpMemberDecorate + && attr_spv_inst.imms[1] + == spv::Imm::Short(wk.Decoration, wk.RowMajor) => + { + return Err(LayoutError(Diag::bug([ + "`RowMajor` matrix types unsupported".into(), + ]))); + } + Attr::SpvAnnotation(attr_spv_inst) + if attr_spv_inst.opcode == wk.OpMemberDecorate + && attr_spv_inst.imms[1] + == spv::Imm::Short(wk.Decoration, wk.Offset) => + { + let (field_idx, field_offset) = match attr_spv_inst.imms[..] { + [spv::Imm::Short(_, idx), _, spv::Imm::Short(_, offset)] => { + (idx, offset) + } + _ => unreachable!(), + }; + let field_idx = usize::try_from(field_idx).unwrap(); + match field_idx.cmp(&field_offsets.len()) { + Ordering::Less => { + return Err(LayoutError(Diag::bug([ + "a struct field cannot have more than one explicit offset" + .into(), + ]))); + } + Ordering::Greater => { + return Err(LayoutError(Diag::bug([ + "structs with explicit offsets must provide them for all fields" + .into(), + ]))); + } + Ordering::Equal => { + field_offsets.push(field_offset); + } + } + } + _ => {} + } + } + let mut mem_layout = shapes::MaybeDynMemLayout { + fixed_base: shapes::MemLayout { + align: 1, + legacy_align: self.config.min_aggregate_legacy_align, + size: 0, + }, + dyn_unit_stride: None, + }; + if !field_offsets.is_empty() { + if field_offsets.len() != field_layouts.len() { + return Err(LayoutError(Diag::bug([ + "structs with explicit offsets must provide them for all fields".into(), + ]))); + } + + // HACK(eddyb) this treats the struct more like an union, but + // it shold nevertheless work (the other approach would be to + // search for the "last field (in 
offset order)", and/or iterate + // all fields in offset order, to validate the lack of overlap), + // and also "last field (in offset order)" approaches would still + // have to look at all the fields in order to compute alignment. + for (&field_offset, field_layout) in field_offsets.iter().zip(&field_layouts) { + let field = field_layout.mem_layout; + + mem_layout.fixed_base.align = + mem_layout.fixed_base.align.max(field.fixed_base.align); + mem_layout.fixed_base.legacy_align = mem_layout + .fixed_base + .legacy_align + .max(field.fixed_base.legacy_align); + mem_layout.fixed_base.size = mem_layout.fixed_base.size.max( + field_offset + .checked_add(field.fixed_base.size) + .ok_or_else(|| { + LayoutError(Diag::bug([format!( + "`{} + {}` overflowed `u32`", + field_offset, field.fixed_base.size + ) + .into()])) + })?, + ); + + // FIXME(eddyb) validate sized-vs-unsized fields, too. + if let Some(field_dyn_unit_stride) = field.dyn_unit_stride { + if mem_layout.dyn_unit_stride.is_some() { + return Err(LayoutError(Diag::bug([ + "only one field of a struct can have a dynamically sized type" + .into(), + ]))); + } + mem_layout.dyn_unit_stride = Some(field_dyn_unit_stride); + } + } + } else { + for field_layout in &field_layouts { + if mem_layout.dyn_unit_stride.is_some() { + return Err(LayoutError(Diag::bug([ + "only the last field of a struct can have a dynamically sized type" + .into(), + ]))); + } + + let field = field_layout.mem_layout; + + let (offset, legacy_offset) = ( + align_to(mem_layout.fixed_base.size, field.fixed_base.align)?, + align_to(mem_layout.fixed_base.size, field.fixed_base.legacy_align)?, + ); + // FIXME(eddyb) this whole ambiguity mechanism is strange and + // maybe unnecessary? 
(all the storage classes that have any + // kind of alignment requirements, require explicit offsets) + if !self.config.ignore_legacy_align && offset != legacy_offset { + return Err(LayoutError(Diag::bug([format!( + "ambiguous offset: {offset} (scalar) vs {legacy_offset} (legacy), \ + due to alignment differences ({} scalar vs {} legacy)", + field.fixed_base.align, field.fixed_base.legacy_align + ) + .into()]))); + } + + field_offsets.push(offset); + + mem_layout.fixed_base.align = + mem_layout.fixed_base.align.max(field.fixed_base.align); + mem_layout.fixed_base.legacy_align = mem_layout + .fixed_base + .legacy_align + .max(field.fixed_base.legacy_align); + mem_layout.fixed_base.size = + offset.checked_add(field.fixed_base.size).ok_or_else(|| { + LayoutError(Diag::bug([format!( + "`{} + {}` overflowed `u32`", + offset, field.fixed_base.size + ) + .into()])) + })?; + + assert!(mem_layout.dyn_unit_stride.is_none()); + mem_layout.dyn_unit_stride = field.dyn_unit_stride; + } + } + // FIXME(eddyb) how should the fixed base be aligned in unsized structs? + if mem_layout.dyn_unit_stride.is_none() { + mem_layout.fixed_base.size = + align_to(mem_layout.fixed_base.size, mem_layout.fixed_base.align)?; + } + + let concrete = Rc::new(MemTypeLayout { + original_type: ty, + mem_layout, + components: Components::Fields { + offsets: field_offsets, + layouts: field_layouts, + }, + }); + let mut is_interface_block = false; + for attr in &cx[ty_def.attrs].attrs { + match attr { + Attr::SpvAnnotation(attr_spv_inst) + if attr_spv_inst.opcode == wk.OpDecorate + && attr_spv_inst.imms[0] + == spv::Imm::Short(wk.Decoration, wk.Block) => + { + is_interface_block = true; + break; + } + _ => {} + } + } + // FIXME(eddyb) not all "interface blocks" imply buffers, so this may + // need to be ignored based on the SPIR-V storage class of a `GlobalVar`. 
+ // + // FIXME(eddyb) but the lowering of operations on pointers depend on + // whether the pointer is to a buffer or a data type - without the + // way Rust-GPU uses `Generic`, it should at least be possible to + // determine from the pointer type itself, at the op lowering time, + // but with storage class inference this isn't knowable... + // + // OTOH, Rust-GPU doesn't really use `Block` outside of buffers, so + // it's plausible there could be `qptr` customization options which + // Rust-GPU uses to unambiguously communicate its (mis)use of SPIR-V + // (long-term it should probably have different Rust types per + // storage class, or even represent buffers as Rust pointers?) + if is_interface_block { + // HACK(eddyb) we need an `AddrSpace` but it's not known yet. + TypeLayout::Handle(shapes::Handle::Buffer(AddrSpace::Handles, concrete)) + } else { + TypeLayout::Concrete(concrete) + } + } else if [ + wk.OpTypeImage, + wk.OpTypeSampler, + wk.OpTypeSampledImage, + wk.OpTypeAccelerationStructureKHR, + ] + .contains(&spv_inst.opcode) + { + TypeLayout::Handle(shapes::Handle::Opaque(ty)) + } else { + return Err(LayoutError(Diag::bug([format!( + "unknown/unsupported SPIR-V type `{}`", + spv_inst.opcode.name() + ) + .into()]))); + }; + self.cache.borrow_mut().insert(ty, layout.clone()); + Ok(layout) + } +} diff --git a/src/qptr/lift.rs b/src/qptr/lift.rs new file mode 100644 index 0000000..bd172d9 --- /dev/null +++ b/src/qptr/lift.rs @@ -0,0 +1,1222 @@ +//! [`QPtr`](crate::TypeCtor::QPtr) lifting (e.g. to SPIR-V). + +// HACK(eddyb) sharing layout code with other modules. 
+use super::layout::*; + +use crate::func_at::FuncAtMut; +use crate::qptr::{shapes, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage}; +use crate::transform::{InnerInPlaceTransform, InnerTransform, Transformed, Transformer}; +use crate::{ + spv, AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstCtor, ConstDef, Context, ControlNode, + ControlNodeKind, DataInst, DataInstDef, DataInstKind, DeclDef, Diag, DiagLevel, EntityDefs, + EntityOrientedDenseMap, Func, FuncDecl, FxIndexMap, GlobalVar, GlobalVarDecl, Module, Type, + TypeCtor, TypeCtorArg, TypeDef, Value, +}; +use smallvec::SmallVec; +use std::cell::Cell; +use std::mem; +use std::num::NonZeroU32; +use std::rc::Rc; + +struct LiftError(Diag); + +/// Context for lifting `QPtr`s to SPIR-V `OpTypePointer`s. +/// +/// See also `passes::qptr::lift_to_spv_ptrs` (which drives this). +pub struct LiftToSpvPtrs<'a> { + cx: Rc, + wk: &'static spv::spec::WellKnown, + layout_cache: LayoutCache<'a>, + + cached_u32_type: Cell>, +} + +impl<'a> LiftToSpvPtrs<'a> { + pub fn new(cx: Rc, layout_config: &'a LayoutConfig) -> Self { + Self { + cx: cx.clone(), + wk: &spv::spec::Spec::get().well_known, + layout_cache: LayoutCache::new(cx, layout_config), + cached_u32_type: Default::default(), + } + } + + pub fn lift_global_var(&self, global_var_decl: &mut GlobalVarDecl) { + match self.spv_ptr_type_and_addr_space_for_global_var(global_var_decl) { + Ok((spv_ptr_type, addr_space)) => { + global_var_decl.attrs = self.strip_qptr_usage_attr(global_var_decl.attrs); + global_var_decl.type_of_ptr_to = spv_ptr_type; + global_var_decl.addr_space = addr_space; + global_var_decl.shape = None; + } + Err(LiftError(e)) => { + global_var_decl.attrs.push_diag(&self.cx, e); + } + } + // FIXME(eddyb) if globals have initializers pointing at other globals, + // here is where they might get fixed up, but that usage is illegal so + // likely needs to get legalized on `qptr`s, before here. 
+ } + + pub fn lift_all_funcs(&self, module: &mut Module, funcs: impl IntoIterator) { + for func in funcs { + LiftToSpvPtrInstsInFunc { + lifter: self, + global_vars: &module.global_vars, + + deferred_ptr_noops: Default::default(), + data_inst_use_counts: Default::default(), + + func_has_qptr_analysis_bug_diags: false, + } + .in_place_transform_func_decl(&mut module.funcs[func]); + } + } + + fn find_qptr_usage_attr(&self, attrs: AttrSet) -> Result<&QPtrUsage, LiftError> { + self.cx[attrs] + .attrs + .iter() + .find_map(|attr| match attr { + Attr::QPtr(QPtrAttr::Usage(usage)) => Some(&usage.0), + _ => None, + }) + .ok_or_else(|| LiftError(Diag::bug(["missing `qptr.usage` attribute".into()]))) + } + + fn strip_qptr_usage_attr(&self, attrs: AttrSet) -> AttrSet { + self.cx.intern(AttrSetDef { + attrs: self.cx[attrs] + .attrs + .iter() + .filter(|attr| !matches!(attr, Attr::QPtr(QPtrAttr::Usage(_)))) + .cloned() + .collect(), + }) + } + + fn spv_ptr_type_and_addr_space_for_global_var( + &self, + global_var_decl: &GlobalVarDecl, + ) -> Result<(Type, AddrSpace), LiftError> { + let wk = self.wk; + + let qptr_usage = self.find_qptr_usage_attr(global_var_decl.attrs)?; + + let shape = global_var_decl + .shape + .ok_or_else(|| LiftError(Diag::bug(["missing shape".into()])))?; + let (storage_class, pointee_type) = match (global_var_decl.addr_space, shape) { + ( + AddrSpace::Handles, + shapes::GlobalVarShape::Handles { + handle, + fixed_count, + }, + ) => { + let (storage_class, handle_type) = match handle { + shapes::Handle::Opaque(ty) => { + if self.pointee_type_for_usage(qptr_usage)? != ty { + return Err(LiftError(Diag::bug([ + "mismatched opaque handle types in `qptr.usage` vs `shape`".into(), + ]))); + } + (wk.UniformConstant, ty) + } + // FIXME(eddyb) validate usage against `buf` and/or expand + // the type to make sure it has the right size. 
+ shapes::Handle::Buffer(AddrSpace::SpvStorageClass(storage_class), _buf) => { + (storage_class, self.pointee_type_for_usage(qptr_usage)?) + } + shapes::Handle::Buffer(AddrSpace::Handles, _) => { + return Err(LiftError(Diag::bug([ + "invalid `AddrSpace::Handles` in `Handle::Buffer`".into(), + ]))); + } + }; + ( + storage_class, + if fixed_count == Some(NonZeroU32::new(1).unwrap()) { + handle_type + } else { + self.spv_op_type_array(handle_type, fixed_count.map(|c| c.get()), None)? + }, + ) + } + // FIXME(eddyb) validate usage against `layout` and/or expand + // the type to make sure it has the right size. + ( + AddrSpace::SpvStorageClass(storage_class), + shapes::GlobalVarShape::UntypedData(_layout), + ) => (storage_class, self.pointee_type_for_usage(qptr_usage)?), + ( + AddrSpace::SpvStorageClass(storage_class), + shapes::GlobalVarShape::TypedInterface(ty), + ) => (storage_class, ty), + + ( + AddrSpace::Handles, + shapes::GlobalVarShape::UntypedData(_) | shapes::GlobalVarShape::TypedInterface(_), + ) + | (AddrSpace::SpvStorageClass(_), shapes::GlobalVarShape::Handles { .. }) => { + return Err(LiftError(Diag::bug([ + "mismatched `addr_space` and `shape`".into(), + ]))); + } + }; + let addr_space = AddrSpace::SpvStorageClass(storage_class); + Ok((self.spv_ptr_type(addr_space, pointee_type), addr_space)) + } + + /// Returns `Some` iff `ty` is a SPIR-V `OpTypePointer`. + // + // FIXME(eddyb) deduplicate with `qptr::lower`. + fn as_spv_ptr_type(&self, ty: Type) -> Option<(AddrSpace, Type)> { + let ty_def = &self.cx[ty]; + match &ty_def.ctor { + TypeCtor::SpvInst(spv_inst) if spv_inst.opcode == self.wk.OpTypePointer => { + let sc = match spv_inst.imms[..] { + [spv::Imm::Short(_, sc)] => sc, + _ => unreachable!(), + }; + let pointee = match ty_def.ctor_args[..] 
{ + [TypeCtorArg::Type(elem_type)] => elem_type, + _ => unreachable!(), + }; + Some((AddrSpace::SpvStorageClass(sc), pointee)) + } + _ => None, + } + } + + fn spv_ptr_type(&self, addr_space: AddrSpace, pointee_type: Type) -> Type { + let wk = self.wk; + + let storage_class = match addr_space { + AddrSpace::Handles => unreachable!(), + AddrSpace::SpvStorageClass(storage_class) => storage_class, + }; + self.cx.intern(TypeDef { + attrs: AttrSet::default(), + ctor: TypeCtor::SpvInst(spv::Inst { + opcode: wk.OpTypePointer, + imms: [spv::Imm::Short(wk.StorageClass, storage_class)] + .into_iter() + .collect(), + }), + ctor_args: [TypeCtorArg::Type(pointee_type)].into_iter().collect(), + }) + } + + fn pointee_type_for_usage(&self, usage: &QPtrUsage) -> Result { + let wk = self.wk; + + match usage { + &QPtrUsage::Handles(shapes::Handle::Opaque(ty)) => Ok(ty), + QPtrUsage::Handles(shapes::Handle::Buffer(_, data_usage)) => { + let attr_spv_decorate_block = Attr::SpvAnnotation(spv::Inst { + opcode: wk.OpDecorate, + imms: [spv::Imm::Short(wk.Decoration, wk.Block)] + .into_iter() + .collect(), + }); + match &data_usage.kind { + QPtrMemUsageKind::Unused => { + self.spv_op_type_struct([], [attr_spv_decorate_block]) + } + QPtrMemUsageKind::OffsetBase(fields) => self.spv_op_type_struct( + fields.iter().map(|(&field_offset, field_usage)| { + Ok((field_offset, self.pointee_type_for_mem_usage(field_usage)?)) + }), + [attr_spv_decorate_block], + ), + QPtrMemUsageKind::StrictlyTyped(_) + | QPtrMemUsageKind::DirectAccess(_) + | QPtrMemUsageKind::DynOffsetBase { .. 
} => self.spv_op_type_struct( + [Ok((0, self.pointee_type_for_mem_usage(data_usage)?))], + [attr_spv_decorate_block], + ), + } + } + QPtrUsage::Memory(usage) => self.pointee_type_for_mem_usage(usage), + } + } + + fn pointee_type_for_mem_usage(&self, usage: &QPtrMemUsage) -> Result { + match &usage.kind { + QPtrMemUsageKind::Unused => self.spv_op_type_struct([], []), + &QPtrMemUsageKind::StrictlyTyped(ty) | &QPtrMemUsageKind::DirectAccess(ty) => Ok(ty), + QPtrMemUsageKind::OffsetBase(fields) => self.spv_op_type_struct( + fields.iter().map(|(&field_offset, field_usage)| { + Ok((field_offset, self.pointee_type_for_mem_usage(field_usage)?)) + }), + [], + ), + QPtrMemUsageKind::DynOffsetBase { element, stride } => { + let element_type = self.pointee_type_for_mem_usage(element)?; + + let fixed_len = usage + .max_size + .map(|size| { + if size % stride.get() != 0 { + return Err(LiftError(Diag::bug([format!( + "DynOffsetBase: size ({size}) not a multiple of stride ({stride})" + ) + .into()]))); + } + Ok(size / stride.get()) + }) + .transpose()?; + + self.spv_op_type_array(element_type, fixed_len, Some(*stride)) + } + } + } + + fn spv_op_type_array( + &self, + element_type: Type, + fixed_len: Option, + stride: Option, + ) -> Result { + let wk = self.wk; + + let stride_attrs = stride.map(|stride| { + self.cx.intern(AttrSetDef { + attrs: [Attr::SpvAnnotation(spv::Inst { + opcode: wk.OpDecorate, + imms: [ + spv::Imm::Short(wk.Decoration, wk.ArrayStride), + spv::Imm::Short(wk.LiteralInteger, stride.get()), + ] + .into_iter() + .collect(), + })] + .into(), + }) + }); + + let spv_opcode = if fixed_len.is_some() { + wk.OpTypeArray + } else { + wk.OpTypeRuntimeArray + }; + + Ok(self.cx.intern(TypeDef { + attrs: stride_attrs.unwrap_or_default(), + ctor: TypeCtor::SpvInst(spv_opcode.into()), + ctor_args: [TypeCtorArg::Type(element_type)] + .into_iter() + .chain(fixed_len.map(|len| TypeCtorArg::Const(self.const_u32(len)))) + .collect(), + })) + } + + fn spv_op_type_struct( + &self, + 
field_offsets_and_types: impl IntoIterator>, + extra_attrs: impl IntoIterator, + ) -> Result { + let wk = self.wk; + + let field_offsets_and_types = field_offsets_and_types.into_iter(); + let mut attrs = AttrSetDef::default(); + let mut type_ctor_args = SmallVec::with_capacity(field_offsets_and_types.size_hint().0); + for (i, field_offset_and_type) in field_offsets_and_types.enumerate() { + let (offset, field_type) = field_offset_and_type?; + attrs.attrs.insert(Attr::SpvAnnotation(spv::Inst { + opcode: wk.OpMemberDecorate, + imms: [ + spv::Imm::Short(wk.LiteralInteger, i.try_into().unwrap()), + spv::Imm::Short(wk.Decoration, wk.Offset), + spv::Imm::Short(wk.LiteralInteger, offset), + ] + .into_iter() + .collect(), + })); + type_ctor_args.push(TypeCtorArg::Type(field_type)); + } + attrs.attrs.extend(extra_attrs); + Ok(self.cx.intern(TypeDef { + attrs: self.cx.intern(attrs), + ctor: TypeCtor::SpvInst(wk.OpTypeStruct.into()), + ctor_args: type_ctor_args, + })) + } + + /// Get the (likely cached) `u32` type. + fn u32_type(&self) -> Type { + if let Some(cached) = self.cached_u32_type.get() { + return cached; + } + let wk = self.wk; + let ty = self.cx.intern(TypeDef { + attrs: AttrSet::default(), + ctor: TypeCtor::SpvInst(spv::Inst { + opcode: wk.OpTypeInt, + imms: [ + spv::Imm::Short(wk.LiteralInteger, 32), + spv::Imm::Short(wk.LiteralInteger, 0), + ] + .into_iter() + .collect(), + }), + ctor_args: [].into_iter().collect(), + }); + self.cached_u32_type.set(Some(ty)); + ty + } + + fn const_u32(&self, x: u32) -> Const { + let wk = self.wk; + + self.cx.intern(ConstDef { + attrs: AttrSet::default(), + ty: self.u32_type(), + ctor: ConstCtor::SpvInst(spv::Inst { + opcode: wk.OpConstant, + imms: [spv::Imm::Short(wk.LiteralContextDependentNumber, x)] + .into_iter() + .collect(), + }), + ctor_args: [].into_iter().collect(), + }) + } + + /// Attempt to compute a `TypeLayout` for a given (SPIR-V) `Type`. 
+ fn layout_of(&self, ty: Type) -> Result { + self.layout_cache + .layout_of(ty) + .map_err(|LayoutError(err)| LiftError(err)) + } +} + +struct LiftToSpvPtrInstsInFunc<'a> { + lifter: &'a LiftToSpvPtrs<'a>, + global_vars: &'a EntityDefs, + + /// Some `QPtr`->`QPtr` `QPtrOp`s must be noops in SPIR-V, but because some + /// of them have meaningful semantic differences in SPIR-T, replacement of + /// their uses must be deferred until after `try_lift_data_inst_def` has had + /// a chance to observe the distinction. + /// + /// E.g. `QPtrOp::BufferData`s cannot adjust the SPIR-V pointer type, due to + /// interactions between the `Block` annotation and any potential trailing + /// `OpTypeRuntimeArray`s (which cannot be nested in non-`Block` structs). + /// + /// The `QPtrOp` itself is only removed after the entire function is lifted, + /// (using `data_inst_use_counts` to determine whether they're truly unused). + deferred_ptr_noops: FxIndexMap, + + // FIXME(eddyb) consider removing this and just do a full second traversal. + data_inst_use_counts: EntityOrientedDenseMap, + + // HACK(eddyb) this is used to avoid noise when `qptr::analyze` failed. + func_has_qptr_analysis_bug_diags: bool, +} + +struct DeferredPtrNoop { + output_pointer: Value, + + output_pointer_addr_space: AddrSpace, + + /// Should be equivalent to `layout_of` on `output_pointer`'s pointee type, + /// except in the case of `QPtrOp::BufferData`. 
+ output_pointee_layout: TypeLayout, + + parent_block: ControlNode, +} + +impl LiftToSpvPtrInstsInFunc<'_> { + fn try_lift_data_inst_def( + &mut self, + mut func_at_data_inst: FuncAtMut<'_, DataInst>, + parent_block: ControlNode, + ) -> Result, LiftError> { + let wk = self.lifter.wk; + let cx = &self.lifter.cx; + + let func_at_data_inst_frozen = func_at_data_inst.reborrow().freeze(); + let data_inst = func_at_data_inst_frozen.position; + let data_inst_def = func_at_data_inst_frozen.def(); + let func = func_at_data_inst_frozen.at(()); + let type_of_val = |v: Value| func.at(v).type_of(cx); + // FIXME(eddyb) maybe all this data should be packaged up together in a + // type with fields like those of `DeferredPtrNoop` (or even more). + let type_of_val_as_spv_ptr_with_layout = |v: Value| { + if let Value::DataInstOutput(v_data_inst) = v { + if let Some(ptr_noop) = self.deferred_ptr_noops.get(&v_data_inst) { + return Ok(( + ptr_noop.output_pointer_addr_space, + ptr_noop.output_pointee_layout.clone(), + )); + } + } + + let (addr_space, pointee_type) = + self.lifter.as_spv_ptr_type(type_of_val(v)).ok_or_else(|| { + LiftError(Diag::bug(["pointer input not an `OpTypePointer`".into()])) + })?; + + Ok((addr_space, self.lifter.layout_of(pointee_type)?)) + }; + let replacement_data_inst_def = match &data_inst_def.kind { + &DataInstKind::FuncCall(_callee) => { + for &v in &data_inst_def.inputs { + if self.lifter.as_spv_ptr_type(type_of_val(v)).is_some() { + return Err(LiftError(Diag::bug([ + "unimplemented calls with pointer args".into(), + ]))); + } + } + return Ok(Transformed::Unchanged); + } + + DataInstKind::QPtr(QPtrOp::FuncLocalVar(_mem_layout)) => { + let qptr_usage = self.lifter.find_qptr_usage_attr(data_inst_def.attrs)?; + + // FIXME(eddyb) validate against `mem_layout`! 
+ let pointee_type = self.lifter.pointee_type_for_usage(qptr_usage)?; + DataInstDef { + attrs: self.lifter.strip_qptr_usage_attr(data_inst_def.attrs), + kind: DataInstKind::SpvInst(spv::Inst { + opcode: wk.OpVariable, + imms: [spv::Imm::Short(wk.StorageClass, wk.Function)] + .into_iter() + .collect(), + }), + output_type: Some( + self.lifter + .spv_ptr_type(AddrSpace::SpvStorageClass(wk.Function), pointee_type), + ), + inputs: data_inst_def.inputs.clone(), + } + } + DataInstKind::QPtr(QPtrOp::HandleArrayIndex) => { + let (addr_space, layout) = + type_of_val_as_spv_ptr_with_layout(data_inst_def.inputs[0])?; + let handle = match layout { + // FIXME(eddyb) standardize variant order in enum/match. + TypeLayout::HandleArray(handle, _) => handle, + TypeLayout::Handle(_) => { + return Err(LiftError(Diag::bug(["cannot index single Handle".into()]))); + } + TypeLayout::Concrete(_) => { + return Err(LiftError(Diag::bug([ + "cannot index memory as handles".into() + ]))); + } + }; + let handle_type = match handle { + shapes::Handle::Opaque(ty) => ty, + shapes::Handle::Buffer(_, buf) => buf.original_type, + }; + DataInstDef { + attrs: data_inst_def.attrs, + kind: DataInstKind::SpvInst(wk.OpAccessChain.into()), + output_type: Some(self.lifter.spv_ptr_type(addr_space, handle_type)), + inputs: data_inst_def.inputs.clone(), + } + } + DataInstKind::QPtr(QPtrOp::BufferData) => { + let buf_ptr = data_inst_def.inputs[0]; + let (addr_space, buf_layout) = type_of_val_as_spv_ptr_with_layout(buf_ptr)?; + + let buf_data_layout = match buf_layout { + TypeLayout::Handle(shapes::Handle::Buffer(_, buf)) => TypeLayout::Concrete(buf), + _ => return Err(LiftError(Diag::bug(["non-Buffer pointee".into()]))), + }; + + self.deferred_ptr_noops.insert( + data_inst, + DeferredPtrNoop { + output_pointer: buf_ptr, + output_pointer_addr_space: addr_space, + output_pointee_layout: buf_data_layout, + parent_block, + }, + ); + + DataInstDef { + // FIXME(eddyb) avoid the repeated call to `type_of_val`, + // 
maybe don't even replace the `QPtrOp::Buffer` instruction? + output_type: Some(type_of_val(buf_ptr)), + ..data_inst_def.clone() + } + } + &DataInstKind::QPtr(QPtrOp::BufferDynLen { + fixed_base_size, + dyn_unit_stride, + }) => { + let buf_ptr = data_inst_def.inputs[0]; + let (_, buf_layout) = type_of_val_as_spv_ptr_with_layout(buf_ptr)?; + + let buf_data_layout = match buf_layout { + TypeLayout::Handle(shapes::Handle::Buffer(_, buf)) => buf, + _ => return Err(LiftError(Diag::bug(["non-Buffer pointee".into()]))), + }; + + let field_idx = match &buf_data_layout.components { + Components::Fields { offsets, layouts } + if offsets.last() == Some(&fixed_base_size) + && layouts.last().map_or(false, |last_field| { + last_field.mem_layout.fixed_base.size == 0 + && last_field.mem_layout.dyn_unit_stride + == Some(dyn_unit_stride) + && matches!( + last_field.components, + Components::Elements { + fixed_len: None, + .. + } + ) + }) => + { + u32::try_from(offsets.len() - 1).unwrap() + } + // FIXME(eddyb) support/diagnose more cases. + _ => { + return Err(LiftError(Diag::bug([ + "buffer data type shape mismatch".into() + ]))); + } + }; + + DataInstDef { + kind: DataInstKind::SpvInst(spv::Inst { + opcode: wk.OpArrayLength, + imms: [spv::Imm::Short(wk.LiteralInteger, field_idx)] + .into_iter() + .collect(), + }), + ..data_inst_def.clone() + } + } + &DataInstKind::QPtr(QPtrOp::Offset(offset)) => { + let base_ptr = data_inst_def.inputs[0]; + let (addr_space, layout) = type_of_val_as_spv_ptr_with_layout(base_ptr)?; + let mut layout = match layout { + TypeLayout::Handle(_) | TypeLayout::HandleArray(..) 
=> { + return Err(LiftError(Diag::bug(["cannot offset Handles".into()]))); + } + TypeLayout::Concrete(mem_layout) => mem_layout, + }; + let mut offset = u32::try_from(offset) + .ok() + .ok_or_else(|| LiftError(Diag::bug(["negative offset".into()])))?; + + let mut access_chain_inputs: SmallVec<_> = [base_ptr].into_iter().collect(); + // FIXME(eddyb) deduplicate with access chain loop for Load/Store. + while offset > 0 { + let idx = { + // HACK(eddyb) supporting ZSTs would be a pain because + // they can "fit" in weird ways, e.g. given 3 offsets + // A, B, C (before/between/after a pair of fields), + // `B..B` is included in both `A..B` and `B..C`. + let allow_zst = false; + let offset_range = if allow_zst { + offset..offset + } else { + offset..offset.saturating_add(1) + }; + let mut component_indices = + layout.components.find_components_containing(offset_range); + match (component_indices.next(), component_indices.next()) { + (None, _) => { + // FIXME(eddyb) this could include the chosen indices, + // and maybe the current type and/or layout. + return Err(LiftError(Diag::bug([format!( + "offset {offset} not found in type layout, after {} access chain indices", + access_chain_inputs.len() - 1 + ).into()]))); + } + (Some(idx), Some(_)) => { + // FIXME(eddyb) !!! this can also be illegal overlap + if allow_zst { + return Err(LiftError(Diag::bug([ + "ambiguity due to ZSTs in type layout".into(), + ]))); + } + // HACK(eddyb) letting illegal overlap through + idx + } + (Some(idx), None) => idx, + } + }; + + let idx_as_i32 = i32::try_from(idx).ok().ok_or_else(|| { + LiftError(Diag::bug([format!( + "{idx} not representable as a positive s32" + ) + .into()])) + })?; + access_chain_inputs + .push(Value::Const(self.lifter.const_u32(idx_as_i32 as u32))); + + match &layout.components { + Components::Scalar => unreachable!(), + Components::Elements { stride, elem, .. 
} => { + offset %= stride.get(); + layout = elem.clone(); + } + Components::Fields { offsets, layouts } => { + offset -= offsets[idx]; + layout = layouts[idx].clone(); + } + } + } + + if access_chain_inputs.len() == 1 { + self.deferred_ptr_noops.insert( + data_inst, + DeferredPtrNoop { + output_pointer: base_ptr, + output_pointer_addr_space: addr_space, + output_pointee_layout: TypeLayout::Concrete(layout), + parent_block, + }, + ); + DataInstDef { + // FIXME(eddyb) avoid the repeated call to `type_of_val`, + // maybe don't even replace the `QPtrOp::Offset` instruction? + output_type: Some(type_of_val(base_ptr)), + ..data_inst_def.clone() + } + } else { + DataInstDef { + attrs: data_inst_def.attrs, + kind: DataInstKind::SpvInst(wk.OpAccessChain.into()), + output_type: Some( + self.lifter.spv_ptr_type(addr_space, layout.original_type), + ), + inputs: access_chain_inputs, + } + } + } + DataInstKind::QPtr(QPtrOp::DynOffset { + stride, + index_bounds, + }) => { + let base_ptr = data_inst_def.inputs[0]; + let (addr_space, layout) = type_of_val_as_spv_ptr_with_layout(base_ptr)?; + let mut layout = match layout { + TypeLayout::Handle(_) | TypeLayout::HandleArray(..) => { + return Err(LiftError(Diag::bug(["cannot offset Handles".into()]))); + } + TypeLayout::Concrete(mem_layout) => mem_layout, + }; + + let mut access_chain_inputs: SmallVec<_> = [base_ptr].into_iter().collect(); + loop { + if let Components::Elements { + stride: layout_stride, + elem, + fixed_len, + } = &layout.components + { + if layout_stride == stride + && Ok(index_bounds.clone()) + == fixed_len + .map(|len| i32::try_from(len.get()).map(|len| 0..len)) + .transpose() + { + access_chain_inputs.push(data_inst_def.inputs[1]); + layout = elem.clone(); + break; + } + } + + // FIXME(eddyb) deduplicate with `maybe_adjust_pointer_for_access`. 
+ let idx = { + // FIXME(eddyb) there might be a better way to + // estimate a relevant offset range for the array, + // maybe assume length >= 1 so the minimum range + // is always `0..stride`? + let min_expected_len = index_bounds + .clone() + .and_then(|index_bounds| u32::try_from(index_bounds.end).ok()) + .unwrap_or(0); + let offset_range = + 0..min_expected_len.checked_add(stride.get()).unwrap_or(0); + let mut component_indices = + layout.components.find_components_containing(offset_range); + match (component_indices.next(), component_indices.next()) { + (None, _) => { + return Err(LiftError(Diag::bug([ + "matching array not found in pointee type layout".into(), + ]))); + } + // FIXME(eddyb) obsolete this case entirely, + // by removing stores of ZSTs, and replacing + // loads of ZSTs with `OpUndef` constants. + (Some(_), Some(_)) => { + return Err(LiftError(Diag::bug([ + "ambiguity due to ZSTs in pointee type layout".into(), + ]))); + } + (Some(idx), None) => idx, + } + }; + + let idx_as_i32 = i32::try_from(idx).ok().ok_or_else(|| { + LiftError(Diag::bug([format!( + "{idx} not representable as a positive s32" + ) + .into()])) + })?; + access_chain_inputs + .push(Value::Const(self.lifter.const_u32(idx_as_i32 as u32))); + + layout = match &layout.components { + Components::Scalar => unreachable!(), + Components::Elements { elem, .. } => elem.clone(), + Components::Fields { layouts, .. 
} => layouts[idx].clone(), + }; + } + DataInstDef { + attrs: data_inst_def.attrs, + kind: DataInstKind::SpvInst(wk.OpAccessChain.into()), + output_type: Some(self.lifter.spv_ptr_type(addr_space, layout.original_type)), + inputs: access_chain_inputs, + } + } + DataInstKind::QPtr(op @ (QPtrOp::Load | QPtrOp::Store)) => { + let (spv_opcode, access_type) = match op { + QPtrOp::Load => (wk.OpLoad, data_inst_def.output_type.unwrap()), + QPtrOp::Store => (wk.OpStore, type_of_val(data_inst_def.inputs[1])), + _ => unreachable!(), + }; + + // FIXME(eddyb) written in a more general style for future deduplication. + let maybe_ajustment = { + let input_idx = 0; + let ptr = data_inst_def.inputs[input_idx]; + let (addr_space, pointee_layout) = type_of_val_as_spv_ptr_with_layout(ptr)?; + self.maybe_adjust_pointer_for_access( + ptr, + addr_space, + pointee_layout, + access_type, + )? + .map(|access_chain_data_inst_def| (input_idx, access_chain_data_inst_def)) + .into_iter() + }; + + let mut new_data_inst_def = DataInstDef { + kind: DataInstKind::SpvInst(spv_opcode.into()), + ..data_inst_def.clone() + }; + + // FIXME(eddyb) written in a more general style for future deduplication. + for (input_idx, mut access_chain_data_inst_def) in maybe_ajustment { + // HACK(eddyb) account for `deferred_ptr_noops` interactions. + self.resolve_deferred_ptr_noop_uses(&mut access_chain_data_inst_def.inputs); + self.add_value_uses(&access_chain_data_inst_def.inputs); + + let access_chain_data_inst = func_at_data_inst + .reborrow() + .data_insts + .define(cx, access_chain_data_inst_def.into()); + + // HACK(eddyb) can't really use helpers like `FuncAtMut::def`, + // due to the need to borrow `control_nodes` and `data_insts` + // at the same time - perhaps some kind of `FuncAtMut` position + // types for "where a list is in a parent entity" could be used + // to make this more ergonomic, although the potential need for + // an actual list entity of its own, should be considered. 
+ let data_inst = func_at_data_inst.position; + let func = func_at_data_inst.reborrow().at(()); + match &mut func.control_nodes[parent_block].kind { + ControlNodeKind::Block { insts } => { + insts.insert_before(access_chain_data_inst, data_inst, func.data_insts); + } + _ => unreachable!(), + } + + new_data_inst_def.inputs[input_idx] = + Value::DataInstOutput(access_chain_data_inst); + } + + new_data_inst_def + } + + DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => { + let mut to_spv_ptr_input_adjustments = vec![]; + let mut from_spv_ptr_output = None; + for attr in &cx[data_inst_def.attrs].attrs { + match *attr { + Attr::QPtr(QPtrAttr::ToSpvPtrInput { + input_idx, + pointee: expected_pointee_type, + }) => { + let input_idx = usize::try_from(input_idx).unwrap(); + let expected_pointee_type = expected_pointee_type.0; + + let input_ptr = data_inst_def.inputs[input_idx]; + let (input_ptr_addr_space, input_pointee_layout) = + type_of_val_as_spv_ptr_with_layout(input_ptr)?; + + if let Some(access_chain_data_inst_def) = self + .maybe_adjust_pointer_for_access( + input_ptr, + input_ptr_addr_space, + input_pointee_layout, + expected_pointee_type, + )? + { + to_spv_ptr_input_adjustments + .push((input_idx, access_chain_data_inst_def)); + } + } + Attr::QPtr(QPtrAttr::FromSpvPtrOutput { + addr_space, + pointee, + }) => { + assert!(from_spv_ptr_output.is_none()); + from_spv_ptr_output = Some((addr_space.0, pointee.0)); + } + _ => {} + } + } + + if to_spv_ptr_input_adjustments.is_empty() && from_spv_ptr_output.is_none() { + return Ok(Transformed::Unchanged); + } + + let mut new_data_inst_def = data_inst_def.clone(); + + // FIXME(eddyb) deduplicate with `Load`/`Store`. + for (input_idx, mut access_chain_data_inst_def) in to_spv_ptr_input_adjustments { + // HACK(eddyb) account for `deferred_ptr_noops` interactions. 
+ self.resolve_deferred_ptr_noop_uses(&mut access_chain_data_inst_def.inputs); + self.add_value_uses(&access_chain_data_inst_def.inputs); + + let access_chain_data_inst = func_at_data_inst + .reborrow() + .data_insts + .define(cx, access_chain_data_inst_def.into()); + + // HACK(eddyb) can't really use helpers like `FuncAtMut::def`, + // due to the need to borrow `control_nodes` and `data_insts` + // at the same time - perhaps some kind of `FuncAtMut` position + // types for "where a list is in a parent entity" could be used + // to make this more ergonomic, although the potential need for + // an actual list entity of its own, should be considered. + let data_inst = func_at_data_inst.position; + let func = func_at_data_inst.reborrow().at(()); + match &mut func.control_nodes[parent_block].kind { + ControlNodeKind::Block { insts } => { + insts.insert_before(access_chain_data_inst, data_inst, func.data_insts); + } + _ => unreachable!(), + } + + new_data_inst_def.inputs[input_idx] = + Value::DataInstOutput(access_chain_data_inst); + } + + if let Some((addr_space, pointee_type)) = from_spv_ptr_output { + new_data_inst_def.output_type = + Some(self.lifter.spv_ptr_type(addr_space, pointee_type)); + } + + new_data_inst_def + } + }; + Ok(Transformed::Changed(replacement_data_inst_def)) + } + + /// If necessary, construct an `OpAccessChain` instruction to turn `ptr` + /// (pointing to a type with `pointee_layout`) into a pointer to `access_type` + /// (which can then be used with e.g. `OpLoad`/`OpStore`). + // + // FIXME(eddyb) customize errors, to tell apart Load/Store/ToSpvPtrInput. + fn maybe_adjust_pointer_for_access( + &self, + ptr: Value, + addr_space: AddrSpace, + mut pointee_layout: TypeLayout, + access_type: Type, + ) -> Result, LiftError> { + let wk = self.lifter.wk; + + let access_layout = self.lifter.layout_of(access_type)?; + + // The access type might be merely a prefix of the pointee type, + // requiring injecting an extra `OpAccessChain` to "dig in". 
+ let mut access_chain_inputs: SmallVec<_> = [ptr].into_iter().collect(); + + if let TypeLayout::HandleArray(handle, _) = pointee_layout { + access_chain_inputs.push(Value::Const(self.lifter.const_u32(0))); + pointee_layout = TypeLayout::Handle(handle); + } + match (pointee_layout, access_layout) { + (TypeLayout::HandleArray(..), _) => unreachable!(), + + // All the illegal cases are here to keep the rest tidier. + (_, TypeLayout::Handle(shapes::Handle::Buffer(..))) => { + return Err(LiftError(Diag::bug(["cannot access whole Buffer".into()]))); + } + (_, TypeLayout::HandleArray(..)) => { + return Err(LiftError(Diag::bug([ + "cannot access whole HandleArray".into() + ]))); + } + (_, TypeLayout::Concrete(access_layout)) + if access_layout.mem_layout.dyn_unit_stride.is_some() => + { + return Err(LiftError(Diag::bug(["cannot access unsized type".into()]))); + } + (TypeLayout::Handle(shapes::Handle::Buffer(..)), _) => { + return Err(LiftError(Diag::bug(["cannot access into Buffer".into()]))); + } + (TypeLayout::Handle(_), TypeLayout::Concrete(_)) => { + return Err(LiftError(Diag::bug([ + "cannot access Handle as memory".into() + ]))); + } + (TypeLayout::Concrete(_), TypeLayout::Handle(_)) => { + return Err(LiftError(Diag::bug([ + "cannot access memory as Handle".into() + ]))); + } + + ( + TypeLayout::Handle(shapes::Handle::Opaque(pointee_handle_type)), + TypeLayout::Handle(shapes::Handle::Opaque(access_handle_type)), + ) => { + if pointee_handle_type != access_handle_type { + return Err(LiftError(Diag::bug([ + "(opaque handle) pointer vs access type mismatch".into(), + ]))); + } + } + + (TypeLayout::Concrete(mut pointee_layout), TypeLayout::Concrete(access_layout)) => { + // FIXME(eddyb) deduplicate with access chain loop for Offset. 
+ while pointee_layout.original_type != access_layout.original_type { + let idx = { + let offset_range = 0..access_layout.mem_layout.fixed_base.size; + let mut component_indices = pointee_layout + .components + .find_components_containing(offset_range); + match (component_indices.next(), component_indices.next()) { + (None, _) => { + return Err(LiftError(Diag::bug([ + "accessed type not found in pointee type layout".into(), + ]))); + } + // FIXME(eddyb) obsolete this case entirely, + // by removing stores of ZSTs, and replacing + // loads of ZSTs with `OpUndef` constants. + (Some(_), Some(_)) => { + return Err(LiftError(Diag::bug([ + "ambiguity due to ZSTs in pointee type layout".into(), + ]))); + } + (Some(idx), None) => idx, + } + }; + + let idx_as_i32 = i32::try_from(idx).ok().ok_or_else(|| { + LiftError(Diag::bug([format!( + "{idx} not representable as a positive s32" + ) + .into()])) + })?; + access_chain_inputs + .push(Value::Const(self.lifter.const_u32(idx_as_i32 as u32))); + + pointee_layout = match &pointee_layout.components { + Components::Scalar => unreachable!(), + Components::Elements { elem, .. } => elem.clone(), + Components::Fields { layouts, .. } => layouts[idx].clone(), + }; + } + } + } + + Ok(if access_chain_inputs.len() > 1 { + Some(DataInstDef { + attrs: Default::default(), + kind: DataInstKind::SpvInst(wk.OpAccessChain.into()), + output_type: Some(self.lifter.spv_ptr_type(addr_space, access_type)), + inputs: access_chain_inputs, + }) + } else { + None + }) + } + + /// Apply rewrites implied by `deferred_ptr_noops` to `values`. + /// + /// This **does not** update `data_inst_use_counts` - in order to do that, + /// you must call `self.remove_value_uses(values)` beforehand, and then also + /// call `self.after_value_uses(values)` afterwards. + fn resolve_deferred_ptr_noop_uses(&self, values: &mut [Value]) { + for v in values { + // FIXME(eddyb) the loop could theoretically be avoided, but that'd + // make tracking use counts harder. 
+ while let Value::DataInstOutput(data_inst) = *v { + match self.deferred_ptr_noops.get(&data_inst) { + Some(ptr_noop) => { + *v = ptr_noop.output_pointer; + } + None => break, + } + } + } + } + + // FIXME(eddyb) these are only this whacky because an `u32` is being + // encoded as `Option` for (dense) map entry reasons. + fn add_value_uses(&mut self, values: &[Value]) { + for &v in values { + if let Value::DataInstOutput(data_inst) = v { + let count = self.data_inst_use_counts.entry(data_inst); + *count = Some( + NonZeroU32::new(count.map_or(0, |c| c.get()).checked_add(1).unwrap()).unwrap(), + ); + } + } + } + fn remove_value_uses(&mut self, values: &[Value]) { + for &v in values { + if let Value::DataInstOutput(data_inst) = v { + let count = self.data_inst_use_counts.entry(data_inst); + *count = NonZeroU32::new(count.unwrap().get() - 1); + } + } + } +} + +impl Transformer for LiftToSpvPtrInstsInFunc<'_> { + // FIXME(eddyb) this is intentionally *shallow* and will not handle pointers + // "hidden" in composites (which should be handled in SPIR-T explicitly). + fn transform_const_use(&mut self, ct: Const) -> Transformed { + // FIXME(eddyb) maybe cache this remap (in `LiftToSpvPtrs`, globally). 
+ let ct_def = &self.lifter.cx[ct]; + if let ConstCtor::PtrToGlobalVar(gv) = ct_def.ctor { + Transformed::Changed(self.lifter.cx.intern(ConstDef { + attrs: ct_def.attrs, + ty: self.global_vars[gv].type_of_ptr_to, + ctor: ct_def.ctor.clone(), + ctor_args: ct_def.ctor_args.clone(), + })) + } else { + Transformed::Unchanged + } + } + + fn transform_value_use(&mut self, v: &Value) -> Transformed { + self.add_value_uses(&[*v]); + + v.inner_transform_with(self) + } + + // HACK(eddyb) while we want to transform `DataInstDef`s, we can't inject + // adjacent instructions without access to the parent `ControlNodeKind::Block`, + // and to fix this would likely require list nodes to carry some handle to + // the list they're part of, either the whole semantic parent, or something + // more contrived, where lists are actually allocated entities of their own, + // perhaps something where an `EntityListDefs` contains both: + // - an `EntityDefs>` (keyed by `DataInst`) + // - an `EntityDefs>` (keyed by `EntityList`) + fn in_place_transform_control_node_def( + &mut self, + mut func_at_control_node: FuncAtMut<'_, ControlNode>, + ) { + func_at_control_node + .reborrow() + .inner_in_place_transform_with(self); + + let control_node = func_at_control_node.position; + if let ControlNodeKind::Block { insts } = func_at_control_node.reborrow().def().kind { + let mut func_at_inst_iter = func_at_control_node.reborrow().at(insts).into_iter(); + while let Some(mut func_at_inst) = func_at_inst_iter.next() { + let mut lifted = self.try_lift_data_inst_def(func_at_inst.reborrow(), control_node); + if let Ok(Transformed::Unchanged) = lifted { + let data_inst_def = func_at_inst.reborrow().def(); + if let DataInstKind::QPtr(_) = data_inst_def.kind { + lifted = Err(LiftError(Diag::bug([ + "unimplemented qptr instruction".into() + ]))); + } else if let Some(ty) = data_inst_def.output_type { + if matches!(self.lifter.cx[ty].ctor, TypeCtor::QPtr) { + lifted = Err(LiftError(Diag::bug([ + "unimplemented 
qptr-producing instruction".into(), + ]))); + } + } + } + match lifted { + Ok(Transformed::Unchanged) => {} + Ok(Transformed::Changed(new_def)) => { + // HACK(eddyb) this whole dance ensures that use counts + // remain accurate, no matter what rewrites occur. + let data_inst_def = func_at_inst.def(); + self.remove_value_uses(&data_inst_def.inputs); + *data_inst_def = new_def; + self.resolve_deferred_ptr_noop_uses(&mut data_inst_def.inputs); + self.add_value_uses(&data_inst_def.inputs); + } + Err(LiftError(e)) => { + let data_inst_def = func_at_inst.def(); + + // HACK(eddyb) do not add redundant errors to `qptr::analyze` bugs. + self.func_has_qptr_analysis_bug_diags = self + .func_has_qptr_analysis_bug_diags + || self.lifter.cx[data_inst_def.attrs] + .attrs + .iter() + .any(|attr| match attr { + Attr::Diagnostics(diags) => { + diags.0.iter().any(|diag| match diag.level { + DiagLevel::Bug(loc) => { + loc.file().ends_with("qptr/analyze.rs") + || loc.file().ends_with("qptr\\analyze.rs") + } + _ => false, + }) + } + _ => false, + }); + + if !self.func_has_qptr_analysis_bug_diags { + data_inst_def.attrs.push_diag(&self.lifter.cx, e); + } + } + } + } + } + } + + fn in_place_transform_func_decl(&mut self, func_decl: &mut FuncDecl) { + func_decl.inner_in_place_transform_with(self); + + // Remove all `deferred_ptr_noops` instructions that are truly unused. + if let DeclDef::Present(func_def_body) = &mut func_decl.def { + let deferred_ptr_noops = mem::take(&mut self.deferred_ptr_noops); + // NOTE(eddyb) reverse order is important, as each removal can reduce + // use counts of an earlier definition, allowing further removal. 
+ for (inst, ptr_noop) in deferred_ptr_noops.into_iter().rev() { + if self.data_inst_use_counts.get(inst).is_none() { + // HACK(eddyb) can't really use helpers like `FuncAtMut::def`, + // due to the need to borrow `control_nodes` and `data_insts` + // at the same time - perhaps some kind of `FuncAtMut` position + // types for "where a list is in a parent entity" could be used + // to make this more ergonomic, although the potential need for + // an actual list entity of its own, should be considered. + match &mut func_def_body.control_nodes[ptr_noop.parent_block].kind { + ControlNodeKind::Block { insts } => { + insts.remove(inst, &mut func_def_body.data_insts); + } + _ => unreachable!(), + } + + self.remove_value_uses(&func_def_body.at(inst).def().inputs); + } + } + } + } +} diff --git a/src/qptr/lower.rs b/src/qptr/lower.rs new file mode 100644 index 0000000..3a10c3f --- /dev/null +++ b/src/qptr/lower.rs @@ -0,0 +1,735 @@ +//! [`QPtr`](crate::TypeCtor::QPtr) lowering (e.g. from SPIR-V). + +// HACK(eddyb) layout code used to be in this module. +use super::layout::*; + +use crate::func_at::FuncAtMut; +use crate::qptr::{shapes, QPtrAttr, QPtrOp}; +use crate::transform::{InnerInPlaceTransform, Transformed, Transformer}; +use crate::{ + spv, AddrSpace, AttrSet, AttrSetDef, Const, ConstCtor, ConstDef, Context, ControlNode, + ControlNodeKind, DataInst, DataInstDef, DataInstKind, Diag, FuncDecl, GlobalVarDecl, + OrdAssertEq, Type, TypeCtor, TypeCtorArg, TypeDef, Value, +}; +use smallvec::SmallVec; +use std::cell::Cell; +use std::num::NonZeroU32; +use std::rc::Rc; + +struct LowerError(Diag); + +/// Context for lowering SPIR-V `OpTypePointer`s to `QPtr`s. +/// +/// See also `passes::qptr::lower_from_spv_ptrs` (which drives this). 
+pub struct LowerFromSpvPtrs<'a> { + cx: Rc, + wk: &'static spv::spec::WellKnown, + layout_cache: LayoutCache<'a>, + + cached_qptr_type: Cell>, +} + +impl<'a> LowerFromSpvPtrs<'a> { + pub fn new(cx: Rc, layout_config: &'a LayoutConfig) -> Self { + Self { + cx: cx.clone(), + wk: &spv::spec::Spec::get().well_known, + layout_cache: LayoutCache::new(cx, layout_config), + cached_qptr_type: Default::default(), + } + } + + pub fn lower_global_var(&self, global_var_decl: &mut GlobalVarDecl) { + let wk = self.wk; + + let (_, pointee_type) = self + .as_spv_ptr_type(global_var_decl.type_of_ptr_to) + .unwrap(); + let handle_layout_to_handle = |handle_layout: HandleLayout| match handle_layout { + shapes::Handle::Opaque(ty) => shapes::Handle::Opaque(ty), + shapes::Handle::Buffer(addr_space, buf) => { + shapes::Handle::Buffer(addr_space, buf.mem_layout) + } + }; + let mut shape_result = self.layout_of(pointee_type).and_then(|layout| { + Ok(match layout { + TypeLayout::Handle(handle) => shapes::GlobalVarShape::Handles { + handle: handle_layout_to_handle(handle), + fixed_count: Some(NonZeroU32::new(1).unwrap()), + }, + TypeLayout::HandleArray(handle, fixed_count) => shapes::GlobalVarShape::Handles { + handle: handle_layout_to_handle(handle), + fixed_count, + }, + TypeLayout::Concrete(concrete) => { + if concrete.mem_layout.dyn_unit_stride.is_some() { + return Err(LowerError(Diag::err([ + "global variable cannot have dynamically sized type `".into(), + pointee_type.into(), + "`".into(), + ]))); + } + match global_var_decl.addr_space { + // These SPIR-V Storage Classes are defined to require + // exact types, either because they're `BuiltIn`s, or + // for "interface matching" between pipeline stages. 
+ AddrSpace::SpvStorageClass(sc) + if [ + wk.Input, + wk.Output, + wk.IncomingRayPayloadKHR, + wk.IncomingCallableDataKHR, + wk.HitAttributeKHR, + wk.RayPayloadKHR, + wk.CallableDataKHR, + ] + .contains(&sc) => + { + shapes::GlobalVarShape::TypedInterface(pointee_type) + } + + _ => shapes::GlobalVarShape::UntypedData(concrete.mem_layout.fixed_base), + } + } + }) + }); + if let Ok(shapes::GlobalVarShape::Handles { handle, .. }) = &mut shape_result { + match handle { + shapes::Handle::Opaque(_) => { + if global_var_decl.addr_space != AddrSpace::SpvStorageClass(wk.UniformConstant) + { + shape_result = Err(LowerError(Diag::bug([ + "opaque Handles require UniformConstant".into(), + ]))); + } + } + // FIXME(eddyb) not all "interface blocks" imply buffers, so this + // may need to be ignored based on the SPIR-V storage class. + // + // OH GOD but the lowering of operations to the right thing....... + // depends on whether it's a buffer or not...... outside of + // Rust-GPU's abuse of `Generic` it should at least be possible + // to determine it from the pointer type itself, at the lowering + // op time, but with storage class inference.... THIS IS FUCKED + // OTOH, Rust-GPU doesn't really use `Block` outside of buffers! + // Long-term it should probably have different types per storage + // class, or even represent buffers as pointers. + shapes::Handle::Buffer(buf_addr_space, _) => { + // HACK(eddyb) it couldn't have been known in `layout_of`. + assert!(*buf_addr_space == AddrSpace::Handles); + *buf_addr_space = global_var_decl.addr_space; + } + } + if shape_result.is_ok() { + global_var_decl.addr_space = AddrSpace::Handles; + } + } + match shape_result { + Ok(shape) => { + global_var_decl.shape = Some(shape); + + // HACK(eddyb) this should handle shallow `QPtr` in the initializer, but + // typed initializers should be replaced with miri/linker-style ones. 
+ EraseSpvPtrs { lowerer: self }.in_place_transform_global_var_decl(global_var_decl); + } + Err(LowerError(e)) => { + global_var_decl.attrs.push_diag(&self.cx, e); + } + } + } + + pub fn lower_func(&self, func_decl: &mut FuncDecl) { + // HACK(eddyb) two-step to avoid having to record the original types + // separately - so `LowerFromSpvPtrInstsInFunc` will leave all value defs + // (including replaced instructions!) with unchanged `OpTypePointer` + // types, that only `EraseSpvPtrs`, later, replaces with `QPtr`. + LowerFromSpvPtrInstsInFunc { lowerer: self }.in_place_transform_func_decl(func_decl); + EraseSpvPtrs { lowerer: self }.in_place_transform_func_decl(func_decl); + } + + /// Returns `Some` iff `ty` is a SPIR-V `OpTypePointer`. + // + // FIXME(eddyb) deduplicate with `qptr::lift`. + // + // FIXME(eddyb) consider using the storage class to determine whether a + // `Block`-annotated type is a buffer or just interface nonsense. + // (!!! may cause bad interactions with storage class inference `Generic` abuse) + fn as_spv_ptr_type(&self, ty: Type) -> Option<(AddrSpace, Type)> { + let ty_def = &self.cx[ty]; + match &ty_def.ctor { + TypeCtor::SpvInst(spv_inst) if spv_inst.opcode == self.wk.OpTypePointer => { + let sc = match spv_inst.imms[..] { + [spv::Imm::Short(_, sc)] => sc, + _ => unreachable!(), + }; + let pointee = match ty_def.ctor_args[..] { + [TypeCtorArg::Type(elem_type)] => elem_type, + _ => unreachable!(), + }; + Some((AddrSpace::SpvStorageClass(sc), pointee)) + } + _ => None, + } + } + + // FIXME(eddyb) properly distinguish between zero-extension and sign-extension. + fn const_as_u32(&self, ct: Const) -> Option { + match &self.cx[ct].ctor { + ConstCtor::SpvInst(spv_inst) + if spv_inst.opcode == self.wk.OpConstant && spv_inst.imms.len() == 1 => + { + match spv_inst.imms[..] { + [spv::Imm::Short(_, x)] => Some(x), + _ => unreachable!(), + } + } + _ => None, + } + } + + /// Get the (likely cached) `QPtr` type. 
+ fn qptr_type(&self) -> Type { + if let Some(cached) = self.cached_qptr_type.get() { + return cached; + } + let ty = self.cx.intern(TypeDef { + attrs: Default::default(), + ctor: TypeCtor::QPtr, + ctor_args: Default::default(), + }); + self.cached_qptr_type.set(Some(ty)); + ty + } + + /// Attempt to compute a `TypeLayout` for a given (SPIR-V) `Type`. + fn layout_of(&self, ty: Type) -> Result { + self.layout_cache + .layout_of(ty) + .map_err(|LayoutError(err)| LowerError(err)) + } +} + +struct EraseSpvPtrs<'a> { + lowerer: &'a LowerFromSpvPtrs<'a>, +} + +impl Transformer for EraseSpvPtrs<'_> { + // FIXME(eddyb) this is intentionally *shallow* and will not handle pointers + // "hidden" in composites (which should be handled in SPIR-T explicitly). + fn transform_type_use(&mut self, ty: Type) -> Transformed { + // FIXME(eddyb) maybe cache this remap (in `LowerFromSpvPtrs`, globally). + if self.lowerer.as_spv_ptr_type(ty).is_some() { + Transformed::Changed(self.lowerer.qptr_type()) + } else { + Transformed::Unchanged + } + } + + // FIXME(eddyb) this is intentionally *shallow* and will not handle pointers + // "hidden" in composites (which should be handled in SPIR-T explicitly). + fn transform_const_use(&mut self, ct: Const) -> Transformed { + // FIXME(eddyb) maybe cache this remap (in `LowerFromSpvPtrs`, globally). + let ct_def = &self.lowerer.cx[ct]; + if let ConstCtor::PtrToGlobalVar(_) = ct_def.ctor { + Transformed::Changed(self.lowerer.cx.intern(ConstDef { + attrs: ct_def.attrs, + ty: self.lowerer.qptr_type(), + ctor: ct_def.ctor.clone(), + ctor_args: ct_def.ctor_args.clone(), + })) + } else { + Transformed::Unchanged + } + } +} + +struct LowerFromSpvPtrInstsInFunc<'a> { + lowerer: &'a LowerFromSpvPtrs<'a>, +} + +/// One `QPtr`->`QPtr` step used in the lowering of `Op*AccessChain`. +/// +/// The `op` should take a `QPtr` as its first input and produce a `QPtr`. 
+struct QPtrChainStep { + op: QPtrOp, + + /// For `QPtrOp::HandleArrayIndex` and `QPtrOp::DynOffset`, this is the + /// second input (after the `QPtr` which is automatically handled). + dyn_idx: Option, +} + +impl QPtrChainStep { + fn into_data_inst_kind_and_inputs( + self, + in_qptr: Value, + ) -> (DataInstKind, SmallVec<[Value; 2]>) { + let Self { op, dyn_idx } = self; + (op.into(), [in_qptr].into_iter().chain(dyn_idx).collect()) + } +} + +impl LowerFromSpvPtrInstsInFunc<'_> { + fn try_lower_access_chain( + &self, + mut layout: TypeLayout, + indices: &[Value], + ) -> Result, LowerError> { + // FIXME(eddyb) pass in the `AddrSpace` to determine this correctly. + let is_logical_addressing = true; + + let const_idx_as_i32 = |idx| match idx { + // FIXME(eddyb) figure out the signedness semantics here. + Value::Const(idx) => self.lowerer.const_as_u32(idx).map(|idx_u32| idx_u32 as i32), + _ => None, + }; + + let mut steps: SmallVec<[QPtrChainStep; 4]> = SmallVec::new(); + let mut indices = indices.iter().copied(); + while indices.len() > 0 { + let (mut op, component_layout) = match layout { + TypeLayout::Handle(shapes::Handle::Opaque(_)) => { + return Err(LowerError(Diag::bug([ + "opaque handles have no sub-components".into(), + ]))); + } + TypeLayout::Handle(shapes::Handle::Buffer(_, buffer_data_layout)) => { + (QPtrOp::BufferData, TypeLayout::Concrete(buffer_data_layout)) + } + TypeLayout::HandleArray(handle, _) => { + (QPtrOp::HandleArrayIndex, TypeLayout::Handle(handle)) + } + TypeLayout::Concrete(concrete) => match &concrete.components { + Components::Scalar => { + return Err(LowerError(Diag::bug([ + "scalars have no sub-components".into() + ]))); + } + // FIXME(eddyb) handle the weird `OpTypeMatrix` layout when `RowMajor`. 
+ Components::Elements { + stride, + elem, + fixed_len, + } => ( + QPtrOp::DynOffset { + stride: *stride, + // FIXME(eddyb) even without a fixed length, logical + // addressing still implies the index is *positive*, + // that should be encoded here, to help analysis. + index_bounds: fixed_len + .filter(|_| is_logical_addressing) + .and_then(|len| Some(0..len.get().try_into().ok()?)), + }, + TypeLayout::Concrete(elem.clone()), + ), + Components::Fields { offsets, layouts } => { + let field_idx = + const_idx_as_i32(indices.next().unwrap()).ok_or_else(|| { + LowerError(Diag::bug(["non-constant field index".into()])) + })?; + let (field_offset, field_layout) = usize::try_from(field_idx) + .ok() + .and_then(|field_idx| { + Some((*offsets.get(field_idx)?, layouts.get(field_idx)?.clone())) + }) + .ok_or_else(|| { + LowerError(Diag::bug([format!( + "field {field_idx} out of bounds (expected 0..{})", + offsets.len() + ) + .into()])) + })?; + ( + QPtrOp::Offset(i32::try_from(field_offset).ok().ok_or_else(|| { + LowerError(Diag::bug([format!( + "{field_offset} not representable as a positive s32" + ) + .into()])) + })?), + TypeLayout::Concrete(field_layout), + ) + } + }, + }; + layout = component_layout; + + // Automatically grab the dynamic index, whenever necessary. + let mut dyn_idx = match op { + QPtrOp::HandleArrayIndex | QPtrOp::DynOffset { .. } => { + Some(indices.next().unwrap()) + } + _ => None, + }; + + // Constant-fold dynamic indexing, whenever possible. + if let QPtrOp::DynOffset { + stride, + index_bounds, + } = &op + { + let const_offset = const_idx_as_i32(dyn_idx.unwrap()) + .filter(|const_idx| { + index_bounds + .as_ref() + .map_or(true, |bounds| bounds.contains(const_idx)) + }) + .and_then(|const_idx| i32::try_from(stride.get()).ok()?.checked_mul(const_idx)); + if let Some(const_offset) = const_offset { + op = QPtrOp::Offset(const_offset); + dyn_idx = None; + } + } + + // Combine consecutive immediate offsets, whenever possible. 
+ match (steps.last_mut().map(|last_step| &mut last_step.op), &op) { + // Completely ignore noop offsets. + (_, QPtrOp::Offset(0)) => {} + + (Some(QPtrOp::Offset(last_offset)), &QPtrOp::Offset(new_offset)) => { + *last_offset = last_offset.checked_add(new_offset).ok_or_else(|| { + LowerError(Diag::bug([format!( + "offset overflow ({last_offset}+{new_offset})" + ) + .into()])) + })?; + } + + _ => steps.push(QPtrChainStep { op, dyn_idx }), + } + } + Ok(steps) + } + + fn try_lower_data_inst_def( + &self, + mut func_at_data_inst: FuncAtMut<'_, DataInst>, + parent_block: ControlNode, + ) -> Result, LowerError> { + let cx = &self.lowerer.cx; + let wk = self.lowerer.wk; + + let func_at_data_inst_frozen = func_at_data_inst.reborrow().freeze(); + let data_inst = func_at_data_inst_frozen.position; + let data_inst_def = func_at_data_inst_frozen.def(); + + // FIXME(eddyb) is this a good convention? + let func = func_at_data_inst_frozen.at(()); + + let spv_inst = match &data_inst_def.kind { + DataInstKind::SpvInst(spv_inst) => spv_inst, + _ => return Ok(Transformed::Unchanged), + }; + + let DataInstDef { + mut attrs, + output_type, + .. + } = *data_inst_def; + + let replacement_kind_and_inputs = if spv_inst.opcode == wk.OpVariable { + assert!(data_inst_def.inputs.len() <= 1); + let (_, var_data_type) = self + .lowerer + .as_spv_ptr_type(output_type.unwrap()) + .ok_or_else(|| { + LowerError(Diag::bug(["output type not an `OpTypePointer`".into()])) + })?; + match self.lowerer.layout_of(var_data_type)? { + TypeLayout::Concrete(concrete) if concrete.mem_layout.dyn_unit_stride.is_none() => { + ( + QPtrOp::FuncLocalVar(concrete.mem_layout.fixed_base).into(), + data_inst_def.inputs.clone(), + ) + } + _ => return Ok(Transformed::Unchanged), + } + } else if spv_inst.opcode == wk.OpLoad { + // FIXME(eddyb) support memory operands somehow. 
+ if !spv_inst.imms.is_empty() { + return Ok(Transformed::Unchanged); + } + assert_eq!(data_inst_def.inputs.len(), 1); + (QPtrOp::Load.into(), data_inst_def.inputs.clone()) + } else if spv_inst.opcode == wk.OpStore { + // FIXME(eddyb) support memory operands somehow. + if !spv_inst.imms.is_empty() { + return Ok(Transformed::Unchanged); + } + assert_eq!(data_inst_def.inputs.len(), 2); + (QPtrOp::Store.into(), data_inst_def.inputs.clone()) + } else if spv_inst.opcode == wk.OpArrayLength { + let field_idx = match spv_inst.imms[..] { + [spv::Imm::Short(_, field_idx)] => field_idx, + _ => unreachable!(), + }; + assert_eq!(data_inst_def.inputs.len(), 1); + let ptr = data_inst_def.inputs[0]; + let (_, pointee_type) = self + .lowerer + .as_spv_ptr_type(func.at(ptr).type_of(cx)) + .ok_or_else(|| { + LowerError(Diag::bug(["pointer input not an `OpTypePointer`".into()])) + })?; + + let buf_data_layout = match self.lowerer.layout_of(pointee_type)? { + TypeLayout::Handle(shapes::Handle::Buffer(_, buf)) => buf, + _ => return Err(LowerError(Diag::bug(["non-Buffer pointee".into()]))), + }; + + let (field_offset, field_layout) = match &buf_data_layout.components { + Components::Fields { offsets, layouts } => usize::try_from(field_idx) + .ok() + .and_then(|field_idx| { + Some((*offsets.get(field_idx)?, layouts.get(field_idx)?.clone())) + }) + .ok_or_else(|| { + LowerError(Diag::bug([format!( + "field {field_idx} out of bounds (expected 0..{})", + offsets.len() + ) + .into()])) + })?, + + _ => { + return Err(LowerError(Diag::bug([ + "buffer data not an `OpTypeStruct`".into() + ]))); + } + }; + let array_stride = match field_layout.components { + Components::Elements { + stride, + fixed_len: None, + .. + } => stride, + + _ => { + return Err(LowerError(Diag::bug([format!( + "buffer data field #{field_idx} not an `OpTypeRuntimeArray`" + ) + .into()]))); + } + }; + + // Sanity-check layout invariants (should always hold given above checks). 
+ assert_eq!(field_layout.mem_layout.fixed_base.size, 0); + assert_eq!(field_layout.mem_layout.dyn_unit_stride, Some(array_stride)); + assert_eq!(buf_data_layout.mem_layout.fixed_base.size, field_offset); + assert_eq!( + buf_data_layout.mem_layout.dyn_unit_stride, + Some(array_stride) + ); + + ( + QPtrOp::BufferDynLen { + fixed_base_size: field_offset, + dyn_unit_stride: array_stride, + } + .into(), + data_inst_def.inputs.clone(), + ) + } else if [ + wk.OpAccessChain, + wk.OpInBoundsAccessChain, + wk.OpPtrAccessChain, + wk.OpInBoundsPtrAccessChain, + ] + .contains(&spv_inst.opcode) + { + // FIXME(eddyb) avoid erasing the "inbounds" qualifier. + let base_ptr = data_inst_def.inputs[0]; + let (_, base_pointee_type) = self + .lowerer + .as_spv_ptr_type(func.at(base_ptr).type_of(cx)) + .ok_or_else(|| { + LowerError(Diag::bug(["pointer input not an `OpTypePointer`".into()])) + })?; + + // HACK(eddyb) for `OpPtrAccessChain`, this pretends to be indexing + // a `OpTypeRuntimeArray`, with the original type as the element type. + let access_chain_base_layout = + if [wk.OpPtrAccessChain, wk.OpInBoundsPtrAccessChain].contains(&spv_inst.opcode) { + self.lowerer.layout_of(cx.intern(TypeDef { + attrs: AttrSet::default(), + ctor: TypeCtor::SpvInst(wk.OpTypeRuntimeArray.into()), + ctor_args: [TypeCtorArg::Type(base_pointee_type)].into_iter().collect(), + }))? + } else { + self.lowerer.layout_of(base_pointee_type)? + }; + + let mut steps = + self.try_lower_access_chain(access_chain_base_layout, &data_inst_def.inputs[1..])?; + // HACK(eddyb) noop cases should probably not use any `DataInst`s at all, + // but that would require the ability to replace all uses of a `Value`. 
+ let final_step = steps.pop().unwrap_or(QPtrChainStep { + op: QPtrOp::Offset(0), + dyn_idx: None, + }); + + let mut ptr = base_ptr; + for step in steps { + let (kind, inputs) = step.into_data_inst_kind_and_inputs(ptr); + let step_data_inst = func_at_data_inst.reborrow().data_insts.define( + cx, + DataInstDef { + attrs: Default::default(), + kind, + output_type: Some(self.lowerer.qptr_type()), + inputs, + } + .into(), + ); + + // HACK(eddyb) can't really use helpers like `FuncAtMut::def`, + // due to the need to borrow `control_nodes` and `data_insts` + // at the same time - perhaps some kind of `FuncAtMut` position + // types for "where a list is in a parent entity" could be used + // to make this more ergonomic, although the potential need for + // an actual list entity of its own, should be considered. + let func = func_at_data_inst.reborrow().at(()); + match &mut func.control_nodes[parent_block].kind { + ControlNodeKind::Block { insts } => { + insts.insert_before(step_data_inst, data_inst, func.data_insts); + } + _ => unreachable!(), + } + + ptr = Value::DataInstOutput(step_data_inst); + } + final_step.into_data_inst_kind_and_inputs(ptr) + } else if spv_inst.opcode == wk.OpBitcast { + let input = data_inst_def.inputs[0]; + // Pointer-to-pointer casts are noops on `qptr`. + if self + .lowerer + .as_spv_ptr_type(func.at(input).type_of(cx)) + .is_some() + && self.lowerer.as_spv_ptr_type(output_type.unwrap()).is_some() + { + // HACK(eddyb) noop cases should not use any `DataInst`s at all, + // but that would require the ability to replace all uses of a `Value`. + let noop_step = QPtrChainStep { + op: QPtrOp::Offset(0), + dyn_idx: None, + }; + + // HACK(eddyb) since we're not removing the `DataInst` entirely, + // at least get rid of its attributes to clearly mark it as synthetic. 
+ attrs = AttrSet::default(); + + noop_step.into_data_inst_kind_and_inputs(input) + } else { + return Ok(Transformed::Unchanged); + } + } else { + return Ok(Transformed::Unchanged); + }; + // FIXME(eddyb) should the `if`-`else` chain above produce `DataInstDef`s? + let (new_kind, new_inputs) = replacement_kind_and_inputs; + Ok(Transformed::Changed(DataInstDef { + attrs, + kind: new_kind, + output_type, + inputs: new_inputs, + })) + } + + fn add_fallback_attrs_to_data_inst_def( + &self, + mut func_at_data_inst: FuncAtMut<'_, DataInst>, + extra_error: Option, + ) { + let cx = &self.lowerer.cx; + + let func_at_data_inst_frozen = func_at_data_inst.reborrow().freeze(); + let data_inst_def = func_at_data_inst_frozen.def(); + + // FIXME(eddyb) is this a good convention? + let func = func_at_data_inst_frozen.at(()); + + match data_inst_def.kind { + // Known semantics, no need to preserve SPIR-V pointer information. + DataInstKind::FuncCall(_) | DataInstKind::QPtr(_) => return, + + DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. 
} => {} + } + + let mut old_and_new_attrs = None; + let get_old_attrs = || AttrSetDef { + attrs: cx[data_inst_def.attrs].attrs.clone(), + }; + + for (input_idx, &v) in data_inst_def.inputs.iter().enumerate() { + if let Some((_, pointee)) = self.lowerer.as_spv_ptr_type(func.at(v).type_of(cx)) { + old_and_new_attrs + .get_or_insert_with(get_old_attrs) + .attrs + .insert( + QPtrAttr::ToSpvPtrInput { + input_idx: input_idx.try_into().unwrap(), + pointee: OrdAssertEq(pointee), + } + .into(), + ); + } + } + if let Some(output_type) = data_inst_def.output_type { + if let Some((addr_space, pointee)) = self.lowerer.as_spv_ptr_type(output_type) { + old_and_new_attrs + .get_or_insert_with(get_old_attrs) + .attrs + .insert( + QPtrAttr::FromSpvPtrOutput { + addr_space: OrdAssertEq(addr_space), + pointee: OrdAssertEq(pointee), + } + .into(), + ); + } + } + + if let Some(LowerError(e)) = extra_error { + old_and_new_attrs + .get_or_insert_with(get_old_attrs) + .push_diag(e); + } + + if let Some(attrs) = old_and_new_attrs { + func_at_data_inst.def().attrs = cx.intern(attrs); + } + } +} + +impl Transformer for LowerFromSpvPtrInstsInFunc<'_> { + // HACK(eddyb) while we want to transform `DataInstDef`s, we can't inject + // adjacent instructions without access to the parent `ControlNodeKind::Block`, + // and to fix this would likely require list nodes to carry some handle to + // the list they're part of, either the whole semantic parent, or something + // more contrived, where lists are actually allocated entities of their own, + // perhaps something where an `EntityListDefs` contains both: + // - an `EntityDefs>` (keyed by `DataInst`) + // - an `EntityDefs>` (keyed by `EntityList`) + fn in_place_transform_control_node_def( + &mut self, + mut func_at_control_node: FuncAtMut<'_, ControlNode>, + ) { + func_at_control_node + .reborrow() + .inner_in_place_transform_with(self); + + let control_node = func_at_control_node.position; + if let ControlNodeKind::Block { insts } = 
func_at_control_node.reborrow().def().kind { + let mut func_at_inst_iter = func_at_control_node.reborrow().at(insts).into_iter(); + while let Some(mut func_at_inst) = func_at_inst_iter.next() { + match self.try_lower_data_inst_def(func_at_inst.reborrow(), control_node) { + Ok(Transformed::Changed(new_def)) => { + *func_at_inst.def() = new_def; + } + result @ (Ok(Transformed::Unchanged) | Err(_)) => { + self.add_fallback_attrs_to_data_inst_def(func_at_inst, result.err()); + } + } + } + } + } +} diff --git a/src/qptr/mod.rs b/src/qptr/mod.rs new file mode 100644 index 0000000..25ffe93 --- /dev/null +++ b/src/qptr/mod.rs @@ -0,0 +1,226 @@ +//! [`QPtr`](crate::TypeCtor::QPtr)-related type definitions and passes. +// +// FIXME(eddyb) consider `#[cfg(doc)] use crate::TypeCtor::QPtr;` for doc comments. +// FIXME(eddyb) PR description of https://github.com/EmbarkStudios/spirt/pull/24 +// has more useful docs that could be copied here. + +use crate::{AddrSpace, Attr, DataInstKind, OrdAssertEq, Type}; +use std::collections::BTreeMap; +use std::num::NonZeroU32; +use std::ops::Range; +use std::rc::Rc; + +// NOTE(eddyb) all the modules are declared here, but they're documented "inside" +// (i.e. using inner doc comments). +pub mod analyze; +mod layout; +pub mod lift; +pub mod lower; +pub mod shapes; + +pub use layout::LayoutConfig; + +/// `QPtr`-specific attributes ([`Attr::QPtr`]). +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum QPtrAttr { + /// When applied to a `DataInst` with a `QPtr`-typed `inputs[input_idx]`, + /// this describes the original `OpTypePointer` consumed by an unknown + /// SPIR-V instruction (which may, or may not, access memory, at all). + /// + /// Assumes the original SPIR-V `StorageClass` is redundant (i.e. can be + /// deduced from the pointer's provenance), and that any accesses performed + /// through the pointer (or any pointers derived from it) stay within bounds + /// (i.e. logical pointer semantics, unsuited for e.g. 
`OpPtrAccessChain`). + // + // FIXME(eddyb) reduce usage by modeling more of SPIR-V inside SPIR-T. + ToSpvPtrInput { + input_idx: u32, + pointee: OrdAssertEq, + }, + + /// When applied to a `DataInst` with a `QPtr`-typed output value, + /// this describes the original `OpTypePointer` produced by an unknown + /// SPIR-V instruction (likely creating it, without deriving from an input). + /// + /// Assumes the original SPIR-V `StorageClass` is significant (e.g. fresh + /// provenance being created on the fly via `OpConvertUToPtr`, or derived + /// internally by the implementation via `OpImageTexelPointer`). + // + // FIXME(eddyb) reduce usage by modeling more of SPIR-V inside SPIR-T, or + // at least using some kind of bitcast instead of `QPtr` + this attribute. + // FIXME(eddyb) `OpConvertUToPtr` creates a physical pointer, could we avoid + // dealing with those at all in `QPtr`? (as its focus is logical legalization) + FromSpvPtrOutput { + // FIXME(eddyb) should this use a special `spv::StorageClass` type? + addr_space: OrdAssertEq, + pointee: OrdAssertEq, + }, + + /// When applied to a `QPtr`-typed `GlobalVar`, `DataInst`, + /// `ControlRegionInputDecl` or `ControlNodeOutputDecl`, this tracks all the + /// ways in which the pointer may be used (see `QPtrUsage`). + Usage(OrdAssertEq), +} + +impl From for Attr { + fn from(attr: QPtrAttr) -> Self { + Attr::QPtr(attr) + } +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub enum QPtrUsage { + /// Used to access one or more handles (i.e. optionally indexed by + /// [`QPtrOp::HandleArrayIndex`]), which can be: + /// - `Handle::Opaque(handle_type)`: all uses involve [`QPtrOp::Load`] or + /// [`QPtrAttr::ToSpvPtrInput`], with the common type `handle_type` + /// - `Handle::Buffer(data_usage)`: carries with it `data_usage`, i.e. the + /// usage of the memory that can be accessed through [`QPtrOp::BufferData`] + Handles(shapes::Handle), + + // FIXME(eddyb) unify terminology around "concrete"/"memory"/"untyped (data)". 
+ Memory(QPtrMemUsage), +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct QPtrMemUsage { + /// If present, this is a worst-case upper bound on memory accesses that may + /// be performed through this pointer. + // + // FIXME(eddyb) use proper newtypes for byte amounts. + // + // FIXME(eddyb) suboptimal naming choice, but other options are too verbose, + // including maybe using `RangeTo<_>` to explicitly indicate "exclusive". + // + // FIXME(eddyb) consider renaming such information to "extent", but that might + // be ambiguous with an offset range (as opposed to min/max of *possible* + // `offset_range.end`, i.e. "size"). + pub max_size: Option, + + pub kind: QPtrMemUsageKind, +} + +impl QPtrMemUsage { + pub const UNUSED: Self = Self { + max_size: Some(0), + kind: QPtrMemUsageKind::Unused, + }; +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub enum QPtrMemUsageKind { + /// Not actually used, which could be caused by pointer offsetting operations + /// with unused results, or as an intermediary state during analyses. + Unused, + + // FIXME(eddyb) replace the two leaves with e.g. `Leaf(Type, QPtrMemLeafUsage)`. + // + // + // + /// Used as a typed pointer (e.g. via unknown SPIR-V instructions), requiring + /// a specific choice of pointee type which cannot be modified, and has to be + /// reused as-is when lifting `QPtr`s back to typed pointers. + /// + /// Other overlapping uses can be merged into this one as long as they can + /// be fully expressed using the (transitive) components of this type. + StrictlyTyped(Type), + + /// Used directly to access memory (e.g. [`QPtrOp::Load`], [`QPtrOp::Store`]), + /// which can be decomposed as necessary (down to individual scalar leaves), + /// to allow maximal merging opportunities. + // + // FIXME(eddyb) track whether `Load`s and/or `Store`s are used, so that we + // can infer `NonWritable`/`NonReadable` annotations as well. 
+ DirectAccess(Type), + + /// Used as a common base for (constant) offsetting, which requires it to have + /// its own (aggregate) type, when lifting `QPtr`s back to typed pointers. + OffsetBase(Rc>), + + /// Used as a common base for (dynamic) offsetting, which requires it to have + /// its own (array) type, when lifting `QPtr`s back to typed pointers, with + /// one single element type being repeated across the entire size. + DynOffsetBase { + // FIXME(eddyb) this feels inefficient. + element: Rc, + stride: NonZeroU32, + }, + // FIXME(eddyb) consider adding an `Union` case for driving legalization. +} + +/// `QPtr`-specific operations ([`DataInstKind::QPtr`]). +#[derive(Clone, PartialEq, Eq)] +pub enum QPtrOp { + // HACK(eddyb) `OpVariable` replacement, which itself should not be kept as + // a `SpvInst` - once fn-local variables are lowered, this should go there. + FuncLocalVar(shapes::MemLayout), + + /// Adjust a **handle array** `QPtr` (`inputs[0]`), by selecting the handle + /// at the index (`inputs[1]`) from the handle array (i.e. the resulting + /// `QPtr` is limited to that one handle and can't be further "moved around"). + // + // FIXME(eddyb) this could maybe use `DynOffset`, if `stride` is changed to + // be `enum { Handle, Bytes(u32) }`, but that feels a bit too much? + HandleArrayIndex, + + /// Get a **memory** `QPtr` pointing at the contents of the buffer whose + /// handle is (implicitly) loaded from a **handle** `QPtr` (`inputs[0]`). + // + // FIXME(eddyb) should buffers be a `Type` of their own, that can be loaded + // from a handle `QPtr`, and then has data pointer / length ops *on that*? + BufferData, + + /// Get the length of the buffer whose handle is (implicitly) loaded from a + /// **handle** `QPtr` (`inputs[0]`), converted to a count of "dynamic units" + /// (as per [`shapes::MaybeDynMemLayout`]) by subtracting `fixed_base_size`, + /// then dividing by `dyn_unit_stride`. 
+ // + // FIXME(eddyb) should this handle _only_ "length in bytes", with additional + // integer subtraction+division operations on lowering to `QPtr`, and then + // multiplication+addition on lifting back to SPIR-V, followed by simplifying + // the redundant `(x * a + b - b) / a` to just `x`? + // + // NOTE: `OpArrayLength` is lowered to this by `qptr::lower`. + BufferDynLen { + fixed_base_size: u32, + dyn_unit_stride: NonZeroU32, + }, + + /// Adjust a **memory** `QPtr` (`inputs[0]`), by adding a (signed) immediate + /// amount of bytes to its "address" (whether physical or conceptual). + // + // FIXME(eddyb) some kind of `inbounds` would be very useful here, up to and + // including "capability slicing" to limit the usable range of the output. + Offset(i32), + + /// Adjust a **memory** `QPtr` (`inputs[0]`), by adding a (signed) dynamic + /// "index" (`inputs[1]`), multiplied by `stride` (bytes per element), + /// to its "address" (whether physical or conceptual). + DynOffset { + stride: NonZeroU32, + + /// Bounds on the dynamic "index" (`inputs[1]`). + // + // FIXME(eddyb) should this be an attribute/refinement? + index_bounds: Option>, + }, + + /// Read a single value from a `QPtr` (`inputs[0]`). + // + // FIXME(eddyb) limit this to memory, and scalars, maybe vectors at most. + Load, + + /// Write a single value (`inputs[1]`) to a `QPtr` (`inputs[0]`). + // + // FIXME(eddyb) limit this to memory, and scalars, maybe vectors at most. + Store, + // + // FIXME(eddyb) implement more ops! at the very least copying! + // (and lowering could ignore pointercasts, I guess?) +} + +impl From for DataInstKind { + fn from(op: QPtrOp) -> Self { + DataInstKind::QPtr(op) + } +} diff --git a/src/qptr/shapes.rs b/src/qptr/shapes.rs new file mode 100644 index 0000000..7c6b965 --- /dev/null +++ b/src/qptr/shapes.rs @@ -0,0 +1,105 @@ +//! Variable shapes (untyped memory layouts vs abstract resources). +// +// FIXME(eddyb) does this need its own module still? 
+ +use crate::{AddrSpace, Type}; +use std::num::NonZeroU32; + +/// `GlobalVar`s are currently used for both chunks of plain data (i.e. memory), +/// and the "shader interface" (inherited by `Shader` SPIR-V from GLSL, whereas +/// `Kernel` SPIR-V ended up with `OpenCL`'s "resources are passed to entry-points +/// as regular function arguments", with `BuiltIn`+`Input` as a sole exception). +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum GlobalVarShape { + /// One or more (i.e. optionally arrayed) "abstract resource" `Handle`s + /// (see `Handle` documentation for more on what it can represent). + /// + /// The single handle case is equivalent to a length `1` array of handles, + /// and as such is represented by having `fixed_count` be `Some(1)`. + Handles { + handle: Handle, + fixed_count: Option, + }, + + // FIXME(eddyb) unify terminology around "concrete"/"memory"/"untyped (data)". + UntypedData(MemLayout), + + /// Non-memory pipeline interface, which must keep the exact original type, + /// even if that type is concrete and could be handled just like memory. + /// + /// Typically `Input` or `Output`, but extensions (e.g. ray-tracing) may add + /// more such interface storage classes with strict type requirements. + // + // FIXME(eddyb) consider replacing this with by-value entry-point args/return + // (though that would not solve some of the weirder ones). + TypedInterface(Type), +} + +/// "Abstract resource" handle, that can be found in non-memory `GlobalVar`s. +/// +/// This largely corresponds to the Vulkan concept of a "descriptor", and arrays +/// of handles (e.g. `GlobalVarShape::Handles` with `fixed_count != Some(1)`) +/// map to the "descriptor indexing" usecase. +// +// FIXME(eddyb) consider implementing "descriptor indexing" more like HLSL's +// "resource heap" (with types only specified at use sites, "casts" almost). +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub enum Handle { + /// Fully opaque resources (e.g. samplers, images). 
+ Opaque(Type), + + /// Buffer resources, describing ranges of (technically) untyped memory in + /// some address space (e.g. `Uniform`, `StorageBuffer`), but being limited + /// by SPIR-V logical addressing (unlike e.g. `PhysicalStorageBuffer`). + /// + /// SPIR-V makes this particularly painful, through a couple of design flaws: + /// - forcing a static type (for the buffer contents) and disallowing any + /// pointer casts, despite the fact that any plausible representation for + /// "logical pointer into a buffer" (e.g. `(BufferDescriptor, Offset)`) + /// must be *fundamentally* untyped (as it must allow access to relatively + /// large amounts of memory, and also support dynamic array indexing), + /// even when not a "GPU memory address" (like `PhysicalStorageBuffer`) + /// - encoding the buffer type using a (GLSL-style) "interface block", where + /// instead of a special type (or a pointer with the right storage class), + /// an `OpTypeStruct` (having the statically typed buffer contents as fields) + /// with the `Block` decoration is used, and then this "interface block" + /// type can be further nested in `OpTypeArray` or `OpTypeRuntimeArray` + /// to allow descriptor indexing - which leads to constructs like a GLSL + /// `buffer { uint data[]; } bufs[];` being encoded with two levels of + /// `OpTypeRuntimeArray`, separated not by any explicit indirection, but + /// only by the `Block` decoration on the `OpTypeStruct` for `buffer {...}` + // + // FIXME(eddyb) should `PushConstant` use `GlobalVarShape::UntypedData` + // instead of being treated like a buffer? + // + // FIXME(eddyb) should this be a `Type` of its own, that can be loaded from + // a handle `QPtr`, and then has data pointer / length ops *on that*? + Buffer(AddrSpace, BL), +} + +/// Untyped memory shape with constant alignment and size. 
+/// +/// `align`/`legacy_align` correspond to "scalar"/"base" alignments in Vulkan, +/// and are both kept track of to detect ambiguity in implicit layouts, e.g. +/// field offsets when the `Offset` decoration isn't being used. +/// Note, however, that `legacy_align` can be raised to "extended" alignment, +/// or completeley ignored, using [`LayoutConfig`](crate::qptr::LayoutConfig). +/// +/// Only `align` is *required*, that is `size % align == 0` must be always enforced. +// +// FIXME(eddyb) consider supporting specialization-constant-length arrays. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct MemLayout { + // FIXME(eddyb) use proper newtypes (and log2 for align!). + pub align: u32, + pub legacy_align: u32, + pub size: u32, +} + +/// Untyped memory shape with constant alignment but potentially-dynamic size, +/// roughly corresponding to a Rust `(FixedBase, [DynUnit])` type's layout. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct MaybeDynMemLayout { + pub fixed_base: MemLayout, + pub dyn_unit_stride: Option, +} diff --git a/src/spv/lift.rs b/src/spv/lift.rs index d9aa8bc..f3d08df 100644 --- a/src/spv/lift.rs +++ b/src/spv/lift.rs @@ -122,6 +122,12 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { } let ty_def = &self.cx[ty]; match ty_def.ctor { + // FIXME(eddyb) this should be a proper `Result`-based error instead, + // and/or `spv::lift` should mutate the module for legalization. + TypeCtor::QPtr => { + unreachable!("`TypeCtor::QPtr` should be legalized away before lifting"); + } + TypeCtor::SpvInst(_) => {} TypeCtor::SpvStringLiteralForExtInst => { unreachable!( @@ -201,16 +207,26 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { } fn visit_attr(&mut self, attr: &Attr) { match *attr { - Attr::Diagnostics(_) | Attr::SpvAnnotation { .. } | Attr::SpvBitflagsOperand(_) => {} + Attr::Diagnostics(_) + | Attr::QPtr(_) + | Attr::SpvAnnotation { .. } + | Attr::SpvBitflagsOperand(_) => {} Attr::SpvDebugLine { file_path, .. 
} => { self.debug_strings.insert(&self.cx[file_path.0]); } } + attr.inner_visit_with(self); } fn visit_data_inst_def(&mut self, data_inst_def: &DataInstDef) { #[allow(clippy::match_same_arms)] match data_inst_def.kind { + // FIXME(eddyb) this should be a proper `Result`-based error instead, + // and/or `spv::lift` should mutate the module for legalization. + DataInstKind::QPtr(_) => { + unreachable!("`DataInstKind::QPtr` should be legalized away before lifting"); + } + DataInstKind::FuncCall(_) => {} DataInstKind::SpvInst(_) => {} @@ -1192,7 +1208,7 @@ impl LazyInst<'_, '_> { }, // Not inserted into `globals` while visiting. - TypeCtor::SpvStringLiteralForExtInst => unreachable!(), + TypeCtor::QPtr | TypeCtor::SpvStringLiteralForExtInst => unreachable!(), } } Global::Const(ct) => { @@ -1207,6 +1223,11 @@ impl LazyInst<'_, '_> { assert!(ct_def.ty == gv_decl.type_of_ptr_to); let storage_class = match gv_decl.addr_space { + AddrSpace::Handles => { + unreachable!( + "`AddrSpace::Handles` should be legalized away before lifting" + ); + } AddrSpace::SpvStorageClass(sc) => { spv::Imm::Short(wk.StorageClass, sc) } @@ -1306,6 +1327,9 @@ impl LazyInst<'_, '_> { data_inst_def, } => { let (inst, extra_initial_id_operand) = match &data_inst_def.kind { + // Disallowed while visiting. + DataInstKind::QPtr(_) => unreachable!(), + &DataInstKind::FuncCall(callee) => { (wk.OpFunctionCall.into(), Some(ids.funcs[&callee].func_id)) } @@ -1620,6 +1644,7 @@ impl Module { for attr in cx[attrs].attrs.iter() { match attr { Attr::Diagnostics(_) + | Attr::QPtr(_) | Attr::SpvDebugLine { .. } | Attr::SpvBitflagsOperand(_) => {} Attr::SpvAnnotation(inst @ spv::Inst { opcode, .. }) => { @@ -1778,6 +1803,9 @@ impl Module { })?; while !contents_rest.is_empty() { + // FIXME(eddyb) test with UTF-8! this `split_at` should + // actually take *less* than the full possible size, to + // avoid cutting a UTF-8 sequence. 
let (cont_chunk, rest) = contents_rest .split_at(contents_rest.len().min(MAX_OP_SOURCE_CONT_CONTENTS_LEN)); contents_rest = rest; diff --git a/src/spv/lower.rs b/src/spv/lower.rs index 26a9d78..104b9be 100644 --- a/src/spv/lower.rs +++ b/src/spv/lower.rs @@ -221,7 +221,7 @@ impl Module { &[spv::Imm::Short(l_kind, line), spv::Imm::Short(c_kind, col)], &[file_path_id], ) => { - assert!([l_kind, c_kind] == [wk.LiteralInteger; 2]); + assert_eq!([l_kind, c_kind], [wk.LiteralInteger; 2]); let file_path = match id_defs.get(&file_path_id) { Some(&IdDef::SpvDebugString(s)) => s, _ => { @@ -307,7 +307,7 @@ impl Module { assert!(inst.result_type_id.is_none() && inst.result_id.is_none()); let cap = match (&inst.imms[..], &inst.ids[..]) { (&[spv::Imm::Short(kind, cap)], &[]) => { - assert!(kind == wk.Capability); + assert_eq!(kind, wk.Capability); cap } _ => unreachable!(), @@ -349,7 +349,7 @@ impl Module { assert!(inst.result_type_id.is_none() && inst.result_id.is_none()); let (addressing_model, memory_model) = match (&inst.imms[..], &inst.ids[..]) { (&[spv::Imm::Short(am_kind, am), spv::Imm::Short(mm_kind, mm)], &[]) => { - assert!(am_kind == wk.AddressingModel && mm_kind == wk.MemoryModel); + assert_eq!([am_kind, mm_kind], [wk.AddressingModel, wk.MemoryModel]); (am, mm) } _ => unreachable!(), @@ -387,7 +387,7 @@ impl Module { spv::Imm::Short(v_kind, version), .., ] => { - assert!(l_kind == wk.SourceLanguage && v_kind == wk.LiteralInteger); + assert_eq!([l_kind, v_kind], [wk.SourceLanguage, wk.LiteralInteger]); (lang, version) } _ => unreachable!(), @@ -649,7 +649,7 @@ impl Module { let storage_class = match inst.imms[..] 
{ [spv::Imm::Short(kind, storage_class)] => { - assert!(kind == wk.StorageClass); + assert_eq!(kind, wk.StorageClass); storage_class } _ => unreachable!(), @@ -691,6 +691,7 @@ impl Module { GlobalVarDecl { attrs: mem::take(&mut attrs), type_of_ptr_to: type_of_ptr_to_global_var, + shape: None, addr_space: AddrSpace::SpvStorageClass(storage_class), def, }, @@ -1349,7 +1350,7 @@ impl Module { let inst = match imms[..] { [spv::Imm::Short(kind, inst)] => { - assert!(kind == wk.LiteralExtInstInteger); + assert_eq!(kind, wk.LiteralExtInstInteger); inst } _ => unreachable!(), diff --git a/src/spv/mod.rs b/src/spv/mod.rs index 2067e22..d5e87e3 100644 --- a/src/spv/mod.rs +++ b/src/spv/mod.rs @@ -120,10 +120,10 @@ fn extract_literal_string(imms: &[Imm]) -> Result { let mut words = match *imms { [Imm::Short(kind, first_word)] | [Imm::LongStart(kind, first_word), ..] => { - assert!(kind == wk.LiteralString); + assert_eq!(kind, wk.LiteralString); iter::once(first_word).chain(imms[1..].iter().map(|&imm| match imm { Imm::LongCont(kind, word) => { - assert!(kind == wk.LiteralString); + assert_eq!(kind, wk.LiteralString); word } _ => unreachable!(), diff --git a/src/spv/print.rs b/src/spv/print.rs index 9c5da07..6d7e239 100644 --- a/src/spv/print.rs +++ b/src/spv/print.rs @@ -114,7 +114,7 @@ impl, ID, IDS: Iterator> OperandPrint words.push(first_word); while let Some(&spv::Imm::LongCont(cont_kind, word)) = self.imms.peek() { self.imms.next(); - assert!(kind == cont_kind); + assert_eq!(kind, cont_kind); words.push(word); } @@ -169,7 +169,7 @@ impl, ID, IDS: Iterator> OperandPrint let mut maybe_get_enum_word = || match self.imms.next() { Some(spv::Imm::Short(found_kind, word)) => { - assert!(kind == found_kind); + assert_eq!(kind, found_kind); Some(word) } Some(spv::Imm::LongStart(..) 
| spv::Imm::LongCont(..)) => unreachable!(), @@ -235,7 +235,7 @@ impl, ID, IDS: Iterator> OperandPrint Some( spv::Imm::Short(found_kind, word) | spv::Imm::LongStart(found_kind, word), ) => { - assert!(kind == found_kind); + assert_eq!(kind, found_kind); self.literal(kind, word); } Some(spv::Imm::LongCont(..)) => unreachable!(), diff --git a/src/spv/read.rs b/src/spv/read.rs index ebd8cfa..6b29e49 100644 --- a/src/spv/read.rs +++ b/src/spv/read.rs @@ -362,7 +362,7 @@ impl Iterator for ModuleParser { let known_id_def = if opcode == wk.OpTypeInt { KnownIdDef::TypeInt(match inst.imms[0] { spv::Imm::Short(kind, n) => { - assert!(kind == wk.LiteralInteger); + assert_eq!(kind, wk.LiteralInteger); n.try_into() .ok() .ok_or_else(|| invalid("Width cannot be 0"))? @@ -372,7 +372,7 @@ impl Iterator for ModuleParser { } else if opcode == wk.OpTypeFloat { KnownIdDef::TypeFloat(match inst.imms[0] { spv::Imm::Short(kind, n) => { - assert!(kind == wk.LiteralInteger); + assert_eq!(kind, wk.LiteralInteger); n.try_into() .ok() .ok_or_else(|| invalid("Width cannot be 0"))? diff --git a/src/spv/spec.rs b/src/spv/spec.rs index 959348d..9b8ef61 100644 --- a/src/spv/spec.rs +++ b/src/spv/spec.rs @@ -3,7 +3,7 @@ use arrayvec::ArrayVec; use lazy_static::lazy_static; use rustc_hash::FxHashMap; -use std::iter; +use std::{fmt, iter}; use self::indexed::FlatIdx as _; @@ -85,9 +85,17 @@ def_well_known! { OpTypeInt, OpTypeFloat, OpTypeVector, + OpTypeMatrix, + OpTypeArray, + OpTypeRuntimeArray, + OpTypeStruct, OpTypeForwardPointer, OpTypePointer, OpTypeFunction, + OpTypeImage, + OpTypeSampler, + OpTypeSampledImage, + OpTypeAccelerationStructureKHR, OpConstantFalse, OpConstantTrue, @@ -113,6 +121,15 @@ def_well_known! { OpSwitch, OpFunctionCall, + + OpLoad, + OpStore, + OpArrayLength, + OpAccessChain, + OpInBoundsAccessChain, + OpPtrAccessChain, + OpInBoundsPtrAccessChain, + OpBitcast, ], operand_kind: OperandKind = [ Capability, @@ -129,13 +146,30 @@ def_well_known! 
{ LiteralInteger, LiteralExtInstInteger, LiteralString, + LiteralContextDependentNumber, ], // FIXME(eddyb) find a way to namespace these to avoid conflicts. storage_class: u32 = [ Function, + + UniformConstant, + Input, + Output, + + IncomingRayPayloadKHR, + IncomingCallableDataKHR, + HitAttributeKHR, + RayPayloadKHR, + CallableDataKHR, ], decoration: u32 = [ LinkageAttributes, + + ArrayStride, + + Block, + RowMajor, + Offset, ], linkage_type: u32 = [ Import, @@ -263,6 +297,12 @@ impl indexed::FlatIdx for OperandKind { } } +impl fmt::Debug for OperandKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "OperandKind({} => {:?})", self.0, self.name()) + } +} + impl OperandKind { /// Lookup the name & definition for this operand kind in the lazily-loaded [`Spec`]. #[inline] diff --git a/src/spv/write.rs b/src/spv/write.rs index 4a676ea..ff50515 100644 --- a/src/spv/write.rs +++ b/src/spv/write.rs @@ -87,7 +87,7 @@ impl OperandEmitter<'_> { let mut get_enum_word = || match self.imms.next() { Some(spv::Imm::Short(found_kind, word)) => { - assert!(kind == found_kind); + assert_eq!(kind, found_kind); Ok(word) } Some(spv::Imm::LongStart(..) | spv::Imm::LongCont(..)) => unreachable!(), @@ -123,17 +123,17 @@ impl OperandEmitter<'_> { spec::OperandKindDef::Literal { .. } => { match self.imms.next().ok_or(Error::NotEnoughImms)? { spv::Imm::Short(found_kind, word) => { - assert!(kind == found_kind); + assert_eq!(kind, found_kind); self.out.push(word); } spv::Imm::LongStart(found_kind, word) => { - assert!(kind == found_kind); + assert_eq!(kind, found_kind); self.out.push(word); while let Some(spv::Imm::LongCont(cont_kind, word)) = self.imms.clone().next() { self.imms.next(); - assert!(kind == cont_kind); + assert_eq!(kind, cont_kind); self.out.push(word); } } diff --git a/src/transform.rs b/src/transform.rs index e2b10c0..6f6a0dc 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -1,15 +1,17 @@ //! Mutable IR traversal. 
use crate::func_at::FuncAtMut; +use crate::qptr::{self, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage}; use crate::{ cfg, spv, AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstCtor, ConstDef, ControlNode, ControlNodeDef, ControlNodeKind, ControlNodeOutputDecl, ControlRegion, ControlRegionDef, - ControlRegionInputDecl, DataInstDef, DataInstKind, DeclDef, EntityListIter, ExportKey, - Exportee, Func, FuncDecl, FuncDefBody, FuncParam, GlobalVar, GlobalVarDecl, GlobalVarDefBody, - Import, Module, ModuleDebugInfo, ModuleDialect, SelectionKind, Type, TypeCtor, TypeCtorArg, - TypeDef, Value, + ControlRegionInputDecl, DataInst, DataInstDef, DataInstKind, DeclDef, EntityListIter, + ExportKey, Exportee, Func, FuncDecl, FuncDefBody, FuncParam, GlobalVar, GlobalVarDecl, + GlobalVarDefBody, Import, Module, ModuleDebugInfo, ModuleDialect, OrdAssertEq, SelectionKind, + Type, TypeCtor, TypeCtorArg, TypeDef, Value, }; use std::cmp::Ordering; +use std::rc::Rc; use std::slice; /// The result of a transformation (which is not in-place). @@ -152,11 +154,6 @@ pub trait Transformer: Sized { Transformed::Unchanged } - // Leaves (noop default behavior). - fn transform_attr(&mut self, _attr: &Attr) -> Transformed { - Transformed::Unchanged - } - // Leaves transformed in-place (noop default behavior). 
fn in_place_transform_spv_dialect(&mut self, _dialect: &mut spv::Dialect) {} fn in_place_transform_spv_module_debug_info(&mut self, _debug_info: &mut spv::ModuleDebugInfo) { @@ -166,6 +163,9 @@ pub trait Transformer: Sized { fn transform_attr_set_def(&mut self, attrs_def: &AttrSetDef) -> Transformed { attrs_def.inner_transform_with(self) } + fn transform_attr(&mut self, attr: &Attr) -> Transformed { + attr.inner_transform_with(self) + } fn transform_type_def(&mut self, ty_def: &TypeDef) -> Transformed { ty_def.inner_transform_with(self) } @@ -199,8 +199,8 @@ pub trait Transformer: Sized { ) { func_at_control_node.inner_in_place_transform_with(self); } - fn in_place_transform_data_inst_def(&mut self, data_inst_def: &mut DataInstDef) { - data_inst_def.inner_in_place_transform_with(self); + fn in_place_transform_data_inst_def(&mut self, mut func_at_data_inst: FuncAtMut<'_, DataInst>) { + func_at_data_inst.inner_in_place_transform_with(self); } } @@ -328,6 +328,95 @@ impl InnerTransform for AttrSetDef { } } +impl InnerTransform for Attr { + fn inner_transform_with(&self, transformer: &mut impl Transformer) -> Transformed { + match self { + Attr::Diagnostics(_) + | Attr::SpvAnnotation(_) + | Attr::SpvDebugLine { .. 
} + | Attr::SpvBitflagsOperand(_) => Transformed::Unchanged, + + Attr::QPtr(attr) => transform!({ + attr -> match attr { + &QPtrAttr::ToSpvPtrInput { input_idx, pointee } => transform!({ + pointee -> transformer.transform_type_use(pointee.0).map(OrdAssertEq), + } => QPtrAttr::ToSpvPtrInput { input_idx, pointee }), + + &QPtrAttr::FromSpvPtrOutput { + addr_space, + pointee, + } => transform!({ + pointee -> transformer.transform_type_use(pointee.0).map(OrdAssertEq), + } => QPtrAttr::FromSpvPtrOutput { + addr_space, + pointee, + }), + + QPtrAttr::Usage(OrdAssertEq(usage)) => transform!({ + usage -> match usage { + &QPtrUsage::Handles(qptr::shapes::Handle::Opaque(ty)) => transform!({ + ty -> transformer.transform_type_use(ty), + } => QPtrUsage::Handles(qptr::shapes::Handle::Opaque(ty))), + QPtrUsage::Handles(qptr::shapes::Handle::Buffer(addr_space, data_usage)) => transform!({ + data_usage -> data_usage.inner_transform_with(transformer), + } => QPtrUsage::Handles(qptr::shapes::Handle::Buffer(*addr_space, data_usage))), + QPtrUsage::Memory(usage) => transform!({ + usage -> usage.inner_transform_with(transformer), + } => QPtrUsage::Memory(usage)), + } + } => QPtrAttr::Usage(OrdAssertEq(usage))), + } + } => Attr::QPtr(attr)), + } + } +} + +// FIXME(eddyb) this should maybe be in a more general spot. 
+impl InnerTransform for Rc { + fn inner_transform_with(&self, transformer: &mut impl Transformer) -> Transformed { + (**self).inner_transform_with(transformer).map(Rc::new) + } +} + +impl InnerTransform for QPtrMemUsage { + fn inner_transform_with(&self, transformer: &mut impl Transformer) -> Transformed { + let Self { max_size, kind } = self; + + transform!({ + kind -> kind.inner_transform_with(transformer) + } => Self { + max_size: *max_size, + kind, + }) + } +} + +impl InnerTransform for QPtrMemUsageKind { + fn inner_transform_with(&self, transformer: &mut impl Transformer) -> Transformed { + match self { + Self::Unused => Transformed::Unchanged, + &Self::StrictlyTyped(ty) => transform!({ + ty -> transformer.transform_type_use(ty), + } => Self::StrictlyTyped(ty)), + &Self::DirectAccess(ty) => transform!({ + ty -> transformer.transform_type_use(ty), + } => Self::DirectAccess(ty)), + Self::OffsetBase(entries) => transform!({ + entries -> Transformed::map_iter(entries.values(), |sub_usage| { + sub_usage.inner_transform_with(transformer) + }).map(|new_iter| { + // HACK(eddyb) this is a bit inefficient but `Transformed::map_iter` + // limits us here in how it handles the whole `Clone` thing. 
+ entries.keys().copied().zip(new_iter).collect() + }).map(Rc::new) + } => Self::OffsetBase(entries)), + Self::DynOffsetBase { element, stride } => transform!({ + element -> element.inner_transform_with(transformer), + } => Self::DynOffsetBase { element, stride: *stride }), + } + } +} + impl InnerTransform for TypeDef { fn inner_transform_with(&self, transformer: &mut impl Transformer) -> Transformed { let Self { @@ -339,7 +428,8 @@ impl InnerTransform for TypeDef { transform!({ attrs -> transformer.transform_attr_set_use(*attrs), ctor -> match ctor { - TypeCtor::SpvInst(_) + TypeCtor::QPtr + | TypeCtor::SpvInst(_) | TypeCtor::SpvStringLiteralForExtInst => Transformed::Unchanged, }, ctor_args -> Transformed::map_iter(ctor_args.iter(), |arg| match *arg { @@ -408,6 +498,7 @@ impl InnerInPlaceTransform for GlobalVarDecl { let Self { attrs, type_of_ptr_to, + shape, addr_space, def, } = self; @@ -416,8 +507,17 @@ impl InnerInPlaceTransform for GlobalVarDecl { transformer .transform_type_use(*type_of_ptr_to) .apply_to(type_of_ptr_to); + if let Some(shape) = shape { + match shape { + qptr::shapes::GlobalVarShape::TypedInterface(ty) => { + transformer.transform_type_use(*ty).apply_to(ty); + } + qptr::shapes::GlobalVarShape::Handles { .. 
} + | qptr::shapes::GlobalVarShape::UntypedData(_) => {} + } + } match addr_space { - AddrSpace::SpvStorageClass(_) => {} + AddrSpace::Handles | AddrSpace::SpvStorageClass(_) => {} } def.inner_in_place_transform_with(transformer); } @@ -562,7 +662,7 @@ impl InnerInPlaceTransform for FuncAtMut<'_, ControlNode> { &mut ControlNodeKind::Block { insts } => { let mut func_at_inst_iter = self.reborrow().at(insts).into_iter(); while let Some(func_at_inst) = func_at_inst_iter.next() { - transformer.in_place_transform_data_inst_def(func_at_inst.def()); + transformer.in_place_transform_data_inst_def(func_at_inst); } } ControlNodeKind::Select { @@ -636,18 +736,28 @@ impl InnerTransform for ControlNodeOutputDecl { } } -impl InnerInPlaceTransform for DataInstDef { +impl InnerInPlaceTransform for FuncAtMut<'_, DataInst> { fn inner_in_place_transform_with(&mut self, transformer: &mut impl Transformer) { - let Self { + let DataInstDef { attrs, kind, output_type, inputs, - } = self; + } = self.reborrow().def(); transformer.transform_attr_set_use(*attrs).apply_to(attrs); match kind { DataInstKind::FuncCall(func) => transformer.transform_func_use(*func).apply_to(func), + DataInstKind::QPtr(op) => match op { + QPtrOp::FuncLocalVar(_) + | QPtrOp::HandleArrayIndex + | QPtrOp::BufferData + | QPtrOp::BufferDynLen { .. } + | QPtrOp::Offset(_) + | QPtrOp::DynOffset { .. } + | QPtrOp::Load + | QPtrOp::Store => {} + }, DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => {} } if let Some(ty) = output_type { diff --git a/src/visit.rs b/src/visit.rs index 05beb84..0d2d230 100644 --- a/src/visit.rs +++ b/src/visit.rs @@ -1,6 +1,7 @@ //! Immutable IR traversal. 
use crate::func_at::FuncAt; +use crate::qptr::{self, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage}; use crate::{ cfg, spv, AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstCtor, ConstDef, ControlNode, ControlNodeDef, ControlNodeKind, ControlNodeOutputDecl, ControlRegion, ControlRegionDef, @@ -27,7 +28,6 @@ pub trait Visitor<'a>: Sized { // Leaves (noop default behavior). fn visit_spv_dialect(&mut self, _dialect: &spv::Dialect) {} fn visit_spv_module_debug_info(&mut self, _debug_info: &spv::ModuleDebugInfo) {} - fn visit_attr(&mut self, _attr: &'a Attr) {} fn visit_import(&mut self, _import: &Import) {} // Non-leaves (defaulting to calling `.inner_visit_with(self)`). @@ -43,6 +43,9 @@ pub trait Visitor<'a>: Sized { fn visit_attr_set_def(&mut self, attrs_def: &'a AttrSetDef) { attrs_def.inner_visit_with(self); } + fn visit_attr(&mut self, attr: &'a Attr) { + attr.inner_visit_with(self); + } fn visit_type_def(&mut self, ty_def: &'a TypeDef) { ty_def.inner_visit_with(self); } @@ -114,12 +117,12 @@ impl_visit! { by_ref { visit_spv_dialect(spv::Dialect), visit_spv_module_debug_info(spv::ModuleDebugInfo), - visit_attr(Attr), visit_import(Import), visit_module(Module), visit_module_dialect(ModuleDialect), visit_module_debug_info(ModuleDebugInfo), visit_attr_set_def(AttrSetDef), + visit_attr(Attr), visit_type_def(TypeDef), visit_const_def(ConstDef), visit_global_var_decl(GlobalVarDecl), @@ -251,12 +254,25 @@ impl InnerVisit for AttrSetDef { } impl InnerVisit for Attr { - fn inner_visit_with<'a>(&'a self, _visitor: &mut impl Visitor<'a>) { + fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { match self { Attr::Diagnostics(_) | Attr::SpvAnnotation(_) | Attr::SpvDebugLine { .. 
} | Attr::SpvBitflagsOperand(_) => {} + + Attr::QPtr(attr) => match attr { + QPtrAttr::ToSpvPtrInput { + input_idx: _, + pointee, + } + | QPtrAttr::FromSpvPtrOutput { + addr_space: _, + pointee, + } => visitor.visit_type_use(pointee.0), + + QPtrAttr::Usage(usage) => usage.0.inner_visit_with(visitor), + }, } } } @@ -271,6 +287,47 @@ impl InnerVisit for Vec { &DiagMsgPart::Attrs(attrs) => visitor.visit_attr_set_use(attrs), &DiagMsgPart::Type(ty) => visitor.visit_type_use(ty), &DiagMsgPart::Const(ct) => visitor.visit_const_use(ct), + DiagMsgPart::QPtrUsage(usage) => usage.inner_visit_with(visitor), + } + } + } +} + +impl InnerVisit for QPtrUsage { + fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { + match self { + &QPtrUsage::Handles(qptr::shapes::Handle::Opaque(ty)) => { + visitor.visit_type_use(ty); + } + QPtrUsage::Handles(qptr::shapes::Handle::Buffer(_, data_usage)) => { + data_usage.inner_visit_with(visitor); + } + QPtrUsage::Memory(usage) => usage.inner_visit_with(visitor), + } + } +} + +impl InnerVisit for QPtrMemUsage { + fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { + let Self { max_size: _, kind } = self; + kind.inner_visit_with(visitor); + } +} + +impl InnerVisit for QPtrMemUsageKind { + fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { + match self { + Self::Unused => {} + &Self::StrictlyTyped(ty) | &Self::DirectAccess(ty) => { + visitor.visit_type_use(ty); + } + Self::OffsetBase(entries) => { + for sub_usage in entries.values() { + sub_usage.inner_visit_with(visitor); + } + } + Self::DynOffsetBase { element, stride: _ } => { + element.inner_visit_with(visitor); } } } @@ -286,7 +343,7 @@ impl InnerVisit for TypeDef { visitor.visit_attr_set_use(*attrs); match ctor { - TypeCtor::SpvInst(_) | TypeCtor::SpvStringLiteralForExtInst => {} + TypeCtor::QPtr | TypeCtor::SpvInst(_) | TypeCtor::SpvStringLiteralForExtInst => {} } for &arg in ctor_args { match arg { @@ -332,14 +389,22 @@ impl InnerVisit for 
GlobalVarDecl { let Self { attrs, type_of_ptr_to, + shape, addr_space, def, } = self; visitor.visit_attr_set_use(*attrs); visitor.visit_type_use(*type_of_ptr_to); + if let Some(shape) = shape { + match shape { + qptr::shapes::GlobalVarShape::TypedInterface(ty) => visitor.visit_type_use(*ty), + qptr::shapes::GlobalVarShape::Handles { .. } + | qptr::shapes::GlobalVarShape::UntypedData(_) => {} + } + } match addr_space { - AddrSpace::SpvStorageClass(_) => {} + AddrSpace::Handles | AddrSpace::SpvStorageClass(_) => {} } def.inner_visit_with(visitor); } @@ -441,7 +506,7 @@ impl<'a> FuncAt<'a, EntityListIter> { // FIXME(eddyb) this can't implement `InnerVisit` because of the `&'a self` // requirement, whereas this has `'a` in `self: FuncAt<'a, ControlNode>`. impl<'a> FuncAt<'a, ControlNode> { - fn inner_visit_with(self, visitor: &mut impl Visitor<'a>) { + pub fn inner_visit_with(self, visitor: &mut impl Visitor<'a>) { let ControlNodeDef { kind, outputs } = self.def(); match kind { @@ -497,8 +562,18 @@ impl InnerVisit for DataInstDef { } = self; visitor.visit_attr_set_use(*attrs); - match *kind { - DataInstKind::FuncCall(func) => visitor.visit_func_use(func), + match kind { + &DataInstKind::FuncCall(func) => visitor.visit_func_use(func), + DataInstKind::QPtr(op) => match *op { + QPtrOp::FuncLocalVar(_) + | QPtrOp::HandleArrayIndex + | QPtrOp::BufferData + | QPtrOp::BufferDynLen { .. } + | QPtrOp::Offset(_) + | QPtrOp::DynOffset { .. } + | QPtrOp::Load + | QPtrOp::Store => {} + }, DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => {} } if let Some(ty) = *output_type {