use super::{sampler as sm, Error, LocationMode, Options, PipelineOptions, TranslationInfo};
use crate::{
arena::{Handle, HandleSet},
back::{self, Baked},
proc::index,
proc::{self, NameKey, TypeResolution},
valid, FastHashMap, FastHashSet,
};
#[cfg(test)]
use std::ptr;
use std::{
fmt::{Display, Error as FmtError, Formatter, Write},
iter,
};
/// Shorthand result used internally by the backend
type BackendResult = Result<(), Error>;
const NAMESPACE: &str = "metal";
// The name of the array member of the Metal struct types we generate to
// represent Naga `Array` types. See the comments in `Writer::write_type_defs`
// for details.
const WRAPPED_ARRAY_FIELD: &str = "inner";
// This is a hack: we need to pass a pointer to an atomic, but the backend
// generally doesn't put "&" in front of every pointer. More general
// handling of pointers needs to be implemented here.
const ATOMIC_REFERENCE: &str = "&";
const RT_NAMESPACE: &str = "metal::raytracing";
const RAY_QUERY_TYPE: &str = "_RayQuery";
const RAY_QUERY_FIELD_INTERSECTOR: &str = "intersector";
const RAY_QUERY_FIELD_INTERSECTION: &str = "intersection";
const RAY_QUERY_FIELD_READY: &str = "ready";
const RAY_QUERY_FUN_MAP_INTERSECTION: &str = "_map_intersection_type";
pub(crate) const ATOMIC_COMP_EXCH_FUNCTION: &str = "naga_atomic_compare_exchange_weak_explicit";
pub(crate) const MODF_FUNCTION: &str = "naga_modf";
pub(crate) const FREXP_FUNCTION: &str = "naga_frexp";
/// Write the Metal name for a Naga numeric type: scalar, vector, or matrix.
///
/// The `sizes` slice determines whether this function writes a
/// scalar, vector, or matrix type:
///
/// - An empty slice produces a scalar type.
/// - A one-element slice produces a vector type.
/// - A two-element slice `[ROWS, COLUMNS]` produces a matrix of the given size.
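///
/// For example, with [`crate::Scalar::F32`]: `&[]` writes `float`, `&[Tri]`
/// writes `metal::float3`, and `&[Tri, Quad]` (three rows, four columns)
/// writes `metal::float4x3`, following MSL's `floatCxR` (columns-by-rows)
/// naming.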
fn put_numeric_type(
out: &mut impl Write,
scalar: crate::Scalar,
sizes: &[crate::VectorSize],
) -> Result<(), FmtError> {
match (scalar, sizes) {
(scalar, &[]) => {
write!(out, "{}", scalar.to_msl_name())
}
(scalar, &[rows]) => {
write!(
out,
"{}::{}{}",
NAMESPACE,
scalar.to_msl_name(),
back::vector_size_str(rows)
)
}
(scalar, &[rows, columns]) => {
write!(
out,
"{}::{}{}x{}",
NAMESPACE,
scalar.to_msl_name(),
back::vector_size_str(columns),
back::vector_size_str(rows)
)
}
(_, _) => Ok(()), // not meaningful
}
}
const fn scalar_is_int(scalar: crate::Scalar) -> bool {
use crate::ScalarKind::*;
match scalar.kind {
Sint | Uint | AbstractInt | Bool => true,
Float | AbstractFloat => false,
}
}
/// Prefix for cached clamped level-of-detail values for `ImageLoad` expressions.
const CLAMPED_LOD_LOAD_PREFIX: &str = "clamped_lod_e";
/// Wrapper for identifier names for clamped level-of-detail values
///
/// Values of this type implement [`std::fmt::Display`], formatting as
/// the name of the variable used to hold the cached clamped
/// level-of-detail value for an `ImageLoad` expression.
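///
/// The formatted name is [`CLAMPED_LOD_LOAD_PREFIX`] followed by a number
/// derived from the expression's handle, so something like `clamped_lod_e42`.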
struct ClampedLod(Handle<crate::Expression>);
impl Display for ClampedLod {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.0.write_prefixed(f, CLAMPED_LOD_LOAD_PREFIX)
}
}
/// Wrapper for generating `struct _mslBufferSizes` member names for
/// runtime-sized array lengths.
///
/// On Metal, `wgpu_hal` passes the element counts for all runtime-sized arrays
/// as an argument to the entry point. This argument's type in the MSL is
/// `struct _mslBufferSizes`, a Naga-synthesized struct with a `uint` member for
/// each global variable containing a runtime-sized array.
///
/// If `global` is a [`Handle`] for a [`GlobalVariable`] that contains a
/// runtime-sized array, then the value `ArraySize(global)` implements
/// [`std::fmt::Display`], formatting as the name of the struct member carrying
/// the number of elements in that runtime-sized array.
///
/// [`GlobalVariable`]: crate::GlobalVariable
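///
/// The formatted name is the prefix `size` followed by a number derived
/// from the global variable's handle, so something like `size3`.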
struct ArraySizeMember(Handle<crate::GlobalVariable>);
impl Display for ArraySizeMember {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.0.write_prefixed(f, "size")
}
}
struct TypeContext<'a> {
handle: Handle<crate::Type>,
gctx: proc::GlobalCtx<'a>,
names: &'a FastHashMap<NameKey, String>,
access: crate::StorageAccess,
binding: Option<&'a super::ResolvedBinding>,
first_time: bool,
}
impl TypeContext<'_> {
fn scalar(&self) -> Option<crate::Scalar> {
let ty = &self.gctx.types[self.handle];
ty.inner.scalar()
}
fn vertex_input_dimension(&self) -> u32 {
let ty = &self.gctx.types[self.handle];
match ty.inner {
crate::TypeInner::Scalar(_) => 1,
crate::TypeInner::Vector { size, .. } => size as u32,
_ => unreachable!(),
}
}
}
impl Display for TypeContext<'_> {
fn fmt(&self, out: &mut Formatter<'_>) -> Result<(), FmtError> {
let ty = &self.gctx.types[self.handle];
if ty.needs_alias() && !self.first_time {
let name = &self.names[&NameKey::Type(self.handle)];
return write!(out, "{name}");
}
match ty.inner {
crate::TypeInner::Scalar(scalar) => put_numeric_type(out, scalar, &[]),
crate::TypeInner::Atomic(scalar) => {
write!(out, "{}::atomic_{}", NAMESPACE, scalar.to_msl_name())
}
crate::TypeInner::Vector { size, scalar } => put_numeric_type(out, scalar, &[size]),
crate::TypeInner::Matrix { columns, rows, .. } => {
put_numeric_type(out, crate::Scalar::F32, &[rows, columns])
}
crate::TypeInner::Pointer { base, space } => {
let sub = Self {
handle: base,
first_time: false,
..*self
};
let space_name = match space.to_msl_name() {
Some(name) => name,
None => return Ok(()),
};
write!(out, "{space_name} {sub}&")
}
crate::TypeInner::ValuePointer {
size,
scalar,
space,
} => {
match space.to_msl_name() {
Some(name) => write!(out, "{name} ")?,
None => return Ok(()),
};
match size {
Some(rows) => put_numeric_type(out, scalar, &[rows])?,
None => put_numeric_type(out, scalar, &[])?,
};
write!(out, "&")
}
crate::TypeInner::Array { base, .. } => {
let sub = Self {
handle: base,
first_time: false,
..*self
};
// Array lengths go at the end of the type definition,
// so just print the element type here.
write!(out, "{sub}")
}
crate::TypeInner::Struct { .. } => unreachable!(),
crate::TypeInner::Image {
dim,
arrayed,
class,
} => {
let dim_str = match dim {
crate::ImageDimension::D1 => "1d",
crate::ImageDimension::D2 => "2d",
crate::ImageDimension::D3 => "3d",
crate::ImageDimension::Cube => "cube",
};
let (texture_str, msaa_str, scalar, access) = match class {
crate::ImageClass::Sampled { kind, multi } => {
let (msaa_str, access) = if multi {
("_ms", "read")
} else {
("", "sample")
};
let scalar = crate::Scalar { kind, width: 4 };
("texture", msaa_str, scalar, access)
}
crate::ImageClass::Depth { multi } => {
let (msaa_str, access) = if multi {
("_ms", "read")
} else {
("", "sample")
};
let scalar = crate::Scalar {
kind: crate::ScalarKind::Float,
width: 4,
};
("depth", msaa_str, scalar, access)
}
crate::ImageClass::Storage { format, .. } => {
let access = if self
.access
.contains(crate::StorageAccess::LOAD | crate::StorageAccess::STORE)
{
"read_write"
} else if self.access.contains(crate::StorageAccess::STORE) {
"write"
} else if self.access.contains(crate::StorageAccess::LOAD) {
"read"
} else {
log::warn!(
"Storage access for {:?} (name '{}'): {:?}",
self.handle,
ty.name.as_deref().unwrap_or_default(),
self.access
);
unreachable!("module is not valid");
};
("texture", "", format.into(), access)
}
};
let base_name = scalar.to_msl_name();
let array_str = if arrayed { "_array" } else { "" };
write!(
out,
"{NAMESPACE}::{texture_str}{dim_str}{msaa_str}{array_str}<{base_name}, {NAMESPACE}::access::{access}>",
)
}
crate::TypeInner::Sampler { comparison: _ } => {
write!(out, "{NAMESPACE}::sampler")
}
crate::TypeInner::AccelerationStructure => {
write!(out, "{RT_NAMESPACE}::instance_acceleration_structure")
}
crate::TypeInner::RayQuery => {
write!(out, "{RAY_QUERY_TYPE}")
}
crate::TypeInner::BindingArray { base, size } => {
let base_tyname = Self {
handle: base,
first_time: false,
..*self
};
if let Some(&super::ResolvedBinding::Resource(super::BindTarget {
binding_array_size: Some(override_size),
..
})) = self.binding
{
write!(out, "{NAMESPACE}::array<{base_tyname}, {override_size}>")
} else if let crate::ArraySize::Constant(size) = size {
write!(out, "{NAMESPACE}::array<{base_tyname}, {size}>")
} else {
unreachable!("metal requires all arrays be constant sized");
}
}
}
}
}
struct TypedGlobalVariable<'a> {
module: &'a crate::Module,
names: &'a FastHashMap<NameKey, String>,
handle: Handle<crate::GlobalVariable>,
usage: valid::GlobalUse,
binding: Option<&'a super::ResolvedBinding>,
reference: bool,
}
impl TypedGlobalVariable<'_> {
fn try_fmt<W: Write>(&self, out: &mut W) -> BackendResult {
let var = &self.module.global_variables[self.handle];
let name = &self.names[&NameKey::GlobalVariable(self.handle)];
let storage_access = match var.space {
crate::AddressSpace::Storage { access } => access,
_ => match self.module.types[var.ty].inner {
crate::TypeInner::Image {
class: crate::ImageClass::Storage { access, .. },
..
} => access,
crate::TypeInner::BindingArray { base, .. } => {
match self.module.types[base].inner {
crate::TypeInner::Image {
class: crate::ImageClass::Storage { access, .. },
..
} => access,
_ => crate::StorageAccess::default(),
}
}
_ => crate::StorageAccess::default(),
},
};
let ty_name = TypeContext {
handle: var.ty,
gctx: self.module.to_ctx(),
names: self.names,
access: storage_access,
binding: self.binding,
first_time: false,
};
let (space, access, reference) = match var.space.to_msl_name() {
Some(space) if self.reference => {
let access = if var.space.needs_access_qualifier()
&& !self.usage.contains(valid::GlobalUse::WRITE)
{
"const"
} else {
""
};
(space, access, "&")
}
_ => ("", "", ""),
};
Ok(write!(
out,
"{}{}{}{}{}{} {}",
space,
if space.is_empty() { "" } else { " " },
ty_name,
if access.is_empty() { "" } else { " " },
access,
reference,
name,
)?)
}
}
pub struct Writer<W> {
out: W,
names: FastHashMap<NameKey, String>,
named_expressions: crate::NamedExpressions,
/// Set of expressions that need to be baked to avoid unnecessary repetition in output
need_bake_expressions: back::NeedBakeExpressions,
namer: proc::Namer,
#[cfg(test)]
put_expression_stack_pointers: FastHashSet<*const ()>,
#[cfg(test)]
put_block_stack_pointers: FastHashSet<*const ()>,
/// Set of (struct type, struct field index) denoting which fields require
/// padding inserted **before** them (i.e., between the fields at `index - 1` and `index`)
struct_member_pads: FastHashSet<(Handle<crate::Type>, u32)>,
/// Name of the force-bounded-loop macro.
///
/// See `emit_force_bounded_loop_macro` for details.
force_bounded_loop_macro_name: String,
}
impl crate::Scalar {
fn to_msl_name(self) -> &'static str {
use crate::ScalarKind as Sk;
match self {
Self {
kind: Sk::Float,
width: _,
} => "float",
Self {
kind: Sk::Sint,
width: 4,
} => "int",
Self {
kind: Sk::Uint,
width: 4,
} => "uint",
Self {
kind: Sk::Sint,
width: 8,
} => "long",
Self {
kind: Sk::Uint,
width: 8,
} => "ulong",
Self {
kind: Sk::Bool,
width: _,
} => "bool",
Self {
kind: Sk::AbstractInt | Sk::AbstractFloat,
width: _,
} => unreachable!("Found Abstract scalar kind"),
_ => unreachable!("Unsupported scalar kind: {:?}", self),
}
}
}
const fn separate(need_separator: bool) -> &'static str {
if need_separator {
","
} else {
""
}
}
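/// Decide whether struct member `index` needs a tightly packed vector type.
///
/// A sketch of the rule implemented below: in MSL, a three-component
/// vector of a 4-byte scalar occupies 16 bytes, while Naga's IR sizes it
/// at 12. If the next member's offset (or the struct's span) immediately
/// follows those 12 bytes, the normal MSL layout would not match, so the
/// element scalar is returned to signal that a packed type (such as
/// `packed_float3`) must be emitted for this member.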
fn should_pack_struct_member(
members: &[crate::StructMember],
span: u32,
index: usize,
module: &crate::Module,
) -> Option<crate::Scalar> {
let member = &members[index];
let ty_inner = &module.types[member.ty].inner;
let last_offset = member.offset + ty_inner.size(module.to_ctx());
let next_offset = match members.get(index + 1) {
Some(next) => next.offset,
None => span,
};
let is_tight = next_offset == last_offset;
match *ty_inner {
crate::TypeInner::Vector {
size: crate::VectorSize::Tri,
scalar: scalar @ crate::Scalar { width: 4, .. },
} if is_tight => Some(scalar),
_ => None,
}
}
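/// Return `true` if values of type `ty` need a runtime array length.
///
/// This holds when `ty` is a runtime-sized array, or a struct whose last
/// member is one; buffers holding such values get their element counts
/// from the `_mslBufferSizes` argument described at [`ArraySizeMember`].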
fn needs_array_length(ty: Handle<crate::Type>, arena: &crate::UniqueArena<crate::Type>) -> bool {
match arena[ty].inner {
crate::TypeInner::Struct { ref members, .. } => {
if let Some(member) = members.last() {
if let crate::TypeInner::Array {
size: crate::ArraySize::Dynamic,
..
} = arena[member.ty].inner
{
return true;
}
}
false
}
crate::TypeInner::Array {
size: crate::ArraySize::Dynamic,
..
} => true,
_ => false,
}
}
impl crate::AddressSpace {
/// Returns true if global variables in this address space are
/// passed in function arguments. These arguments need to be
/// passed through any functions called from the entry point.
const fn needs_pass_through(&self) -> bool {
match *self {
Self::Uniform
| Self::Storage { .. }
| Self::Private
| Self::WorkGroup
| Self::PushConstant
| Self::Handle => true,
Self::Function => false,
}
}
/// Returns true if the address space may need a "const" qualifier.
const fn needs_access_qualifier(&self) -> bool {
match *self {
//Note: we are ignoring the storage access here, and instead
// rely on the actual use of a global by functions. This means we
// may end up with "const" even if the binding is read-write,
// and that should be OK.
Self::Storage { .. } => true,
// These should always be read-write.
Self::Private | Self::WorkGroup => false,
// These translate to `constant` address space, no need for qualifiers.
Self::Uniform | Self::PushConstant => false,
// Not applicable.
Self::Handle | Self::Function => false,
}
}
const fn to_msl_name(self) -> Option<&'static str> {
match self {
Self::Handle => None,
Self::Uniform | Self::PushConstant => Some("constant"),
Self::Storage { .. } => Some("device"),
Self::Private | Self::Function => Some("thread"),
Self::WorkGroup => Some("threadgroup"),
}
}
}
impl crate::Type {
// Returns `true` if we need to emit an alias for this type.
const fn needs_alias(&self) -> bool {
use crate::TypeInner as Ti;
match self.inner {
// value types are concise enough, we only alias them if they are named
Ti::Scalar(_)
| Ti::Vector { .. }
| Ti::Matrix { .. }
| Ti::Atomic(_)
| Ti::Pointer { .. }
| Ti::ValuePointer { .. } => self.name.is_some(),
// composite types are better to be aliased, regardless of the name
Ti::Struct { .. } | Ti::Array { .. } => true,
// handle types may be different, depending on the global var access, so we always inline them
Ti::Image { .. }
| Ti::Sampler { .. }
| Ti::AccelerationStructure
| Ti::RayQuery
| Ti::BindingArray { .. } => false,
}
}
}
enum FunctionOrigin {
Handle(Handle<crate::Function>),
EntryPoint(proc::EntryPointIndex),
}
/// A level of detail argument.
///
/// When [`BoundsCheckPolicy::Restrict`] applies to an [`ImageLoad`] access, we
/// save the clamped level of detail in a temporary variable whose name is based
/// on the handle of the `ImageLoad` expression. But for other policies, we just
/// use the expression directly.
///
/// [`BoundsCheckPolicy::Restrict`]: index::BoundsCheckPolicy::Restrict
/// [`ImageLoad`]: crate::Expression::ImageLoad
#[derive(Clone, Copy)]
enum LevelOfDetail {
Direct(Handle<crate::Expression>),
Restricted(Handle<crate::Expression>),
}
/// Values needed to select a particular texel for [`ImageLoad`] and [`ImageStore`].
///
/// When this is used in code paths unconcerned with the `Restrict` bounds check
/// policy, the `LevelOfDetail` enum introduces an unneeded match, since `level`
/// will always be either `None` or `Some(Direct(_))`. But this turns out not to
/// be too awkward. If that changes, we can revisit.
///
/// [`ImageLoad`]: crate::Expression::ImageLoad
/// [`ImageStore`]: crate::Statement::ImageStore
struct TexelAddress {
coordinate: Handle<crate::Expression>,
array_index: Option<Handle<crate::Expression>>,
sample: Option<Handle<crate::Expression>>,
level: Option<LevelOfDetail>,
}
struct ExpressionContext<'a> {
function: &'a crate::Function,
origin: FunctionOrigin,
info: &'a valid::FunctionInfo,
module: &'a crate::Module,
mod_info: &'a valid::ModuleInfo,
pipeline_options: &'a PipelineOptions,
lang_version: (u8, u8),
policies: index::BoundsCheckPolicies,
/// The set of expressions used as indices in `ReadZeroSkipWrite`-policy
/// accesses. These may need to be cached in temporary variables. See
/// `index::find_checked_indexes` for details.
guarded_indices: HandleSet<crate::Expression>,
}
impl<'a> ExpressionContext<'a> {
fn resolve_type(&self, handle: Handle<crate::Expression>) -> &'a crate::TypeInner {
self.info[handle].ty.inner_with(&self.module.types)
}
/// Return true if calls to `image`'s `read` and `write` methods should supply a level of detail.
///
/// Only mipmapped images need to specify a level of detail. Since 1D
/// textures cannot have mipmaps, MSL requires that the level argument to
/// texture1d queries and accesses be a constexpr 0. It's easiest just to
/// omit the level entirely for 1D textures.
fn image_needs_lod(&self, image: Handle<crate::Expression>) -> bool {
let image_ty = self.resolve_type(image);
if let crate::TypeInner::Image { dim, class, .. } = *image_ty {
class.is_mipmapped() && dim != crate::ImageDimension::D1
} else {
false
}
}
fn choose_bounds_check_policy(
&self,
pointer: Handle<crate::Expression>,
) -> index::BoundsCheckPolicy {
self.policies
.choose_policy(pointer, &self.module.types, self.info)
}
fn access_needs_check(
&self,
base: Handle<crate::Expression>,
index: index::GuardedIndex,
) -> Option<index::IndexableLength> {
index::access_needs_check(
base,
index,
self.module,
&self.function.expressions,
self.info,
)
}
fn get_packed_vec_kind(&self, expr_handle: Handle<crate::Expression>) -> Option<crate::Scalar> {
match self.function.expressions[expr_handle] {
crate::Expression::AccessIndex { base, index } => {
let ty = match *self.resolve_type(base) {
crate::TypeInner::Pointer { base, .. } => &self.module.types[base].inner,
ref ty => ty,
};
match *ty {
crate::TypeInner::Struct {
ref members, span, ..
} => should_pack_struct_member(members, span, index as usize, self.module),
_ => None,
}
}
_ => None,
}
}
}
struct StatementContext<'a> {
expression: ExpressionContext<'a>,
result_struct: Option<&'a str>,
}
impl<W: Write> Writer<W> {
/// Creates a new `Writer` instance.
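    ///
    /// A minimal sketch of intended use, writing into a `String`:
    ///
    /// ```ignore
    /// let mut writer = Writer::new(String::new());
    /// // ... drive the writer over a validated module ...
    /// let msl = writer.finish();
    /// ```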
pub fn new(out: W) -> Self {
Writer {
out,
names: FastHashMap::default(),
named_expressions: Default::default(),
need_bake_expressions: Default::default(),
namer: proc::Namer::default(),
#[cfg(test)]
put_expression_stack_pointers: Default::default(),
#[cfg(test)]
put_block_stack_pointers: Default::default(),
struct_member_pads: FastHashSet::default(),
force_bounded_loop_macro_name: String::default(),
}
}
/// Finishes writing and returns the output.
// See https://github.com/rust-lang/rust-clippy/issues/4979.
#[allow(clippy::missing_const_for_fn)]
pub fn finish(self) -> W {
self.out
}
/// Define a macro to invoke at the bottom of each loop body, to
/// defeat MSL infinite loop reasoning.
///
/// If we haven't done so already, emit the definition of a preprocessor
/// macro to be invoked at the end of each loop body in the generated MSL,
/// to ensure that the MSL compiler's optimizations do not remove bounds
/// checks.
///
/// Only the first call to this function for a given module actually causes
/// the macro definition to be written. Subsequent loops can simply use the
/// prior macro definition, since macros aren't block-scoped.
///
/// # What is this trying to solve?
///
/// In Metal Shading Language, an infinite loop has undefined behavior.
/// (This rule is inherited from C++14.) This means that, if the MSL
/// compiler determines that a given loop will never exit, it may assume
/// that it is never reached. It may thus assume that any conditions
/// sufficient to cause the loop to be reached must be false. Like many
/// optimizing compilers, MSL uses this kind of analysis to establish limits
/// on the range of values variables involved in those conditions might
/// hold.
///
/// For example, suppose the MSL compiler sees the code:
///
/// ```ignore
/// if (i >= 10) {
/// while (true) { }
/// }
/// ```
///
/// It will recognize that the `while` loop will never terminate, conclude
/// that it must be unreachable, and thus infer that, if this code is
/// reached, then `i < 10` at that point.
///
/// Now suppose that, at some point where `i` has the same value as above,
/// the compiler sees the code:
///
/// ```ignore
/// if (i < 10) {
/// a[i] = 1;
/// }
/// ```
///
/// Because the compiler is confident that `i < 10`, it will make the
/// assignment to `a[i]` unconditional, rewriting this code as, simply:
///
/// ```ignore
/// a[i] = 1;
/// ```
///
/// If that `if` condition was injected by Naga to implement a bounds check,
/// the MSL compiler's optimizations could allow out-of-bounds array
/// accesses to occur.
///
/// Naga cannot feasibly anticipate whether the MSL compiler will determine
/// that a loop is infinite, so an attacker could craft a Naga module
/// containing an infinite loop protected by conditions that cause the Metal
/// compiler to remove bounds checks that Naga injected elsewhere in the
/// function.
///
/// This rewrite could occur even if the conditional assignment appears
/// *before* the `while` loop, as long as `i < 10` by the time the loop is
/// reached. This would allow the attacker to save the results of
/// unauthorized reads somewhere accessible before entering the infinite
/// loop. But even worse, the MSL compiler has been observed to simply
/// delete the infinite loop entirely, so that even code dominated by the
/// loop becomes reachable. This would make the attack even more flexible,
/// since shaders that would appear to never terminate would actually exit
/// nicely, after having stolen data from elsewhere in the GPU address
/// space.
///
/// To avoid UB, Naga must persuade the MSL compiler that no loop Naga
/// generates is infinite. One approach would be to add inline assembly to
/// each loop that is annotated as potentially branching out of the loop,
/// but which in fact generates no instructions. Unfortunately, inline
/// assembly is not handled correctly by some Metal device drivers.
///
/// Instead, we add the following code to the bottom of every loop:
///
/// ```ignore
/// if (volatile bool unpredictable = false; unpredictable)
/// break;
/// ```
///
/// Although the `if` condition will always be false in any real execution,
/// the `volatile` qualifier prevents the compiler from assuming this. Thus,
/// it must assume that the `break` might be reached, and hence that the
/// loop is not unbounded. This prevents the range analysis impact described
/// above.
///
/// Unfortunately, what makes this a kludge, not a hack, is that this
/// solution leaves the GPU executing a pointless conditional branch, at
/// runtime, in every iteration of the loop. There's no part of the system
/// that has a global enough view to be sure that `unpredictable` is false,
/// and remove it from the code. Adding the branch also affects
/// optimization: for example, it's impossible to unroll this loop. This
/// transformation has been observed to significantly hurt performance.
///
/// To make our output a bit more legible, we pull the condition out into a
/// preprocessor macro defined at the top of the module.
///
/// This approach is also used by Chromium WebGPU's Dawn shader compiler:
/// <https://dawn.googlesource.com/dawn/+/a37557db581c2b60fb1cd2c01abdb232927dd961/src/tint/lang/msl/writer/printer/printer.cc#222>
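///
/// Assuming the namer returns the suggested names unchanged, the
/// definition written by this function looks like:
///
/// ```ignore
/// #define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; }
/// ```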
fn emit_force_bounded_loop_macro(&mut self) -> BackendResult {
if !self.force_bounded_loop_macro_name.is_empty() {
return Ok(());
}
self.force_bounded_loop_macro_name = self.namer.call("LOOP_IS_BOUNDED");
let loop_bounded_volatile_name = self.namer.call("unpredictable_break_from_loop");
writeln!(
self.out,
"#define {} {{ volatile bool {} = false; if ({}) break; }}",
self.force_bounded_loop_macro_name,
loop_bounded_volatile_name,
loop_bounded_volatile_name,
)?;
Ok(())
}
fn put_call_parameters(
&mut self,
parameters: impl Iterator<Item = Handle<crate::Expression>>,
context: &ExpressionContext,
) -> BackendResult {
self.put_call_parameters_impl(parameters, context, |writer, context, expr| {
writer.put_expression(expr, context, true)
})
}
fn put_call_parameters_impl<C, E>(
&mut self,
parameters: impl Iterator<Item = Handle<crate::Expression>>,
ctx: &C,
put_expression: E,
) -> BackendResult
where
E: Fn(&mut Self, &C, Handle<crate::Expression>) -> BackendResult,
{
write!(self.out, "(")?;
for (i, handle) in parameters.enumerate() {
if i != 0 {
write!(self.out, ", ")?;
}
put_expression(self, ctx, handle)?;
}
write!(self.out, ")")?;
Ok(())
}
fn put_level_of_detail(
&mut self,
level: LevelOfDetail,
context: &ExpressionContext,
) -> BackendResult {
match level {
LevelOfDetail::Direct(expr) => self.put_expression(expr, context, true)?,
LevelOfDetail::Restricted(load) => write!(self.out, "{}", ClampedLod(load))?,
}
Ok(())
}
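/// Write a single image metric query.
///
/// This emits `<image>.get_<query>(<level>)`, omitting the argument when
/// `level` is `None`; for example, `tex.get_width(lod)` for a hypothetical
/// image `tex`.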
fn put_image_query(
&mut self,
image: Handle<crate::Expression>,
query: &str,
level: Option<LevelOfDetail>,
context: &ExpressionContext,
) -> BackendResult {
self.put_expression(image, context, false)?;
write!(self.out, ".get_{query}(")?;
if let Some(level) = level {
self.put_level_of_detail(level, context)?;
}
write!(self.out, ")")?;
Ok(())
}
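/// Write an image size query, composed from MSL's per-axis queries.
///
/// For a 2D image `tex` (a hypothetical name) this produces, e.g.,
/// `metal::uint2(tex.get_width(lod), tex.get_height(lod))`; cube images
/// build both components from `get_width`, and 1D images emit a bare,
/// possibly `int`-cast, width.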
fn put_image_size_query(
&mut self,
image: Handle<crate::Expression>,
level: Option<LevelOfDetail>,
kind: crate::ScalarKind,
context: &ExpressionContext,
) -> BackendResult {
//Note: MSL only has separate width/height/depth queries,
// so we compose the result from them.
let dim = match *context.resolve_type(image) {
crate::TypeInner::Image { dim, .. } => dim,
ref other => unreachable!("Unexpected type {:?}", other),
};
let scalar = crate::Scalar { kind, width: 4 };
let coordinate_type = scalar.to_msl_name();
match dim {
crate::ImageDimension::D1 => {
// Since 1D textures never have mipmaps, MSL requires that the
// `level` argument be a constexpr 0. It's simplest for us just
// to pass `None` and omit the level entirely.
if kind == crate::ScalarKind::Uint {
// No need to construct a vector. No cast needed.
self.put_image_query(image, "width", None, context)?;
} else {
// There's no definition for `int` in the `metal` namespace.
write!(self.out, "int(")?;
self.put_image_query(image, "width", None, context)?;
write!(self.out, ")")?;
}
}
crate::ImageDimension::D2 => {
write!(self.out, "{NAMESPACE}::{coordinate_type}2(")?;
self.put_image_query(image, "width", level, context)?;
write!(self.out, ", ")?;
self.put_image_query(image, "height", level, context)?;
write!(self.out, ")")?;
}
crate::ImageDimension::D3 => {
write!(self.out, "{NAMESPACE}::{coordinate_type}3(")?;
self.put_image_query(image, "width", level, context)?;
write!(self.out, ", ")?;
self.put_image_query(image, "height", level, context)?;
write!(self.out, ", ")?;
self.put_image_query(image, "depth", level, context)?;
write!(self.out, ")")?;
}
crate::ImageDimension::Cube => {
write!(self.out, "{NAMESPACE}::{coordinate_type}2(")?;
self.put_image_query(image, "width", level, context)?;
write!(self.out, ")")?;
}
}
Ok(())
}
fn put_cast_to_uint_scalar_or_vector(
&mut self,
expr: Handle<crate::Expression>,
context: &ExpressionContext,
) -> BackendResult {
// coordinates in IR are int, but Metal expects uint
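// This writes, e.g., `uint(expr)` for a scalar coordinate or
// `metal::uint3(expr)` for a three-component vector.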
match *context.resolve_type(expr) {
crate::TypeInner::Scalar(_) => {
put_numeric_type(&mut self.out, crate::Scalar::U32, &[])?
}
crate::TypeInner::Vector { size, .. } => {
put_numeric_type(&mut self.out, crate::Scalar::U32, &[size])?
}
_ => {
return Err(Error::GenericValidation(
"Invalid type for image coordinate".into(),
))
}
};
write!(self.out, "(")?;
self.put_expression(expr, context, true)?;
write!(self.out, ")")?;
Ok(())
}
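/// Write the level-of-detail argument of a sample call, if any.
///
/// Depending on `level`, this appends nothing, or an argument such as
/// `, metal::level(e)`, `, metal::bias(e)`, or `, metal::gradient2d(x, y)`.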
fn put_image_sample_level(
&mut self,
image: Handle<crate::Expression>,
level: crate::SampleLevel,
context: &ExpressionContext,
) -> BackendResult {
let has_levels = context.image_needs_lod(image);
match level {
crate::SampleLevel::Auto => {}
crate::SampleLevel::Zero => {
//TODO: do we support Zero on `Sampled` image classes?
}
_ if !has_levels => {
log::warn!("1D image can't be sampled with level {:?}", level);
}
crate::SampleLevel::Exact(h) => {
write!(self.out, ", {NAMESPACE}::level(")?;
self.put_expression(h, context, true)?;
write!(self.out, ")")?;
}
crate::SampleLevel::Bias(h) => {
write!(self.out, ", {NAMESPACE}::bias(")?;
self.put_expression(h, context, true)?;
write!(self.out, ")")?;
}
crate::SampleLevel::Gradient { x, y } => {
write!(self.out, ", {NAMESPACE}::gradient2d(")?;
self.put_expression(x, context, true)?;
write!(self.out, ", ")?;
self.put_expression(y, context, true)?;
write!(self.out, ")")?;
}
}
Ok(())
}
fn put_image_coordinate_limits(
&mut self,
image: Handle<crate::Expression>,
level: Option<LevelOfDetail>,
context: &ExpressionContext,
) -> BackendResult {
self.put_image_size_query(image, level, crate::ScalarKind::Uint, context)?;
write!(self.out, " - 1")?;