From fbe927c3eed818995ee06e59eb92b436fc5058b0 Mon Sep 17 00:00:00 2001
From: teoxoy <28601907+teoxoy@users.noreply.github.com>
Date: Fri, 17 Jun 2022 16:40:50 +0200
Subject: [PATCH] [hlsl-out] fix matCx2's nested inside global arrays

---
 src/back/hlsl/writer.rs                  | 108 ++++---
 tests/in/globals.wgsl                    |   6 +-
 tests/out/glsl/globals.main.Compute.glsl |  33 +-
 tests/out/hlsl/globals.hlsl              |  32 +-
 tests/out/msl/globals.msl                |  45 ++-
 tests/out/spv/globals.spvasm             | 390 ++++++++++++-----------
 tests/out/wgsl/globals.wgsl              |  33 +-
 7 files changed, 360 insertions(+), 287 deletions(-)

diff --git a/src/back/hlsl/writer.rs b/src/back/hlsl/writer.rs
index b59a0d421d..0a69b9a6b1 100644
--- a/src/back/hlsl/writer.rs
+++ b/src/back/hlsl/writer.rs
@@ -662,14 +662,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         if global.space == crate::AddressSpace::Uniform {
             write!(self.out, " {{ ")?;
 
+            let matrix_data = get_inner_matrix_data(module, global.ty);
+
             // We treat matrices of the form `matCx2` as a sequence of C `vec2`s.
             // See the module-level block comment in mod.rs for details.
-            if let TypeInner::Matrix {
-                rows: crate::VectorSize::Bi,
-                columns,
-                width,
-            } = module.types[global.ty].inner
-            {
+            if let Some((columns, crate::VectorSize::Bi, width)) = matrix_data {
                 let vec_ty = crate::TypeInner::Vector {
                     size: crate::VectorSize::Bi,
                     kind: crate::ScalarKind::Float,
@@ -692,18 +689,18 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 // Even though Naga IR matrices are column-major, we must describe
                 // matrices passed from the CPU as being in row-major order.
                 // See the module-level block comment in mod.rs for details.
-                let is_matrix = matches!(module.types[global.ty].inner, TypeInner::Matrix { .. });
-                if is_matrix || is_array_of_matrices(module, global.ty) {
+                if matrix_data.is_some() {
                     write!(self.out, "row_major ")?;
                 }
                 self.write_type(module, global.ty)?;
                 let sub_name = &self.names[&NameKey::GlobalVariable(handle)];
                 write!(self.out, " {}", sub_name)?;
-                // need to write the array size if the type was emitted with `write_type`
-                if let TypeInner::Array { base, size, .. } = module.types[global.ty].inner {
-                    self.write_array_size(module, base, size)?;
-                }
+            }
+
+            // need to write the array size if the type was emitted with `write_type`
+            if let TypeInner::Array { base, size, .. } = module.types[global.ty].inner {
+                self.write_array_size(module, base, size)?;
             }
 
             writeln!(self.out, "; }}")?;
@@ -843,7 +840,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                     // Even though Naga IR matrices are column-major, we must describe
                     // matrices passed from the CPU as being in row-major order.
                     // See the module-level block comment in mod.rs for details.
-                    if is_array_of_matrices(module, member.ty) {
+                    if get_inner_matrix_data(module, member.ty).is_some() {
                         write!(self.out, "row_major ")?;
                     }
 
@@ -1919,11 +1916,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 }
             }
             Expression::AccessIndex { base, index } => {
-                if let Some(crate::AddressSpace::Storage { .. }) = func_ctx.info[expr]
-                    .ty
-                    .inner_with(&module.types)
-                    .pointer_space()
-                {
+                let res_ty = func_ctx.info[expr].ty.inner_with(&module.types);
+                if let Some(crate::AddressSpace::Storage { .. }) = res_ty.pointer_space() {
                     // do nothing, the chain is written on `Load`/`Store`
                 } else {
                     let base_ty_res = &func_ctx.info[base].ty;
@@ -1962,6 +1956,24 @@
                         }
                     };
 
+                    let mut close_paren = false;
+                    if let TypeInner::Pointer {
+                        base,
+                        space: crate::AddressSpace::Uniform,
+                    } = *res_ty
+                    {
+                        if let TypeInner::Matrix {
+                            rows: crate::VectorSize::Bi,
+                            ..
+                        } = module.types[base].inner
+                        {
+                            write!(self.out, "((")?;
+                            self.write_type(module, base)?;
+                            write!(self.out, ")")?;
+                            close_paren = true;
+                        }
+                    }
+
                     self.write_expr(module, base, func_ctx)?;
 
                     match *resolved {
@@ -1988,6 +2000,10 @@
                             return Err(Error::Custom(format!("Cannot index {:?}", other)))
                         }
                     }
+
+                    if close_paren {
+                        write!(self.out, ")")?;
+                    }
                 }
             }
             Expression::FunctionArgument(pos) => {
@@ -2160,34 +2176,25 @@
                 // We treat matrices of the form `matCx2` as a sequence of C `vec2`s.
                 // See the module-level block comment in mod.rs for details.
                 Some(crate::AddressSpace::Uniform) => {
+                    let mut close_paren = false;
                     if let Expression::GlobalVariable(handle) = func_ctx.expressions[pointer] {
                         let ty = module.global_variables[handle].ty;
-                        match module.types[ty].inner {
-                            TypeInner::Matrix {
-                                rows: crate::VectorSize::Bi,
-                                columns,
-                                ..
-                            } => {
-                                self.write_type(module, ty)?;
-                                write!(self.out, "(")?;
-
-                                let name = &NameKey::GlobalVariable(handle);
+                        if let TypeInner::Matrix {
+                            rows: crate::VectorSize::Bi,
+                            ..
+                        } = module.types[ty].inner
+                        {
+                            write!(self.out, "((")?;
+                            self.write_type(module, ty)?;
+                            write!(self.out, ")")?;
+                            close_paren = true;
+                        }
+                    }
-                                for i in 0..columns as u8 {
-                                    if i != 0 {
-                                        write!(self.out, ", ")?;
-                                    }
-                                    write!(self.out, "{}._{}", &self.names[name], i)?;
-                                }
+                    self.write_expr(module, pointer, func_ctx)?;
-                                write!(self.out, ")")?;
-                            }
-                            _ => {
-                                self.write_expr(module, pointer, func_ctx)?;
-                            }
-                        }
-                    } else {
-                        self.write_expr(module, pointer, func_ctx)?;
+                    if close_paren {
+                        write!(self.out, ")")?;
                     }
                 }
                 _ => {
@@ -2651,12 +2658,17 @@
     }
 }
 
-fn is_array_of_matrices(module: &Module, handle: Handle<crate::Type>) -> bool {
+fn get_inner_matrix_data(
+    module: &Module,
+    handle: Handle<crate::Type>,
+) -> Option<(crate::VectorSize, crate::VectorSize, u8)> {
     match module.types[handle].inner {
-        TypeInner::Array { base, .. } => match module.types[base].inner {
-            TypeInner::Matrix { .. } => true,
-            _ => is_array_of_matrices(module, base),
-        },
-        _ => false,
+        TypeInner::Matrix {
+            columns,
+            rows,
+            width,
+        } => Some((columns, rows, width)),
+        TypeInner::Array { base, .. } => get_inner_matrix_data(module, base),
+        _ => None,
     }
 }
diff --git a/tests/in/globals.wgsl b/tests/in/globals.wgsl
index e768f37a31..c1dcba1c84 100644
--- a/tests/in/globals.wgsl
+++ b/tests/in/globals.wgsl
@@ -26,7 +26,10 @@ var<uniform> global_vec: vec3<f32>;
 @group(0) @binding(5)
 var<uniform> global_mat: mat3x2<f32>;
 @group(0) @binding(6)
-var<uniform> global_nested_arrays_of_matrices: array<array<mat4x3<f32>, 2>, 2>;
+var<uniform> global_nested_arrays_of_matrices_4x4: array<array<mat4x4<f32>, 2>, 2>;
+
+@group(0) @binding(7)
+var<uniform> global_nested_arrays_of_matrices_4x2: array<array<mat4x2<f32>, 2>, 2>;
 
 fn test_msl_packed_vec3_as_arg(arg: vec3<f32>) {}
 
@@ -59,6 +62,7 @@ fn test_msl_packed_vec3() {
 
 fn main() {
     test_msl_packed_vec3();
+    wg[7] = (global_nested_arrays_of_matrices_4x2[0][0] * global_nested_arrays_of_matrices_4x4[0][0][0]).x;
     wg[6] = (global_mat * global_vec).x;
     wg[5] = dummy[1].y;
     wg[4] = float_vecs[0].w;
diff --git a/tests/out/glsl/globals.main.Compute.glsl b/tests/out/glsl/globals.main.Compute.glsl
index 26ae2d902c..b9d4d761b0 100644
--- a/tests/out/glsl/globals.main.Compute.glsl
+++ b/tests/out/glsl/globals.main.Compute.glsl
@@ -23,6 +23,10 @@ uniform type_4_block_3Compute { vec3 _group_0_binding_4_cs; };
 
 uniform type_9_block_4Compute { mat3x2 _group_0_binding_5_cs; };
 
+uniform type_12_block_5Compute { mat4x4 _group_0_binding_6_cs[2][2]; };
+
+uniform type_15_block_6Compute { mat4x2 _group_0_binding_7_cs[2][2]; };
+
 void test_msl_packed_vec3_as_arg(vec3 arg) {
     return;
 }
@@ -33,8 +37,8 @@ void test_msl_packed_vec3_() {
     _group_0_binding_1_cs.v3_ = vec3(1.0);
     _group_0_binding_1_cs.v3_.x = 1.0;
     _group_0_binding_1_cs.v3_.x = 2.0;
-    int _e22 = idx;
-    _group_0_binding_1_cs.v3_[_e22] = 3.0;
+    int _e23 = idx;
+    _group_0_binding_1_cs.v3_[_e23] = 3.0;
     Foo data = _group_0_binding_1_cs;
     vec3 unnamed = data.v3_;
     vec2 unnamed_1 = data.v3_.zx;
@@ -49,17 +53,20 @@ void main() {
     float Foo_1 = 1.0;
     bool at = true;
     test_msl_packed_vec3_();
-    mat3x2 _e11 = _group_0_binding_5_cs;
-    vec3 _e12 = _group_0_binding_4_cs;
-    wg[6] = (_e11 * _e12).x;
-    float _e20 = _group_0_binding_2_cs[1].y;
-    wg[5] = _e20;
-    float _e26 = _group_0_binding_3_cs[0].w;
-    wg[4] = _e26;
-    float _e30 = _group_0_binding_1_cs.v1_;
-    wg[3] = _e30;
-    float _e35 = _group_0_binding_1_cs.v3_.x;
-    wg[2] = _e35;
+    mat4x2 _e16 = _group_0_binding_7_cs[0][0];
+    vec4 _e23 = _group_0_binding_6_cs[0][0][0];
+    wg[7] = (_e16 * _e23).x;
+    mat3x2 _e28 = _group_0_binding_5_cs;
+    vec3 _e29 = _group_0_binding_4_cs;
+    wg[6] = (_e28 * _e29).x;
+    float _e37 = _group_0_binding_2_cs[1].y;
+    wg[5] = _e37;
+    float _e43 = _group_0_binding_3_cs[0].w;
+    wg[4] = _e43;
+    float _e47 = _group_0_binding_1_cs.v1_;
+    wg[3] = _e47;
+    float _e52 = _group_0_binding_1_cs.v3_.x;
+    wg[2] = _e52;
     _group_0_binding_1_cs.v1_ = 4.0;
     wg[1] = float(uint(_group_0_binding_2_cs.length()));
     at_1 = 2u;
diff --git a/tests/out/hlsl/globals.hlsl b/tests/out/hlsl/globals.hlsl
index db87fa9ff9..1104c3aa50 100644
--- a/tests/out/hlsl/globals.hlsl
+++ b/tests/out/hlsl/globals.hlsl
@@ -12,7 +12,8 @@ ByteAddressBuffer dummy : register(t2);
 cbuffer float_vecs : register(b3) { float4 float_vecs[20]; }
 cbuffer global_vec : register(b4) { float3 global_vec; }
 cbuffer global_mat : register(b5) { struct { float2 _0; float2 _1; float2 _2; } global_mat; }
-cbuffer global_nested_arrays_of_matrices : register(b6) { row_major float4x3 global_nested_arrays_of_matrices[2][2]; }
+cbuffer global_nested_arrays_of_matrices_4x4_ : register(b6) { row_major float4x4 global_nested_arrays_of_matrices_4x4_[2][2]; }
+cbuffer global_nested_arrays_of_matrices_4x2_ : register(b7) { struct { float2 _0; float2 _1; float2 _2; float2 _3; } global_nested_arrays_of_matrices_4x2_[2][2]; }
 
 void test_msl_packed_vec3_as_arg(float3 arg)
 {
@@ -33,8 +34,8 @@ void test_msl_packed_vec3_()
     alignment.Store3(0, asuint((1.0).xxx));
     alignment.Store(0+0, asuint(1.0));
     alignment.Store(0+0, asuint(2.0));
-    int _expr22 = idx;
-    alignment.Store(_expr22*4+0, asuint(3.0));
+    int _expr23 = idx;
+    alignment.Store(_expr23*4+0, asuint(3.0));
     Foo data = ConstructFoo(asfloat(alignment.Load3(0)), asfloat(alignment.Load(12)));
     float3 unnamed = data.v3_;
     float2 unnamed_1 = data.v3_.zx;
@@ -59,17 +60,20 @@ void main()
     bool at = true;
 
     test_msl_packed_vec3_();
-    float3x2 _expr11 = float3x2(global_mat._0, global_mat._1, global_mat._2);
-    float3 _expr12 = global_vec;
-    wg[6] = mul(_expr12, _expr11).x;
-    float _expr20 = asfloat(dummy.Load(4+8));
-    wg[5] = _expr20;
-    float _expr26 = float_vecs[0].w;
-    wg[4] = _expr26;
-    float _expr30 = asfloat(alignment.Load(12));
-    wg[3] = _expr30;
-    float _expr35 = asfloat(alignment.Load(0+0));
-    wg[2] = _expr35;
+    float4x2 _expr16 = ((float4x2)global_nested_arrays_of_matrices_4x2_[0][0]);
+    float4 _expr23 = global_nested_arrays_of_matrices_4x4_[0][0][0];
+    wg[7] = mul(_expr23, _expr16).x;
+    float3x2 _expr28 = ((float3x2)global_mat);
+    float3 _expr29 = global_vec;
+    wg[6] = mul(_expr29, _expr28).x;
+    float _expr37 = asfloat(dummy.Load(4+8));
+    wg[5] = _expr37;
+    float _expr43 = float_vecs[0].w;
+    wg[4] = _expr43;
+    float _expr47 = asfloat(alignment.Load(12));
+    wg[3] = _expr47;
+    float _expr52 = asfloat(alignment.Load(0+0));
+    wg[2] = _expr52;
     alignment.Store(12, asuint(4.0));
     wg[1] = float(((NagaBufferLength(dummy) - 0) / 8));
     at_1 = 2u;
diff --git a/tests/out/msl/globals.msl b/tests/out/msl/globals.msl
index 4ee09c8291..483b082445 100644
--- a/tests/out/msl/globals.msl
+++ b/tests/out/msl/globals.msl
@@ -21,13 +21,19 @@ struct type_8 {
     metal::float4 inner[20];
 };
 struct type_11 {
-    metal::float4x3 inner[2];
+    metal::float4x4 inner[2];
 };
 struct type_12 {
     type_11 inner[2];
 };
+struct type_14 {
+    metal::float4x2 inner[2];
+};
+struct type_15 {
+    type_14 inner[2];
+};
 constant metal::float3 const_type_4_ = {0.0, 0.0, 0.0};
-constant metal::float3x3 const_type_14_ = {const_type_4_, const_type_4_, const_type_4_};
+constant metal::float3x3 const_type_17_ = {const_type_4_, const_type_4_, const_type_4_};
 
 void test_msl_packed_vec3_as_arg(
     metal::float3 arg
@@ -42,14 +48,14 @@ void test_msl_packed_vec3_(
     alignment.v3_ = metal::float3(1.0);
     alignment.v3_[0] = 1.0;
     alignment.v3_[0] = 2.0;
-    int _e22 = idx;
-    alignment.v3_[_e22] = 3.0;
+    int _e23 = idx;
+    alignment.v3_[_e23] = 3.0;
     Foo data = alignment;
     metal::float3 unnamed = data.v3_;
     metal::float2 unnamed_1 = metal::float3(data.v3_).zx;
     test_msl_packed_vec3_as_arg(data.v3_);
-    metal::float3 unnamed_2 = metal::float3(data.v3_) * const_type_14_;
-    metal::float3 unnamed_3 = const_type_14_ * metal::float3(data.v3_);
+    metal::float3 unnamed_2 = metal::float3(data.v3_) * const_type_17_;
+    metal::float3 unnamed_3 = const_type_17_ * metal::float3(data.v3_);
     metal::float3 unnamed_4 = data.v3_ * 2.0;
     metal::float3 unnamed_5 = 2.0 * data.v3_;
 }
@@ -62,22 +68,27 @@ kernel void main_(
 , constant type_8& float_vecs [[user(fake0)]]
 , constant metal::float3& global_vec [[user(fake0)]]
 , constant metal::float3x2& global_mat [[user(fake0)]]
+, constant type_12& global_nested_arrays_of_matrices_4x4_ [[user(fake0)]]
+, constant type_15& global_nested_arrays_of_matrices_4x2_ [[user(fake0)]]
 , constant _mslBufferSizes& _buffer_sizes [[user(fake0)]]
 ) {
     float Foo_1 = 1.0;
     bool at = true;
 
     test_msl_packed_vec3_(alignment);
-    metal::float3x2 _e11 = global_mat;
-    metal::float3 _e12 = global_vec;
-    wg.inner[6] = (_e11 * _e12).x;
-    float _e20 = dummy[1].y;
-    wg.inner[5] = _e20;
-    float _e26 = float_vecs.inner[0].w;
-    wg.inner[4] = _e26;
-    float _e30 = alignment.v1_;
-    wg.inner[3] = _e30;
-    float _e35 = alignment.v3_[0];
-    wg.inner[2] = _e35;
+    metal::float4x2 _e16 = global_nested_arrays_of_matrices_4x2_.inner[0].inner[0];
+    metal::float4 _e23 = global_nested_arrays_of_matrices_4x4_.inner[0].inner[0][0];
+    wg.inner[7] = (_e16 * _e23).x;
+    metal::float3x2 _e28 = global_mat;
+    metal::float3 _e29 = global_vec;
+    wg.inner[6] = (_e28 * _e29).x;
+    float _e37 = dummy[1].y;
+    wg.inner[5] = _e37;
+    float _e43 = float_vecs.inner[0].w;
+    wg.inner[4] = _e43;
+    float _e47 = alignment.v1_;
+    wg.inner[3] = _e47;
+    float _e52 = alignment.v3_[0];
+    wg.inner[2] = _e52;
     alignment.v1_ = 4.0;
     wg.inner[1] = static_cast<float>(1 + (_buffer_sizes.size3 - 0 - 8) / 8);
     metal::atomic_store_explicit(&at_1, 2u, metal::memory_order_relaxed);
diff --git a/tests/out/spv/globals.spvasm b/tests/out/spv/globals.spvasm
index 6939df4713..6ab71d2f83 100644
--- a/tests/out/spv/globals.spvasm
+++ b/tests/out/spv/globals.spvasm
@@ -1,47 +1,53 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 146
+; Bound: 169
 OpCapability Shader
 OpExtension "SPV_KHR_storage_buffer_storage_class"
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
-OpEntryPoint GLCompute %106 "main"
-OpExecutionMode %106 LocalSize 1 1 1
-OpDecorate %24 ArrayStride 4
-OpMemberDecorate %26 0 Offset 0
-OpMemberDecorate %26 1 Offset 12
-OpDecorate %28 ArrayStride 8
-OpDecorate %30 ArrayStride 16
-OpDecorate %33 ArrayStride 64
-OpDecorate %34 ArrayStride 128
-OpDecorate %42 DescriptorSet 0
-OpDecorate %42 Binding 1
-OpDecorate %43 Block
-OpMemberDecorate %43 0 Offset 0
-OpDecorate %45 NonWritable
-OpDecorate %45 DescriptorSet 0
-OpDecorate %45 Binding 2
-OpDecorate %46 Block
-OpMemberDecorate %46 0 Offset 0
-OpDecorate %48 DescriptorSet 0
-OpDecorate %48 Binding 3
-OpDecorate %49 Block
-OpMemberDecorate %49 0 Offset 0
-OpDecorate %51 DescriptorSet 0
-OpDecorate %51 Binding 4
-OpDecorate %52 Block
-OpMemberDecorate %52 0 Offset 0
-OpDecorate %54 DescriptorSet 0
-OpDecorate %54 Binding 5
-OpDecorate %55 Block
-OpMemberDecorate %55 0 Offset 0
-OpMemberDecorate %55 0 ColMajor
-OpMemberDecorate %55 0 MatrixStride 8
-OpDecorate %57 DescriptorSet 0
-OpDecorate %57 Binding 6
-OpDecorate %58 Block
-OpMemberDecorate %58 0 Offset 0
+OpEntryPoint GLCompute %114 "main"
+OpExecutionMode %114 LocalSize 1 1 1
+OpDecorate %25 ArrayStride 4
+OpMemberDecorate %27 0 Offset 0
+OpMemberDecorate %27 1 Offset 12
+OpDecorate %29 ArrayStride 8
+OpDecorate %31 ArrayStride 16
+OpDecorate %34 ArrayStride 64
+OpDecorate %35 ArrayStride 128
+OpDecorate %37 ArrayStride 32
+OpDecorate %38 ArrayStride 64
+OpDecorate %46 DescriptorSet 0
+OpDecorate %46 Binding 1
+OpDecorate %47 Block
+OpMemberDecorate %47 0 Offset 0
+OpDecorate %49 NonWritable
+OpDecorate %49 DescriptorSet 0
+OpDecorate %49 Binding 2
+OpDecorate %50 Block
+OpMemberDecorate %50 0 Offset 0
+OpDecorate %52 DescriptorSet 0
+OpDecorate %52 Binding 3
+OpDecorate %53 Block
+OpMemberDecorate %53 0 Offset 0
+OpDecorate %55 DescriptorSet 0
+OpDecorate %55 Binding 4
+OpDecorate %56 Block
+OpMemberDecorate %56 0 Offset 0
+OpDecorate %58 DescriptorSet 0
+OpDecorate %58 Binding 5
+OpDecorate %59 Block
+OpMemberDecorate %59 0 Offset 0
+OpMemberDecorate %59 0 ColMajor
+OpMemberDecorate %59 0 MatrixStride 8
+OpDecorate %61 DescriptorSet 0
+OpDecorate %61 Binding 6
+OpDecorate %62 Block
+OpMemberDecorate %62 0 Offset 0
+OpDecorate %64 DescriptorSet 0
+OpDecorate %64 Binding 7
+OpDecorate %65 Block
+OpMemberDecorate %65 0 Offset 0
 %2 = OpTypeVoid
 %4 = OpTypeBool
 %3 = OpConstantTrue %4
@@ -57,153 +63,177 @@ OpMemberDecorate %58 0 Offset 0
 %14 = OpConstant %11 2.0
 %15 = OpConstant %11 3.0
 %16 = OpConstant %11 0.0
-%17 = OpConstant %8 6
-%18 = OpConstant %8 5
-%19 = OpConstant %8 4
-%20 = OpConstant %8 3
-%21 = OpConstant %11 4.0
-%22 = OpConstant %6 2
-%23 = OpConstantTrue %4
-%24 = OpTypeArray %11 %5
-%25 = OpTypeVector %11 3
-%26 = OpTypeStruct %25 %11
-%27 = OpTypeVector %11 2
-%28 = OpTypeRuntimeArray %27
-%29 = OpTypeVector %11 4
-%30 = OpTypeArray %29 %7
-%31 = OpTypeMatrix %27 3
-%32 = OpTypeMatrix %25 4
-%33 = OpTypeArray %32 %9
-%34 = OpTypeArray %33 %9
-%35 = OpTypeMatrix %25 3
-%36 = OpConstantComposite %25 %16 %16 %16
-%37 = OpConstantComposite %35 %36 %36 %36
-%39 = OpTypePointer Workgroup %24
-%38 = OpVariable %39 Workgroup
-%41 = OpTypePointer Workgroup %6
-%40 = OpVariable %41 Workgroup
-%43 = OpTypeStruct %26
-%44 = OpTypePointer StorageBuffer %43
-%42 = OpVariable %44 StorageBuffer
-%46 = OpTypeStruct %28
-%47 = OpTypePointer StorageBuffer %46
-%45 = OpVariable %47 StorageBuffer
-%49 = OpTypeStruct %30
-%50 = OpTypePointer Uniform %49
-%48 = OpVariable %50 Uniform
-%52 = OpTypeStruct %25
-%53 = OpTypePointer Uniform %52
-%51 = OpVariable %53 Uniform
-%55 = OpTypeStruct %31
-%56 = OpTypePointer Uniform %55
-%54 = OpVariable %56 Uniform
-%58 = OpTypeStruct %34
-%59 = OpTypePointer Uniform %58
-%57 = OpVariable %59 Uniform
-%63 = OpTypeFunction %2 %25
-%64 = OpTypePointer StorageBuffer %28
-%65 = OpTypePointer Uniform %25
-%66 = OpTypePointer StorageBuffer %26
-%67 = OpTypePointer Uniform %34
-%68 = OpTypePointer Uniform %30
-%69 = OpTypePointer Uniform %31
-%72 = OpTypePointer Function %8
-%75 = OpTypeFunction %2
-%76 = OpConstant %6 0
-%79 = OpTypePointer StorageBuffer %25
-%82 = OpTypePointer StorageBuffer %11
-%102 = OpTypePointer Function %11
-%104 = OpTypePointer Function %4
-%114 = OpTypePointer Workgroup %11
-%119 = OpConstant %6 6
-%121 = OpTypePointer StorageBuffer %27
-%122 = OpConstant %6 1
-%125 = OpConstant %6 5
-%127 = OpTypePointer Uniform %29
-%128 = OpTypePointer Uniform %11
-%129 = OpConstant %6 3
-%132 = OpConstant %6 4
-%134 = OpTypePointer StorageBuffer %11
-%145 = OpConstant %6 256
-%62 = OpFunction %2 None %63
-%61 = OpFunctionParameter %25
-%60 = OpLabel
-OpBranch %70
-%70 = OpLabel
-OpReturn
-OpFunctionEnd
-%74 = OpFunction %2 None %75
-%73 = OpLabel
-%71 = OpVariable %72 Function %12
-%77 = OpAccessChain %66 %42 %76
-OpBranch %78
-%78 = OpLabel
-%80 = OpCompositeConstruct %25 %10 %10 %10
-%81 = OpAccessChain %79 %77 %76
-OpStore %81 %80
-%83 = OpAccessChain %82 %77 %76 %76
-OpStore %83 %10
-%84 = OpAccessChain %82 %77 %76 %76
-OpStore %84 %14
-%85 = OpLoad %8 %71
-%86 = OpAccessChain %82 %77 %76 %85
-OpStore %86 %15
-%87 = OpLoad %26 %77
-%88 = OpCompositeExtract %25 %87 0
-%89 = OpCompositeExtract %25 %87 0
-%90 = OpVectorShuffle %27 %89 %89 2 0
-%91 = OpCompositeExtract %25 %87 0
-%92 = OpFunctionCall %2 %62 %91
-%93 = OpCompositeExtract %25 %87 0
-%94 = OpVectorTimesMatrix %25 %93 %37
-%95 = OpCompositeExtract %25 %87 0
-%96 = OpMatrixTimesVector %25 %37 %95
-%97 = OpCompositeExtract %25 %87 0
-%98 = OpVectorTimesScalar %25 %97 %14
-%99 = OpCompositeExtract %25 %87 0
-%100 = OpVectorTimesScalar %25 %99 %14
-OpReturn
-OpFunctionEnd
-%106 = OpFunction %2 None %75
-%105 = OpLabel
-%101 = OpVariable %102 Function %10
-%103 = OpVariable %104 Function %23
-%107 = OpAccessChain %66 %42 %76
-%108 = OpAccessChain %64 %45 %76
-%109 = OpAccessChain %68 %48 %76
-%110 = OpAccessChain %65 %51 %76
-%111 = OpAccessChain %69 %54 %76
-OpBranch %112
-%112 = OpLabel
-%113 = OpFunctionCall %2 %74
-%115 = OpLoad %31 %111
-%116 = OpLoad %25 %110
-%117 = OpMatrixTimesVector %27 %115 %116
-%118 = OpCompositeExtract %11 %117 0
-%120 = OpAccessChain %114 %38 %119
-OpStore %120 %118
-%123 = OpAccessChain %82 %108 %122 %122
-%124 = OpLoad %11 %123
-%126 = OpAccessChain %114 %38 %125
-OpStore %126 %124
-%130 = OpAccessChain %128 %109 %76 %129
-%131 = OpLoad %11 %130
-%133 = OpAccessChain %114 %38 %132
-OpStore %133 %131
-%135 = OpAccessChain %134 %107 %122
-%136 = OpLoad %11 %135
-%137 = OpAccessChain %114 %38 %129
-OpStore %137 %136
-%138 = OpAccessChain %82 %107 %76 %76
-%139 = OpLoad %11 %138
-%140 = OpAccessChain %114 %38 %22
-OpStore %140 %139
-%141 = OpAccessChain %134 %107 %122
-OpStore %141 %21
-%142 = OpArrayLength %6 %45 0
-%143 = OpConvertUToF %11 %142
-%144 = OpAccessChain %114 %38 %122
-OpStore %144 %143
-OpAtomicStore %40 %9 %145 %22
-OpReturn
-OpFunctionEnd
+%17 = OpConstant %8 7
+%18 = OpConstant %8 6
+%19 = OpConstant %8 5
+%20 = OpConstant %8 4
+%21 = OpConstant %8 3
+%22 = OpConstant %11 4.0
+%23 = OpConstant %6 2
+%24 = OpConstantTrue %4
+%25 = OpTypeArray %11 %5
+%26 = OpTypeVector %11 3
+%27 = OpTypeStruct %26 %11
+%28 = OpTypeVector %11 2
+%29 = OpTypeRuntimeArray %28
+%30 = OpTypeVector %11 4
+%31 = OpTypeArray %30 %7
+%32 = OpTypeMatrix %28 3
+%33 = OpTypeMatrix %30 4
+%34 = OpTypeArray %33 %9
+%35 = OpTypeArray %34 %9
+%36 = OpTypeMatrix %28 4
+%37 = OpTypeArray %36 %9
+%38 = OpTypeArray %37 %9
+%39 = OpTypeMatrix %26 3
+%40 = OpConstantComposite %26 %16 %16 %16
+%41 = OpConstantComposite %39 %40 %40 %40
+%43 = OpTypePointer Workgroup %25
+%42 = OpVariable %43 Workgroup
+%45 = OpTypePointer Workgroup %6
+%44 = OpVariable %45 Workgroup
+%47 = OpTypeStruct %27
+%48 = OpTypePointer StorageBuffer %47
+%46 = OpVariable %48 StorageBuffer
+%50 = OpTypeStruct %29
+%51 = OpTypePointer StorageBuffer %50
+%49 = OpVariable %51 StorageBuffer
+%53 = OpTypeStruct %31
+%54 = OpTypePointer Uniform %53
+%52 = OpVariable %54 Uniform
+%56 = OpTypeStruct %26
+%57 = OpTypePointer Uniform %56
+%55 = OpVariable %57 Uniform
+%59 = OpTypeStruct %32
+%60 = OpTypePointer Uniform %59
+%58 = OpVariable %60 Uniform
+%62 = OpTypeStruct %35
+%63 = OpTypePointer Uniform %62
+%61 = OpVariable %63 Uniform
+%65 = OpTypeStruct %38
+%66 = OpTypePointer Uniform %65
+%64 = OpVariable %66 Uniform
+%70 = OpTypeFunction %2 %26
+%71 = OpTypePointer StorageBuffer %29
+%72 = OpTypePointer Uniform %26
+%73 = OpTypePointer StorageBuffer %27
+%74 = OpTypePointer Uniform %35
+%75 = OpTypePointer Uniform %31
+%76 = OpTypePointer Uniform %38
+%77 = OpTypePointer Uniform %32
+%80 = OpTypePointer Function %8
+%83 = OpTypeFunction %2
+%84 = OpConstant %6 0
+%87 = OpTypePointer StorageBuffer %26
+%90 = OpTypePointer StorageBuffer %11
+%110 = OpTypePointer Function %11
+%112 = OpTypePointer Function %4
+%124 = OpTypePointer Workgroup %11
+%125 = OpTypePointer Uniform %37
+%126 = OpTypePointer Uniform %36
+%129 = OpTypePointer Uniform %34
+%130 = OpTypePointer Uniform %33
+%131 = OpTypePointer Uniform %30
+%136 = OpConstant %6 7
+%142 = OpConstant %6 6
+%144 = OpTypePointer StorageBuffer %28
+%145 = OpConstant %6 1
+%148 = OpConstant %6 5
+%150 = OpTypePointer Uniform %30
+%151 = OpTypePointer Uniform %11
+%152 = OpConstant %6 3
+%155 = OpConstant %6 4
+%157 = OpTypePointer StorageBuffer %11
+%168 = OpConstant %6 256
+%69 = OpFunction %2 None %70
+%68 = OpFunctionParameter %26
+%67 = OpLabel
+OpBranch %78
+%78 = OpLabel
+OpReturn
+OpFunctionEnd
+%82 = OpFunction %2 None %83
+%81 = OpLabel
+%79 = OpVariable %80 Function %12
+%85 = OpAccessChain %73 %46 %84
+OpBranch %86
+%86 = OpLabel
+%88 = OpCompositeConstruct %26 %10 %10 %10
+%89 = OpAccessChain %87 %85 %84
+OpStore %89 %88
+%91 = OpAccessChain %90 %85 %84 %84
+OpStore %91 %10
+%92 = OpAccessChain %90 %85 %84 %84
+OpStore %92 %14
+%93 = OpLoad %8 %79
+%94 = OpAccessChain %90 %85 %84 %93
+OpStore %94 %15
+%95 = OpLoad %27 %85
+%96 = OpCompositeExtract %26 %95 0
+%97 = OpCompositeExtract %26 %95 0
+%98 = OpVectorShuffle %28 %97 %97 2 0
+%99 = OpCompositeExtract %26 %95 0
+%100 = OpFunctionCall %2 %69 %99
+%101 = OpCompositeExtract %26 %95 0
+%102 = OpVectorTimesMatrix %26 %101 %41
+%103 = OpCompositeExtract %26 %95 0
+%104 = OpMatrixTimesVector %26 %41 %103
+%105 = OpCompositeExtract %26 %95 0
+%106 = OpVectorTimesScalar %26 %105 %14
+%107 = OpCompositeExtract %26 %95 0
+%108 = OpVectorTimesScalar %26 %107 %14
+OpReturn
+OpFunctionEnd
+%114 = OpFunction %2 None %83
+%113 = OpLabel
+%109 = OpVariable %110 Function %10
+%111 = OpVariable %112 Function %24
+%115 = OpAccessChain %73 %46 %84
+%116 = OpAccessChain %71 %49 %84
+%117 = OpAccessChain %75 %52 %84
+%118 = OpAccessChain %72 %55 %84
+%119 = OpAccessChain %77 %58 %84
+%120 = OpAccessChain %74 %61 %84
+%121 = OpAccessChain %76 %64 %84
+OpBranch %122
+%122 = OpLabel
+%123 = OpFunctionCall %2 %82
+%127 = OpAccessChain %126 %121 %84 %84
+%128 = OpLoad %36 %127
+%132 = OpAccessChain %131 %120 %84 %84 %84
+%133 = OpLoad %30 %132
+%134 = OpMatrixTimesVector %28 %128 %133
+%135 = OpCompositeExtract %11 %134 0
+%137 = OpAccessChain %124 %42 %136
+OpStore %137 %135
+%138 = OpLoad %32 %119
+%139 = OpLoad %26 %118
+%140 = OpMatrixTimesVector %28 %138 %139
+%141 = OpCompositeExtract %11 %140 0
+%143 = OpAccessChain %124 %42 %142
+OpStore %143 %141
+%146 = OpAccessChain %90 %116 %145 %145
+%147 = OpLoad %11 %146
+%149 = OpAccessChain %124 %42 %148
+OpStore %149 %147
+%153 = OpAccessChain %151 %117 %84 %152
+%154 = OpLoad %11 %153
+%156 = OpAccessChain %124 %42 %155
+OpStore %156 %154
+%158 = OpAccessChain %157 %115 %145
+%159 = OpLoad %11 %158
+%160 = OpAccessChain %124 %42 %152
+OpStore %160 %159
+%161 = OpAccessChain %90 %115 %84 %84
+%162 = OpLoad %11 %161
+%163 = OpAccessChain %124 %42 %23
+OpStore %163 %162
+%164 = OpAccessChain %157 %115 %145
+OpStore %164 %22
+%165 = OpArrayLength %6 %49 0
+%166 = OpConvertUToF %11 %165
+%167 = OpAccessChain %124 %42 %145
+OpStore %167 %166
+OpAtomicStore %44 %9 %168 %23
+OpReturn
+OpFunctionEnd
\ No newline at end of file
diff --git a/tests/out/wgsl/globals.wgsl b/tests/out/wgsl/globals.wgsl
index 8dedd70394..0e48754507 100644
--- a/tests/out/wgsl/globals.wgsl
+++ b/tests/out/wgsl/globals.wgsl
@@ -18,7 +18,9 @@ var<uniform> global_vec: vec3<f32>;
 @group(0) @binding(5)
 var<uniform> global_mat: mat3x2<f32>;
 @group(0) @binding(6)
-var<uniform> global_nested_arrays_of_matrices: array<array<mat4x3<f32>,2>,2>;
+var<uniform> global_nested_arrays_of_matrices_4x4_: array<array<mat4x4<f32>,2>,2>;
+@group(0) @binding(7)
+var<uniform> global_nested_arrays_of_matrices_4x2_: array<array<mat4x2<f32>,2>,2>;
 fn test_msl_packed_vec3_as_arg(arg: vec3<f32>) {
     return;
 }
@@ -30,8 +32,8 @@ fn test_msl_packed_vec3_() {
     alignment.v3_ = vec3<f32>(1.0);
     alignment.v3_.x = 1.0;
     alignment.v3_.x = 2.0;
-    let _e22 = idx;
-    alignment.v3_[_e22] = 3.0;
+    let _e23 = idx;
+    alignment.v3_[_e23] = 3.0;
     let data = alignment;
     _ = data.v3_;
     _ = data.v3_.zx;
@@ -48,17 +50,20 @@ fn main() {
     var at: bool = true;
 
     test_msl_packed_vec3_();
-    let _e11 = global_mat;
-    let _e12 = global_vec;
-    wg[6] = (_e11 * _e12).x;
-    let _e20 = dummy[1].y;
-    wg[5] = _e20;
-    let _e26 = float_vecs[0].w;
-    wg[4] = _e26;
-    let _e30 = alignment.v1_;
-    wg[3] = _e30;
-    let _e35 = alignment.v3_.x;
-    wg[2] = _e35;
+    let _e16 = global_nested_arrays_of_matrices_4x2_[0][0];
+    let _e23 = global_nested_arrays_of_matrices_4x4_[0][0][0];
+    wg[7] = (_e16 * _e23).x;
+    let _e28 = global_mat;
+    let _e29 = global_vec;
+    wg[6] = (_e28 * _e29).x;
+    let _e37 = dummy[1].y;
+    wg[5] = _e37;
+    let _e43 = float_vecs[0].w;
+    wg[4] = _e43;
+    let _e47 = alignment.v1_;
+    wg[3] = _e47;
+    let _e52 = alignment.v3_.x;
+    wg[2] = _e52;
     alignment.v1_ = 4.0;
     wg[1] = f32(arrayLength((&dummy)));
     atomicStore((&at_1), 2u);