Miscompilation corrupts stack-allocated vectors #63475

cbeuw · 2023-06-23T16:23:16Z

This should print `42 42 42 42 42 42 42`, but it prints `42 0 42 0 42 42 42` when compiled with clang or opt at -O3: https://godbolt.org/z/8v3d7enK8

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: noinline
; Reduced form of repro::black_box_2: compares each argument with zero, stores
; the zero-extended result through %0, and follows each store with an empty
; side-effecting asm that clobbers memory.
; NOTE(review): %0 is a zero-byte alloca (zero-length array type, count i32 0),
; so both i8 stores below write into a zero-sized allocation. The issue thread
; attributes these zero-byte allocas to llvm-reduce.
define internal fastcc void @_ZN5repro11black_box_217h2c9aef2f201b1074E(i128 %val0, i64 %val1) #0 {
start:
  %0 = alloca [0 x [0 x [0 x i8]]], i32 0, align 1
  %_4 = icmp eq i128 %val0, 0
  %1 = zext i1 %_4 to i8
  store i8 %1, ptr %0, align 1
  ; The asm takes %0 as a register operand.
  call void asm sideeffect "", "r,~{memory}"(ptr %0)
  %_7 = icmp eq i64 %val1, 0
  %2 = zext i1 %_7 to i8
  store i8 %2, ptr %0, align 1
  ; Here the asm operand is null rather than %0.
  call void asm sideeffect "", "r,~{memory}"(ptr null)
  ret void
}

; Reduced driver: writes seven i32 42 values into %2 with a single vector store
; and passes %2 as the last argument of fn11. When %0 is true, fn11 is called a
; second time with null pointer arguments.
; NOTE(review): %1 is a zero-byte alloca, yet it is passed as fn11's %_4, which
; fn11 reads from and writes to.
define void @fn1(i1 %0) #1 {
start:
  %1 = alloca [0 x [0 x [0 x [5 x i32]]]], i32 0, align 4
  %2 = alloca [7 x i32], align 4
  store <7 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, ptr %2, align 4
  call fastcc void @_ZN5repro4fn1117ha0d291cafd330a2bE(i64 0, i128 0, ptr %1, i128 0, ptr %2)
  br i1 %0, label %bb2.preheader.i, label %_ZN5repro3fn517h51e49bf383c47da1E.exit

bb2.preheader.i:                                  ; preds = %start
  ; Only reached when %0 is true; both pointer arguments are null here.
  call fastcc void @_ZN5repro4fn1117ha0d291cafd330a2bE(i64 1, i128 1, ptr null, i128 1, ptr null)
  br label %_ZN5repro3fn517h51e49bf383c47da1E.exit

_ZN5repro3fn517h51e49bf383c47da1E.exit:           ; preds = %bb2.preheader.i, %start
  ret void
}

; Reduced form of repro::fn11: loads a <7 x i32> from %_7, copies it into the
; local %_8, performs three bcmp comparisons whose results are stored through
; %_4, and then prints lanes of the loaded vector with printf.
; NOTE(review): several accesses here look like llvm-reduce artifacts rather
; than part of the original program: %0 is a zero-byte alloca, and %_4 is read
; and written (up to 64 bytes via bcmp) although the caller passes a zero-byte
; alloca for it.
define internal fastcc void @_ZN5repro4fn1117ha0d291cafd330a2bE(i64 %_1, i128 %_3, ptr %_4, i128 %_5.1, ptr %_7) #1 personality ptr null {
start:
  %0 = alloca [0 x [0 x [0 x i8]]], i32 0, align 1
  %fmt.i = alloca [4 x i8], align 1
  %1 = alloca [5 x i32], align 4
  %2 = alloca [4 x i128], align 8
  %_8 = alloca [7 x i32], align 4
  ; Vector load of the by-pointer argument followed by a store into a local
  ; array of the same size; %3 is also the value printed below.
  %3 = load <7 x i32>, ptr %_7, align 4
  store <7 x i32> %3, ptr %_8, align 4
  tail call fastcc void @_ZN5repro11black_box_217h2c9aef2f201b1074E(i128 %_3, i64 %_1)
  store i128 %_5.1, ptr %2, align 8
  %4 = load i8, ptr %_4, align 1
  store i8 %4, ptr %1, align 4
  %bcmp.i.i.i = call i32 @bcmp(ptr %_4, ptr %_8, i64 28)
  %5 = icmp eq i32 %bcmp.i.i.i, 0
  %6 = zext i1 %5 to i8
  store i8 %6, ptr %_4, align 1
  call void asm sideeffect "", "r,~{memory}"(ptr %0)
  %bcmp.i.i3.i = call i32 @bcmp(ptr %_4, ptr %2, i64 64)
  %7 = icmp eq i32 %bcmp.i.i3.i, 0
  %8 = zext i1 %7 to i8
  store i8 %8, ptr %_4, align 1
  call void asm sideeffect "", "r,~{memory}"(ptr null)
  %bcmp.i.i4.i = call i32 @bcmp(ptr %_4, ptr %1, i64 20)
  %9 = icmp eq i32 %bcmp.i.i4.i, 0
  %10 = zext i1 %9 to i8
  store i8 %10, ptr %_4, align 1
  call void asm sideeffect "", "r,~{memory}"(ptr null)
  ; Bytes 37, 100, 32 are ASCII '%', 'd', ' ', i.e. the format "%d ".
  ; NOTE(review): byte 3 of %fmt.i is never written in this reduced listing, so
  ; the format string has no stored terminator (the original IR stores 0 there).
  store i8 37, ptr %fmt.i, align 1
  %11 = getelementptr [4 x i8], ptr %fmt.i, i64 0, i64 1
  store i8 100, ptr %11, align 1
  %12 = getelementptr [4 x i8], ptr %fmt.i, i64 0, i64 2
  store i8 32, ptr %12, align 1
  %iter.i.sroa.10.16.vec.extract = extractelement <7 x i32> %3, i64 0
  %_44.i = call i32 (ptr, ...) @printf(ptr %fmt.i, i32 %iter.i.sroa.10.16.vec.extract)
  %iter.i.sroa.10.20.vec.extract = extractelement <7 x i32> %3, i64 1
  %_44.i.1 = call i32 (ptr, ...) @printf(ptr %fmt.i, i32 %iter.i.sroa.10.20.vec.extract)
  %iter.i.sroa.10.24.vec.extract = extractelement <7 x i32> %3, i64 2
  %_44.i.2 = call i32 (ptr, ...) @printf(ptr %fmt.i, i32 %iter.i.sroa.10.24.vec.extract)
  %iter.i.sroa.10.28.vec.extract = extractelement <7 x i32> %3, i64 3
  %_44.i.3 = call i32 (ptr, ...) @printf(ptr %fmt.i, i32 %iter.i.sroa.10.28.vec.extract)
  %iter.i.sroa.10.32.vec.extract = extractelement <7 x i32> %3, i64 4
  %_44.i.4 = call i32 (ptr, ...) @printf(ptr %fmt.i, i32 %iter.i.sroa.10.32.vec.extract)
  %iter.i.sroa.10.36.vec.extract = extractelement <7 x i32> %3, i64 5
  %_44.i.5 = call i32 (ptr, ...) @printf(ptr %fmt.i, i32 %iter.i.sroa.10.36.vec.extract)
  ; NOTE(review): the seventh printf reuses the lane-0 extract; lane 6 is not
  ; extracted anywhere in this reduced listing.
  %_44.i.6 = call i32 (ptr, ...) @printf(ptr %fmt.i, i32 %iter.i.sroa.10.16.vec.extract)
  ret void
}

declare i32 @printf(ptr, ...)

; Entry point of the reduced reproducer: calls fn1 with %0 = false, so fn1's
; second (null-pointer) call to fn11 is not taken, and returns 0.
define i32 @main() {
top:
  call void @fn1(i1 false)
  ret i32 0
}

declare i32 @bcmp(ptr, ptr, i64)

; uselistorder directives
uselistorder ptr null, { 1, 2, 6, 7, 0, 3, 4, 5 }
uselistorder ptr @_ZN5repro4fn1117ha0d291cafd330a2bE, { 1, 0 }
uselistorder ptr @printf, { 6, 5, 4, 3, 2, 1, 0 }
uselistorder ptr @bcmp, { 2, 1, 0 }

attributes #0 = { noinline }
attributes #1 = { "target-cpu"="x86-64" }

The above was produced by llvm-reduce. I don't know whether the reduction broke something, so I have attached the original IR below. It was compiled from Rust, but I've patched out the symbols from Rust std, so it has no dependency on Rust.

original IR

; ModuleID = 'repro.46f743e1561fb24e-cgu.0'
source_filename = "repro.46f743e1561fb24e-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%Adt56 = type { { i128, i128, { i16, i128 }, i64, i32, [1 x i32] }, %Adt55 }
%Adt55 = type { %Adt54 }
%Adt54 = type { { i128, ptr }, { i128, i128, { i16, i128 }, i64, i32, [1 x i32] } }

@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h0eee5ecdc5932091E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17hf0de4a394f8e37a1E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h991b85cf75f57f3aE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h991b85cf75f57f3aE" }>, align 8
@alloc_a00f8a95864fc305bf508c11187211d8 = private unnamed_addr constant <{ [28 x i8] }> zeroinitializer, align 4
@alloc_4f40612ab7406a7d1f3f0640c8ea0fb4 = private unnamed_addr constant <{ [64 x i8] }> zeroinitializer, align 8
@alloc_ee0548ff1320ae5be168b83ab0b060cd = private unnamed_addr constant <{ [20 x i8] }> <{ [20 x i8] c"a\00\00\00a\00\00\00a\00\00\00a\00\00\00a\00\00\00" }>, align 4

; std::sys_common::backtrace::__rust_begin_short_backtrace
; Function Attrs: noinline nonlazybind uwtable
; Calls the function pointer %f with no arguments, then executes an empty
; side-effecting asm that clobbers memory. Marked noinline via attribute
; group #0.
define internal fastcc void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h3230664098c98715E(ptr nocapture noundef nonnull readonly %f) unnamed_addr #0 {
start:
  tail call void %f()
  tail call void asm sideeffect "", "~{memory}"() #10, !srcloc !3
  ret void
}


; std::rt::lang_start::{{closure}}
; Function Attrs: inlinehint nonlazybind uwtable
; Loads a non-null function pointer from %_1, hands it to
; __rust_begin_short_backtrace, and returns 0.
define internal noundef i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h991b85cf75f57f3aE"(ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %_1) unnamed_addr #2 {
start:
  %_4 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
; call std::sys_common::backtrace::__rust_begin_short_backtrace
  tail call fastcc void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h3230664098c98715E(ptr noundef nonnull %_4)
  ret i32 0
}

; core::ops::function::FnOnce::call_once{{vtable.shim}}
; Function Attrs: inlinehint nonlazybind uwtable
; Same shape as the lang_start closure: loads a non-null function pointer from
; %_1, passes it to __rust_begin_short_backtrace, and returns 0. This symbol is
; referenced from @vtable.0.
define internal noundef i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17hf0de4a394f8e37a1E"(ptr nocapture noundef readonly %_1) unnamed_addr #2 personality ptr @rust_eh_personality {
start:
  %0 = load ptr, ptr %_1, align 8, !nonnull !4, !noundef !4
; call std::sys_common::backtrace::__rust_begin_short_backtrace
  tail call fastcc void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h3230664098c98715E(ptr noundef nonnull %0), !noalias !5
  ret i32 0
}

; core::ptr::drop_in_place<std::rt::lang_start<()>::{{closure}}>
; Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable
; No-op: ignores %_1 and returns immediately. This symbol is the first entry
; of @vtable.0.
define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h0eee5ecdc5932091E"(ptr noalias nocapture readnone align 8 %_1) unnamed_addr #3 {
start:
  ret void
}

; repro::black_box_1
; Function Attrs: noinline nonlazybind uwtable
; Compares the 28 bytes at %val3 against the zero-initialized 28-byte global
; with bcmp, stores the equality result as an i8 into a local, and passes that
; local's address to an empty side-effecting asm with a memory clobber.
define internal fastcc void @_ZN5repro11black_box_117h2948a258b3403becE(ptr noalias nocapture noundef readonly dereferenceable(28) %val3) unnamed_addr #0 {
start:
  %0 = alloca i8, align 1
  %bcmp.i.i = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(28) @alloc_a00f8a95864fc305bf508c11187211d8, ptr noundef nonnull dereferenceable(28) %val3, i64 28)
  %1 = icmp eq i32 %bcmp.i.i, 0
  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %0)
  %2 = zext i1 %1 to i8
  store i8 %2, ptr %0, align 1
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) #10, !srcloc !3
  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %0)
  ret void
}

; repro::black_box_2
; Function Attrs: noinline nonlazybind uwtable
; For each argument in turn: compares it with zero, stores the result as an i8
; into its own one-byte local (%1 for %val0, %0 for %val1), and passes that
; local's address to an empty side-effecting asm with a memory clobber.
define internal fastcc void @_ZN5repro11black_box_217h2c9aef2f201b1074E(i128 noundef %val0, i64 noundef %val1) unnamed_addr #0 {
start:
  %0 = alloca i8, align 1
  %1 = alloca i8, align 1
  %_4 = icmp eq i128 %val0, 0
  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %1)
  %2 = zext i1 %_4 to i8
  store i8 %2, ptr %1, align 1
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %1) #10, !srcloc !3
  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %1)
  %_7 = icmp eq i64 %val1, 0
  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %0)
  %3 = zext i1 %_7 to i8
  store i8 %3, ptr %0, align 1
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) #10, !srcloc !3
  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %0)
  ret void
}

; Function Attrs: nonlazybind uwtable
; Driver of the original reproducer. Fills %_1 with seven i32 42 values,
; reloads them as one <7 x i32> (%10), initializes some fields of the %Adt56
; local %_2.i, and calls repro::fn11 with a [5 x i32] buffer of 97s and a copy
; of %10. Depending on an i16 field of %_2.i it may repeat the call in a loop,
; then passes %_1 to repro::black_box_1.
define dso_local void @fn1() unnamed_addr #1 {
start:
  %0 = alloca [7 x i32], align 4
  %1 = alloca [5 x i32], align 4
  %2 = alloca [7 x i32], align 4
  %3 = alloca [5 x i32], align 4
  %_2.i = alloca %Adt56, align 8
  %_1 = alloca [7 x i32], align 4
  ; Seven scalar stores of 42, one per element of %_1.
  store i32 42, ptr %_1, align 4
  %4 = getelementptr inbounds i32, ptr %_1, i64 1
  store i32 42, ptr %4, align 4
  %5 = getelementptr inbounds i32, ptr %_1, i64 2
  store i32 42, ptr %5, align 4
  %6 = getelementptr inbounds i32, ptr %_1, i64 3
  store i32 42, ptr %6, align 4
  %7 = getelementptr inbounds i32, ptr %_1, i64 4
  store i32 42, ptr %7, align 4
  %8 = getelementptr inbounds i32, ptr %_1, i64 5
  store i32 42, ptr %8, align 4
  %9 = getelementptr inbounds i32, ptr %_1, i64 6
  store i32 42, ptr %9, align 4
  ; The whole array is read back as a single 7-lane vector.
  %10 = load <7 x i32>, ptr %_1, align 4
  call void @llvm.lifetime.start.p0(i64 168, ptr nonnull %_2.i)
  %11 = getelementptr inbounds %Adt56, ptr %_2.i, i64 0, i32 1, i32 0, i32 1
  %12 = getelementptr inbounds %Adt56, ptr %_2.i, i64 0, i32 1, i32 0, i32 1, i32 3
  store i64 0, ptr %12, align 8, !noalias !8
  %13 = getelementptr inbounds %Adt56, ptr %_2.i, i64 0, i32 1, i32 0, i32 1, i32 2
  store i16 -21983, ptr %13, align 8, !noalias !8
  %14 = getelementptr inbounds %Adt56, ptr %_2.i, i64 0, i32 1, i32 0, i32 1, i32 2, i32 1
  store i128 0, ptr %14, align 8, !noalias !8
  store i128 0, ptr %11, align 8, !noalias !8
  %15 = getelementptr inbounds %Adt56, ptr %_2.i, i64 0, i32 1, i32 0, i32 0, i32 1
  ; The pointer field is set to the address of %_2.i itself.
  store ptr %_2.i, ptr %15, align 8, !noalias !8
  call void @llvm.lifetime.start.p0(i64 20, ptr nonnull %3), !noalias !8
  ; Five i32 stores of 97 at byte offsets 0, 4, 8, 12, 16 of %3.
  store i32 97, ptr %3, align 4, !noalias !8
  %_3.sroa.3.0..sroa_idx.i = getelementptr inbounds i8, ptr %3, i64 4
  store i32 97, ptr %_3.sroa.3.0..sroa_idx.i, align 4, !noalias !8
  %_3.sroa.4.0..sroa_idx.i = getelementptr inbounds i8, ptr %3, i64 8
  store i32 97, ptr %_3.sroa.4.0..sroa_idx.i, align 4, !noalias !8
  %_3.sroa.5.0..sroa_idx.i = getelementptr inbounds i8, ptr %3, i64 12
  store i32 97, ptr %_3.sroa.5.0..sroa_idx.i, align 4, !noalias !8
  %_3.sroa.6.0..sroa_idx.i = getelementptr inbounds i8, ptr %3, i64 16
  store i32 97, ptr %_3.sroa.6.0..sroa_idx.i, align 4, !noalias !8
  call void @llvm.lifetime.start.p0(i64 28, ptr nonnull %2), !noalias !8
  ; %2 receives a copy of the vector and is passed by pointer to fn11.
  store <7 x i32> %10, ptr %2, align 4, !noalias !8
; call repro::fn11
  call fastcc void @_ZN5repro4fn1117ha0d291cafd330a2bE(i64 noundef 0, i128 noundef 0, ptr noalias nocapture noundef nonnull readonly dereferenceable(20) %3, i128 noundef 0, ptr noalias nocapture noundef nonnull readonly dereferenceable(28) %2)
  call void @llvm.lifetime.end.p0(i64 20, ptr nonnull %3), !noalias !8
  call void @llvm.lifetime.end.p0(i64 28, ptr nonnull %2), !noalias !8
  ; Reloads the i16 field written above (-21983) and enters the loop only if
  ; it equals 2.
  %16 = load i16, ptr %13, align 8, !noalias !8, !noundef !4
  %17 = icmp eq i16 %16, 2
  br i1 %17, label %bb2.preheader.i, label %_ZN5repro3fn517h51e49bf383c47da1E.exit

bb2.preheader.i:                                  ; preds = %start
  ; Element addresses of %1 computed once before the loop.
  %_3.sroa.3.0..sroa_idx3.i = getelementptr inbounds i8, ptr %1, i64 4
  %_3.sroa.4.0..sroa_idx5.i = getelementptr inbounds i8, ptr %1, i64 8
  %_3.sroa.5.0..sroa_idx7.i = getelementptr inbounds i8, ptr %1, i64 12
  %_3.sroa.6.0..sroa_idx9.i = getelementptr inbounds i8, ptr %1, i64 16
  br label %bb2.i

bb2.i:                                            ; preds = %bb2.i, %bb2.preheader.i
  ; Loop body: same call as above, but with the scalar arguments loaded from
  ; %_2.i and with buffers %1 and %0 in place of %3 and %2.
  %18 = load i64, ptr %12, align 8, !noalias !8, !noundef !4
  %19 = load i128, ptr %11, align 8, !noalias !8, !noundef !4
  call void @llvm.lifetime.start.p0(i64 20, ptr nonnull %1), !noalias !8
  store i32 97, ptr %1, align 4, !noalias !8
  store i32 97, ptr %_3.sroa.3.0..sroa_idx3.i, align 4, !noalias !8
  store i32 97, ptr %_3.sroa.4.0..sroa_idx5.i, align 4, !noalias !8
  store i32 97, ptr %_3.sroa.5.0..sroa_idx7.i, align 4, !noalias !8
  store i32 97, ptr %_3.sroa.6.0..sroa_idx9.i, align 4, !noalias !8
  %20 = load i128, ptr %14, align 8, !noalias !8, !noundef !4
  call void @llvm.lifetime.start.p0(i64 28, ptr nonnull %0), !noalias !8
  store <7 x i32> %10, ptr %0, align 4, !noalias !8
; call repro::fn11
  call fastcc void @_ZN5repro4fn1117ha0d291cafd330a2bE(i64 noundef %18, i128 noundef %19, ptr noalias nocapture noundef nonnull readonly dereferenceable(20) %1, i128 noundef %20, ptr noalias nocapture noundef nonnull readonly dereferenceable(28) %0)
  call void @llvm.lifetime.end.p0(i64 20, ptr nonnull %1), !noalias !8
  call void @llvm.lifetime.end.p0(i64 28, ptr nonnull %0), !noalias !8
  %21 = load i16, ptr %13, align 8, !noalias !8, !noundef !4
  %22 = icmp eq i16 %21, 2
  br i1 %22, label %bb2.i, label %_ZN5repro3fn517h51e49bf383c47da1E.exit

_ZN5repro3fn517h51e49bf383c47da1E.exit:           ; preds = %bb2.i, %start
  call void @llvm.lifetime.end.p0(i64 168, ptr nonnull %_2.i)
; call repro::black_box_1
  call fastcc void @_ZN5repro11black_box_117h2948a258b3403becE(ptr noalias nocapture noundef nonnull readonly dereferenceable(28) %_1)
  ret void
}

; repro::fn11
; Function Attrs: nonlazybind uwtable
; Original repro::fn11. Copies the <7 x i32> at %_7 into the local %_8, calls
; repro::black_box_2 on two scalar arguments, builds a 64-byte buffer (%7) and
; a 20-byte copy of %_4 (%6), runs a series of "store an i8 and pass its
; address to an empty asm" sequences (three of them fed by bcmp against
; constant globals), and finally prints the seven lanes of the loaded vector
; followed by a newline.
define internal fastcc void @_ZN5repro4fn1117ha0d291cafd330a2bE(i64 noundef %_1, i128 noundef %_3, ptr noalias nocapture noundef readonly dereferenceable(20) %_4, i128 noundef %_5.1, ptr noalias nocapture noundef readonly dereferenceable(28) %_7) unnamed_addr #1 personality ptr @rust_eh_personality {
start:
  %0 = alloca i8, align 1
  %1 = alloca i8, align 1
  %2 = alloca i8, align 1
  %3 = alloca i8, align 1
  %4 = alloca i8, align 1
  %5 = alloca i8, align 1
  %lf.i = alloca [2 x i8], align 1
  %fmt.i = alloca [4 x i8], align 1
  %6 = alloca [5 x i32], align 4
  %7 = alloca [4 x i128], align 8
  %_8 = alloca [7 x i32], align 4
  ; Vector load of the by-pointer argument, copied into %_8; the loaded value
  ; %8 is also what gets printed at the end.
  %8 = load <7 x i32>, ptr %_7, align 4
  store <7 x i32> %8, ptr %_8, align 4
; call repro::black_box_2
  tail call fastcc void @_ZN5repro11black_box_217h2c9aef2f201b1074E(i128 noundef %_3, i64 noundef %_1)
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %7)
  ; %7 layout: bytes 0-31 zeroed, bytes 32-47 = %_5.1, bytes 48-63 = 0.
  %_12.sroa.3.0..sroa_idx = getelementptr inbounds i8, ptr %7, i64 32
  call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %7, i8 0, i64 32, i1 false)
  store i128 %_5.1, ptr %_12.sroa.3.0..sroa_idx, align 8
  %_12.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %7, i64 48
  store i128 0, ptr %_12.sroa.4.0..sroa_idx, align 8
  call void @llvm.lifetime.start.p0(i64 20, ptr nonnull %6)
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(20) %6, ptr noundef nonnull align 4 dereferenceable(20) %_4, i64 20, i1 false)
  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %5), !noalias !11
  store i8 0, ptr %5, align 1, !noalias !11
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %5) #10, !noalias !11, !srcloc !3
  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %5), !noalias !11
  ; Is the 28-byte copy %_8 equal to the all-zero 28-byte constant?
  %bcmp.i.i.i = call i32 @bcmp(ptr noundef nonnull dereferenceable(28) @alloc_a00f8a95864fc305bf508c11187211d8, ptr noundef nonnull dereferenceable(28) %_8, i64 28), !noalias !16
  %9 = icmp eq i32 %bcmp.i.i.i, 0
  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %4), !noalias !11
  %10 = zext i1 %9 to i8
  store i8 %10, ptr %4, align 1, !noalias !11
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %4) #10, !noalias !11, !srcloc !3
  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %4), !noalias !11
  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %3), !noalias !11
  store i8 1, ptr %3, align 1, !noalias !11
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %3) #10, !noalias !11, !srcloc !3
  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %3), !noalias !11
  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %2), !noalias !11
  store i8 1, ptr %2, align 1, !noalias !11
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %2) #10, !noalias !11, !srcloc !3
  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %2), !noalias !11
  ; Is the 64-byte buffer %7 equal to the all-zero 64-byte constant?
  %bcmp.i.i3.i = call i32 @bcmp(ptr noundef nonnull dereferenceable(64) @alloc_4f40612ab7406a7d1f3f0640c8ea0fb4, ptr noundef nonnull dereferenceable(64) %7, i64 64), !noalias !17
  %11 = icmp eq i32 %bcmp.i.i3.i, 0
  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %1), !noalias !11
  %12 = zext i1 %11 to i8
  store i8 %12, ptr %1, align 1, !noalias !11
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %1) #10, !noalias !11, !srcloc !3
  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %1), !noalias !11
  ; Is the 20-byte copy %6 equal to the constant holding five 'a' (97) words?
  %bcmp.i.i4.i = call i32 @bcmp(ptr noundef nonnull dereferenceable(20) @alloc_ee0548ff1320ae5be168b83ab0b060cd, ptr noundef nonnull dereferenceable(20) %6, i64 20), !noalias !18
  %13 = icmp eq i32 %bcmp.i.i4.i, 0
  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %0), !noalias !11
  %14 = zext i1 %13 to i8
  store i8 %14, ptr %0, align 1, !noalias !11
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) #10, !noalias !11, !srcloc !3
  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %0), !noalias !11
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %fmt.i), !noalias !11
  ; Bytes 37, 100, 32, 0 are ASCII '%', 'd', ' ' plus a NUL: the string "%d ".
  store i8 37, ptr %fmt.i, align 1, !noalias !11
  %15 = getelementptr inbounds [4 x i8], ptr %fmt.i, i64 0, i64 1
  store i8 100, ptr %15, align 1, !noalias !11
  %16 = getelementptr inbounds [4 x i8], ptr %fmt.i, i64 0, i64 2
  store i8 32, ptr %16, align 1, !noalias !11
  %17 = getelementptr inbounds [4 x i8], ptr %fmt.i, i64 0, i64 3
  store i8 0, ptr %17, align 1, !noalias !11
  ; One printf per lane, lanes 0 through 6 of %8.
  %iter.i.sroa.10.16.vec.extract = extractelement <7 x i32> %8, i64 0
  %_44.i = call noundef i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) %fmt.i, i32 noundef %iter.i.sroa.10.16.vec.extract), !noalias !11
  %iter.i.sroa.10.20.vec.extract = extractelement <7 x i32> %8, i64 1
  %_44.i.1 = call noundef i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) %fmt.i, i32 noundef %iter.i.sroa.10.20.vec.extract), !noalias !11
  %iter.i.sroa.10.24.vec.extract = extractelement <7 x i32> %8, i64 2
  %_44.i.2 = call noundef i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) %fmt.i, i32 noundef %iter.i.sroa.10.24.vec.extract), !noalias !11
  %iter.i.sroa.10.28.vec.extract = extractelement <7 x i32> %8, i64 3
  %_44.i.3 = call noundef i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) %fmt.i, i32 noundef %iter.i.sroa.10.28.vec.extract), !noalias !11
  %iter.i.sroa.10.32.vec.extract = extractelement <7 x i32> %8, i64 4
  %_44.i.4 = call noundef i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) %fmt.i, i32 noundef %iter.i.sroa.10.32.vec.extract), !noalias !11
  %iter.i.sroa.10.36.vec.extract = extractelement <7 x i32> %8, i64 5
  %_44.i.5 = call noundef i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) %fmt.i, i32 noundef %iter.i.sroa.10.36.vec.extract), !noalias !11
  %iter.i.sroa.10.40.vec.extract = extractelement <7 x i32> %8, i64 6
  %_44.i.6 = call noundef i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) %fmt.i, i32 noundef %iter.i.sroa.10.40.vec.extract), !noalias !11
  call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %lf.i), !noalias !11
  ; Bytes 10, 0: a newline character followed by a NUL terminator.
  store i8 10, ptr %lf.i, align 1, !noalias !11
  %18 = getelementptr inbounds [2 x i8], ptr %lf.i, i64 0, i64 1
  store i8 0, ptr %18, align 1, !noalias !11
  %_50.i = call noundef i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) %lf.i), !noalias !11
  call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %lf.i), !noalias !11
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %fmt.i), !noalias !11
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %7)
  call void @llvm.lifetime.end.p0(i64 20, ptr nonnull %6)
  ret void
}

; repro::main
; Function Attrs: nonlazybind uwtable
; repro::main: a thin wrapper that tail-calls fn1 and returns.
define internal void @_ZN5repro4main17hc1028cd349e9622cE() unnamed_addr #1 {
start:
  tail call void @fn1()
  ret void
}

; Function Attrs: nonlazybind uwtable
; Stub personality function: ignores its five arguments and returns 0. It is
; referenced by the `personality` clauses in this module; the issue author
; says the Rust std symbols were patched out to make the IR self-contained.
define internal i32 @rust_eh_personality(i32 noundef, i32 noundef, i64 noundef, ptr noundef, ptr noundef) unnamed_addr #1 {
start:
  ret i32 0
}

; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #4

; Function Attrs: nofree nounwind nonlazybind uwtable
declare noundef i32 @printf(ptr nocapture noundef readonly, ...) unnamed_addr #5

; Function Attrs: nonlazybind
; C-level entry point: ignores both parameters, calls fn1 directly, and
; returns 0.
define i32 @main(i32 %0, ptr %1) unnamed_addr #6 {
top:
  call void @fn1()
  ret i32 0
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #7

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #7

; Function Attrs: nofree nounwind nonlazybind willreturn memory(argmem: read)
declare i32 @bcmp(ptr nocapture, ptr nocapture, i64) local_unnamed_addr #8

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #9

attributes #0 = { noinline nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { inlinehint nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { inlinehint mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #4 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #5 = { nofree nounwind nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #6 = { nonlazybind "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #7 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #8 = { nofree nounwind nonlazybind willreturn memory(argmem: read) }
attributes #9 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #10 = { nounwind }

!llvm.module.flags = !{!0, !1, !2}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{i32 2, !"RtLibUseGOT", i32 1}
!3 = !{i32 704612}
!4 = !{}
!5 = !{!6}
!6 = distinct !{!6, !7, !"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h991b85cf75f57f3aE: %_1"}
!7 = distinct !{!7, !"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h991b85cf75f57f3aE"}
!8 = !{!9}
!9 = distinct !{!9, !10, !"_ZN5repro3fn517h51e49bf383c47da1E: %_1"}
!10 = distinct !{!10, !"_ZN5repro3fn517h51e49bf383c47da1E"}
!11 = !{!12, !14, !15}
!12 = distinct !{!12, !13, !"_ZN5repro8dump_var17h8ee3b87d4b793436E: %val0"}
!13 = distinct !{!13, !"_ZN5repro8dump_var17h8ee3b87d4b793436E"}
!14 = distinct !{!14, !13, !"_ZN5repro8dump_var17h8ee3b87d4b793436E: %val2"}
!15 = distinct !{!15, !13, !"_ZN5repro8dump_var17h8ee3b87d4b793436E: %val3"}
!16 = !{!14, !15}
!17 = !{!12, !15}
!18 = !{!12, !14}

The text was updated successfully, but these errors were encountered:

efriedma-quic · 2023-06-23T23:48:33Z

I don't think your reduction is correct; it looks like it involves accessing zero-byte allocations.

Generally, the first tool I reach for to reduce miscompiles is opt-bisect-limit (https://llvm.org/docs/OptBisect.html).

cbeuw · 2023-06-24T07:47:03Z

I removed all the zero-byte allocas: https://godbolt.org/z/jEbPc1P94

nikic · 2023-06-27T14:39:41Z

Looks like there is an ABI mismatch. The arguments are pushed via pushq at 8 byte offsets and then read via movl at 4 byte offsets.

nikic · 2023-06-27T15:09:18Z

Here's a reduction:

; Passes six null pointers followed by a constant <7 x i32> of 42s to @callee.
; The assembly quoted below this listing shows the caller pushing the seven
; lane values with pushq.
define void @caller() nounwind {
  call void @callee(ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, <7 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>)
  ret void
}

; Receives six pointers and a <7 x i32> by value, stores the vector to a local
; [7 x i32] alloca, then extracts lanes one at a time and passes each to @use.
; The thread reports that with the store present the callee reads the lanes at
; the wrong stack offsets, and that dropping the store gives correct offsets.
define void @callee(ptr %p0, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, <7 x i32> %arg) nounwind {
start:
  %alloca = alloca [7 x i32], align 4
  ; This store of the incoming argument is what the thread ties to the
  ; "arg copy elision" optimization.
  store <7 x i32> %arg, ptr %alloca, align 4
  %extract0 = extractelement <7 x i32> %arg, i64 0
  call void @use(i32 %extract0)
  %extract1 = extractelement <7 x i32> %arg, i64 1
  call void @use(i32 %extract1)
  %extract2 = extractelement <7 x i32> %arg, i64 2
  call void @use(i32 %extract2)
  %extract3 = extractelement <7 x i32> %arg, i64 3
  call void @use(i32 %extract3)
  %extract4 = extractelement <7 x i32> %arg, i64 4
  call void @use(i32 %extract4)
  %extract5 = extractelement <7 x i32> %arg, i64 5
  call void @use(i32 %extract5)
  %extract6 = extractelement <7 x i32> %arg, i64 6
  call void @use(i32 %extract6)
  ; NOTE(review): index 7 is out of range for a 7-lane vector (valid lanes are
  ; 0-6); per the LLVM LangRef an out-of-range extractelement yields poison.
  ; The callee assembly quoted below shows only seven loads — confirm whether
  ; this eighth extract is intended.
  %extract7 = extractelement <7 x i32> %arg, i64 7
  call void @use(i32 %extract7)
  ret void
}

declare void @use(i32)

The caller does:

	pushq	$42
	pushq	$42
	pushq	$42
	pushq	$42
	pushq	$42
	pushq	$42
	pushq	$42
	callq	callee@PLT

The callee does:

	movl	112(%rsp), %ebx
	movl	104(%rsp), %ebp
	movl	96(%rsp), %r14d
	movl	76(%rsp), %r15d
	movl	72(%rsp), %r12d
	movl	64(%rsp), %edi
	movl	68(%rsp), %r13d

If we drop the store, then the offsets are correct (don't mind the different base):

	movl	144(%rsp), %ebx
	movl	136(%rsp), %ebp
	movl	128(%rsp), %r14d
	movl	120(%rsp), %r15d
	movl	112(%rsp), %r12d
	movl	104(%rsp), %r13d
	movl	96(%rsp), %edi

So this is again in some way related to the arg copy elision optimization.

nikic · 2023-06-29T13:15:39Z

This seems to be related to the code in X86ISelLowering::LowerMemArgument() handling isCopyElisionCandidate(). It checks for ScalarizedAndExtendedVector, but does so by inspecting the size of the LocVT. However, if I'm understanding this right, in this case the LocVT is i32 matching the vector size, but this doesn't match the size of the stack slot, which is 8.

I'm not sure if there's any easy way to access that stack slot size though... CCValAssign only stores the start offset.

nikic · 2023-06-29T13:47:16Z

Candidate patch: https://reviews.llvm.org/D154078

llvmbot · 2023-06-29T13:54:09Z

@llvm/issue-subscribers-backend-x86

nikic · 2023-07-03T09:28:16Z

@cbeuw Do you have the original Rust code that led to this issue? I find it suspicious that we end up with illegal vector types in optimized IR -- unless you did something with repr(simd) I don't think that's supposed to happen.

cbeuw · 2023-07-03T09:48:36Z

@nikic I have the unreduced code in custom MIR: https://godbolt.org/z/7q6q8eK96. But I don't have the reduced one around any more... I'm happy to run the minimisation script again though if needed.

This isn't reproducible from surface Rust, which is why I opened a bug report with LLVM directly. The reproduction required a Move operand of an array local in a function call, where the same local was previously used. This MIR cannot be built from surface Rust as MIR building creates temporary copies for all Move operands in Call. The local that gets moved is assigned to and used exactly once. If you change Move(_16) to _16 on line 3200 then the bug goes away.

cbeuw · 2023-07-03T09:54:30Z

By illegal vector types do you mean the zero-byte [0 x [0 x [0 x i8]]]s? They weren't from rustc, they were from llvm-reduce. The IR from Rust was folded under original IR in the OP.

nikic · 2023-07-03T10:56:16Z

"Illegal vector type" here refers to the non-power-of-two vectors, which are not natively supported by the target. They are already part of the input IR, and the most likely culprit for that is rust-lang/rust#111999.

I wonder whether it would make sense to prevent argument promotion for such types, as the legalized argument passing for such vectors can be substantially worse than just passing them indirectly (and it makes it more likely to hit legalization bugs like #63608).

When eliding argument copies, the memory layout between a plain store of the type and the layout of the argument lowering on the stack must match. For multi-part argument lowerings, this is not necessarily the case. The code already tried to prevent this optimization for "scalarized and extended" vectors, but the check for "extends" was incomplete. While a scalarized vector of i32s stores i32 values on the stack, these are stored in 8 byte stack slots (on x86_64), so effectively have padding. Rather than trying to add more special cases to handle this (which is not straightforward), I'm going in the other direction and exclude scalarized vectors from this optimization entirely. This seems like a rare case that is not worth the hassle -- the complete lack of test coverage is not reassuring either. Fixes llvm/llvm-project#63475. Differential Revision: https://reviews.llvm.org/D154078

github-actions bot added the new issue label Jun 23, 2023

cbeuw mentioned this issue Jun 23, 2023

Miscompilation on aarch64-apple-darwin (likely a calling convention bug) rust-lang/rust#112548

Closed

tbaederr added llvm:optimizations and removed new issue labels Jun 23, 2023

nikic added the miscompilation label Jun 23, 2023

nikic self-assigned this Jun 27, 2023

EugeneZelenko added backend:X86 and removed llvm:optimizations labels Jun 29, 2023

nikic mentioned this issue Jul 3, 2023

spurious crash when compiling cargo on dist-mips64{el}-linux rust-lang/rust#113065

Closed

nikic closed this as completed in 7025ac8 Jul 13, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Miscompilation corrupts stack-allocated vectors #63475

Miscompilation corrupts stack-allocated vectors #63475

cbeuw commented Jun 23, 2023 •

edited

Loading

efriedma-quic commented Jun 23, 2023

cbeuw commented Jun 24, 2023

nikic commented Jun 27, 2023 •

edited

Loading

nikic commented Jun 27, 2023

nikic commented Jun 29, 2023

nikic commented Jun 29, 2023

llvmbot commented Jun 29, 2023

nikic commented Jul 3, 2023

cbeuw commented Jul 3, 2023

cbeuw commented Jul 3, 2023

nikic commented Jul 3, 2023

Miscompilation corrupts stack-allocated vectors #63475

Miscompilation corrupts stack-allocated vectors #63475

Comments

cbeuw commented Jun 23, 2023 • edited Loading

efriedma-quic commented Jun 23, 2023

cbeuw commented Jun 24, 2023

nikic commented Jun 27, 2023 • edited Loading

nikic commented Jun 27, 2023

nikic commented Jun 29, 2023

nikic commented Jun 29, 2023

llvmbot commented Jun 29, 2023

nikic commented Jul 3, 2023

cbeuw commented Jul 3, 2023

cbeuw commented Jul 3, 2023

nikic commented Jul 3, 2023

cbeuw commented Jun 23, 2023 •

edited

Loading

nikic commented Jun 27, 2023 •

edited

Loading