diff --git a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index eb9c4b3e5977..2850baf7a65e 100644
--- a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -586,7 +586,7 @@ void X86AvoidSFBPass::breakBlockedCopies(
       StDisp2 += OverlapDelta;
       Size2 -= OverlapDelta;
     }
-    Size1 = std::abs(std::abs(LdDisp2) - std::abs(LdDisp1));
+    Size1 = LdDisp2 - LdDisp1;
 
     // Build a copy for the point until the current blocking store's
     // displacement.
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2ba1174517ff..17d1f0359c52 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -522,11 +522,9 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
   }
 
   // Otherwise, there is an index. The computation we will do will be modulo
-  // the pointer size, so get it.
-  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
-  Offset &= PtrSizeMask;
-  VariableScale &= PtrSizeMask;
+  // the pointer size.
+  Offset = SignExtend64(Offset, IntPtrWidth);
+  VariableScale = SignExtend64(VariableScale, IntPtrWidth);
 
   // To do this transformation, any constant index must be a multiple of the
   // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
diff --git a/test/CodeGen/X86/pr39926.ll b/test/CodeGen/X86/pr39926.ll
new file mode 100644
index 000000000000..c22e4f2f9a8b
--- /dev/null
+++ b/test/CodeGen/X86/pr39926.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx | FileCheck %s
+define i8 @test_offset(i8* %base) {
+; CHECK-LABEL: test_offset:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movb $0, 7(%rdi)
+; CHECK-NEXT:    movw $0, 5(%rdi)
+; CHECK-NEXT:    movl $0, 1(%rdi)
+; CHECK-NEXT:    movl -4(%rdi), %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb (%rdi), %al
+; CHECK-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl 1(%rdi), %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movzwl 5(%rdi), %eax
+; CHECK-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb 7(%rdi), %al
+; CHECK-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl 8(%rdi), %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb -{{[0-9]+}}(%rsp), %al
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %z = alloca [128 x i8], align 16
+  %gep0 = getelementptr inbounds i8, i8* %base, i64 7
+  store volatile i8 0, i8* %gep0
+  %gep1 = getelementptr inbounds i8, i8* %base, i64 5
+  %bc1 = bitcast i8* %gep1 to i16*
+  store volatile i16 0, i16* %bc1
+  %gep2 = getelementptr inbounds i8, i8* %base, i64 1
+  %bc2 = bitcast i8* %gep2 to i32*
+  store volatile i32 0, i32* %bc2
+
+  %y1 = getelementptr inbounds i8, i8* %base, i64 -4
+  %y2 = bitcast [128 x i8]* %z to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %y2, i8* %y1, i64 16, i1 false)
+
+  %gep4 = getelementptr inbounds [128 x i8], [128 x i8]* %z, i64 0, i64 4
+  %ret = load i8, i8* %gep4
+  ret i8 %ret
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
diff --git a/test/Transforms/InstCombine/pr39908.ll b/test/Transforms/InstCombine/pr39908.ll
new file mode 100644
index 000000000000..bd7a82990ad8
--- /dev/null
+++ b/test/Transforms/InstCombine/pr39908.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "p:32:32"
+
+%S = type { [2 x i32] }
+
+define i1 @test([0 x %S]* %p, i32 %n) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %start.cast = bitcast [0 x %S]* %p to %S*
+  %end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i32 0, i32 %n, i32 0, i32 0
+  %end.cast = bitcast i32* %end to %S*
+  %last = getelementptr inbounds %S, %S* %end.cast, i32 -1
+  %cmp = icmp eq %S* %last, %start.cast
+  ret i1 %cmp
+}
+
+; Same test using 64-bit indices.
+define i1 @test64([0 x %S]* %p, i64 %n) {
+; CHECK-LABEL: @test64(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %start.cast = bitcast [0 x %S]* %p to %S*
+  %end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i64 0, i64 %n, i32 0, i64 0
+  %end.cast = bitcast i32* %end to %S*
+  %last = getelementptr inbounds %S, %S* %end.cast, i64 -1
+  %cmp = icmp eq %S* %last, %start.cast
+  ret i1 %cmp
+}
+
+; Here the offset overflows and is treated modulo 2^32. This is UB.
+define i1 @test64_overflow([0 x %S]* %p, i64 %n) {
+; CHECK-LABEL: @test64_overflow(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %start.cast = bitcast [0 x %S]* %p to %S*
+  %end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i64 0, i64 %n, i32 0, i64 8589934592
+  %end.cast = bitcast i32* %end to %S*
+  %last = getelementptr inbounds %S, %S* %end.cast, i64 -1
+  %cmp = icmp eq %S* %last, %start.cast
+  ret i1 %cmp
+}
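
Note (not part of the patch): a minimal standalone C++ sketch of the arithmetic behind the InstCombine change. The helper signExtend64 below is a hypothetical stand-in for llvm::SignExtend64 from llvm/Support/MathExtras.h; the point is that masking the accumulated GEP offset to the pointer width drops the sign of a negative offset under a 32-bit datalayout, while sign extension preserves it.

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::SignExtend64: sign-extend the low B bits of X to 64 bits.
static int64_t signExtend64(uint64_t X, unsigned B) {
  return int64_t(X << (64 - B)) >> (64 - B);
}

int main() {
  // With datalayout "p:32:32", stepping back one %S = { [2 x i32] } element
  // (as in pr39908.ll) contributes -8 bytes to the constant offset, which is
  // carried in a 64-bit accumulator during offset evaluation.
  int64_t Offset = -8;
  unsigned IntPtrWidth = 32;

  // Old behavior: mask to the pointer width; -8 becomes 0xFFFFFFF8
  // (4294967288), losing its sign.
  uint64_t PtrSizeMask = ~0ULL >> (64 - IntPtrWidth);
  uint64_t Masked = uint64_t(Offset) & PtrSizeMask;

  // New behavior: sign-extend from the pointer width; -8 stays -8.
  int64_t Extended = signExtend64(uint64_t(Offset), IntPtrWidth);

  printf("masked = %llu, sign-extended = %lld\n",
         (unsigned long long)Masked, (long long)Extended);
  return 0;
}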