diff --git a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index eb9c4b3e5977..2850baf7a65e 100644
--- a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -586,7 +586,7 @@ void X86AvoidSFBPass::breakBlockedCopies(
       StDisp2 += OverlapDelta;
       Size2 -= OverlapDelta;
     }
-    Size1 = std::abs(std::abs(LdDisp2) - std::abs(LdDisp1));
+    Size1 = LdDisp2 - LdDisp1;
 
     // Build a copy for the point until the current blocking store's
     // displacement.
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2ba1174517ff..17d1f0359c52 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -522,11 +522,9 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
   }
 
   // Otherwise, there is an index. The computation we will do will be modulo
-  // the pointer size, so get it.
-  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
-  Offset &= PtrSizeMask;
-  VariableScale &= PtrSizeMask;
+  // the pointer size.
+  Offset = SignExtend64(Offset, IntPtrWidth);
+  VariableScale = SignExtend64(VariableScale, IntPtrWidth);
 
   // To do this transformation, any constant index must be a multiple of the
   // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
diff --git a/test/CodeGen/X86/pr39926.ll b/test/CodeGen/X86/pr39926.ll
new file mode 100644
index 000000000000..c22e4f2f9a8b
--- /dev/null
+++ b/test/CodeGen/X86/pr39926.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx | FileCheck %s
+define i8 @test_offset(i8* %base) {
+; CHECK-LABEL: test_offset:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movb $0, 7(%rdi)
+; CHECK-NEXT:    movw $0, 5(%rdi)
+; CHECK-NEXT:    movl $0, 1(%rdi)
+; CHECK-NEXT:    movl -4(%rdi), %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb (%rdi), %al
+; CHECK-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl 1(%rdi), %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movzwl 5(%rdi), %eax
+; CHECK-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb 7(%rdi), %al
+; CHECK-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl 8(%rdi), %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb -{{[0-9]+}}(%rsp), %al
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %z = alloca [128 x i8], align 16
+  %gep0 = getelementptr inbounds i8, i8* %base, i64 7
+  store volatile i8 0, i8* %gep0
+  %gep1 = getelementptr inbounds i8, i8* %base, i64 5
+  %bc1 = bitcast i8* %gep1 to i16*
+  store volatile i16 0, i16* %bc1
+  %gep2 = getelementptr inbounds i8, i8* %base, i64 1
+  %bc2 = bitcast i8* %gep2 to i32*
+  store volatile i32 0, i32* %bc2
+
+  %y1 = getelementptr inbounds i8, i8* %base, i64 -4
+  %y2 = bitcast [128 x i8]* %z to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %y2, i8* %y1, i64 16, i1 false)
+
+  %gep4 = getelementptr inbounds [128 x i8], [128 x i8]* %z, i64 0, i64 4
+  %ret = load i8, i8* %gep4
+  ret i8 %ret
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
diff --git a/test/Transforms/InstCombine/pr39908.ll b/test/Transforms/InstCombine/pr39908.ll
new file mode 100644
index 000000000000..bd7a82990ad8
--- /dev/null
+++ b/test/Transforms/InstCombine/pr39908.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "p:32:32"
+
+%S = type { [2 x i32] }
+
+define i1 @test([0 x %S]* %p, i32 %n) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %start.cast = bitcast [0 x %S]* %p to %S*
+  %end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i32 0, i32 %n, i32 0, i32 0
+  %end.cast = bitcast i32* %end to %S*
+  %last = getelementptr inbounds %S, %S* %end.cast, i32 -1
+  %cmp = icmp eq %S* %last, %start.cast
+  ret i1 %cmp
+}
+
+; Same test using 64-bit indices.
+define i1 @test64([0 x %S]* %p, i64 %n) {
+; CHECK-LABEL: @test64(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %start.cast = bitcast [0 x %S]* %p to %S*
+  %end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i64 0, i64 %n, i32 0, i64 0
+  %end.cast = bitcast i32* %end to %S*
+  %last = getelementptr inbounds %S, %S* %end.cast, i64 -1
+  %cmp = icmp eq %S* %last, %start.cast
+  ret i1 %cmp
+}
+
+; Here the offset overflows and is treated modulo 2^32. This is UB.
+define i1 @test64_overflow([0 x %S]* %p, i64 %n) {
+; CHECK-LABEL: @test64_overflow(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %start.cast = bitcast [0 x %S]* %p to %S*
+  %end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i64 0, i64 %n, i32 0, i64 8589934592
+  %end.cast = bitcast i32* %end to %S*
+  %last = getelementptr inbounds %S, %S* %end.cast, i64 -1
+  %cmp = icmp eq %S* %last, %start.cast
+  ret i1 %cmp
+}
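
Note (not part of the patch): a minimal standalone C++ sketch of the arithmetic behind the InstCombine change. The helper signExtend64 below is a hypothetical stand-in for llvm::SignExtend64 from llvm/Support/MathExtras.h; the point is that masking the accumulated GEP offset to the pointer width drops the sign of a negative offset under a 32-bit datalayout, while sign extension preserves it.

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::SignExtend64: sign-extend the low B bits of X to 64 bits.
static int64_t signExtend64(uint64_t X, unsigned B) {
  return int64_t(X << (64 - B)) >> (64 - B);
}

int main() {
  // With datalayout "p:32:32", stepping back one %S = { [2 x i32] } element
  // (as in pr39908.ll) contributes -8 bytes to the constant offset, which is
  // carried in a 64-bit accumulator during offset evaluation.
  int64_t Offset = -8;
  unsigned IntPtrWidth = 32;

  // Old behavior: mask to the pointer width; -8 becomes 0xFFFFFFF8
  // (4294967288), losing its sign.
  uint64_t PtrSizeMask = ~0ULL >> (64 - IntPtrWidth);
  uint64_t Masked = uint64_t(Offset) & PtrSizeMask;

  // New behavior: sign-extend from the pointer width; -8 stays -8.
  int64_t Extended = signExtend64(uint64_t(Offset), IntPtrWidth);

  printf("masked = %llu, sign-extended = %lld\n",
         (unsigned long long)Masked, (long long)Extended);
  return 0;
}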