Skip to content

Commit

Permalink
[ARM][ParallelDSP] Change search for muls
Browse files Browse the repository at this point in the history
rL369567 reverted a couple of recent changes made to ARMParallelDSP
because of a miscompilation error: PR43073.

The issue stemmed from an underlying bug that was caused by adding
muls into a reduction before it was proved that they could be executed
in parallel with another mul.

Most of the changes here are from the previously reverted commits.
The additional changes that have been made are:
1) The Search function now doesn't insert any muls into the Reduction
   object. That now happens once the search has successfully finished.
2) For any muls added into the reduction but that weren't paired, we
   accumulate their values as an input into the smlad.

Differential Revision: https://reviews.llvm.org/D66660

llvm-svn: 370171
  • Loading branch information
sparker-arm committed Aug 28, 2019
1 parent 207c653 commit a761ba0
Show file tree
Hide file tree
Showing 10 changed files with 918 additions and 175 deletions.
351 changes: 185 additions & 166 deletions llvm/lib/Target/ARM/ARMParallelDSP.cpp

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions llvm/test/CodeGen/ARM/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Loop Pass Manager
; CHECK-NEXT: Transform loops to use DSP intrinsics
; CHECK-NEXT: Transform functions to use DSP intrinsics
; CHECK-NEXT: Interleaved Access Pass
; CHECK-NEXT: ARM IR optimizations
; CHECK-NEXT: Dominator Tree Construction
Expand Down
79 changes: 79 additions & 0 deletions llvm/test/CodeGen/ARM/ParallelDSP/blocks.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s

; CHECK-LABEL: single_block
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 %acc)
; Computes a[0]*b[0] + a[1]*b[1] + %acc on sign-extended i16 elements, with
; every instruction in the single 'entry' block. The FileCheck directives
; preceding this function expect the two i16 loads per pointer to become one
; i32 load each, feeding a call to @llvm.arm.smlad with %acc as accumulator.
define i32 @single_block(i16* %a, i16* %b, i32 %acc) {
entry:
; First product: a[0] * b[0], both operands sign-extended from i16 to i32.
%ld.a.0 = load i16, i16* %a
%sext.a.0 = sext i16 %ld.a.0 to i32
%ld.b.0 = load i16, i16* %b
%sext.b.0 = sext i16 %ld.b.0 to i32
%mul.0 = mul i32 %sext.a.0, %sext.b.0
; Second product: a[1] * b[1], read from the element directly after the first.
%addr.a.1 = getelementptr i16, i16* %a, i32 1
%addr.b.1 = getelementptr i16, i16* %b, i32 1
%ld.a.1 = load i16, i16* %addr.a.1
%sext.a.1 = sext i16 %ld.a.1 to i32
%ld.b.1 = load i16, i16* %addr.b.1
%sext.b.1 = sext i16 %ld.b.1 to i32
%mul.1 = mul i32 %sext.a.1, %sext.b.1
; Reduction: sum the two products, then add the incoming accumulator.
%add = add i32 %mul.0, %mul.1
%res = add i32 %add, %acc
ret i32 %res
}

; CHECK-LABEL: multi_block
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 0)
; Computes a[0]*b[0] + a[1]*b[1] + %acc on sign-extended i16 elements, but the
; final add of %acc is placed in a separate block (bb.1) from the multiplies
; and their sum. The FileCheck directives preceding this function expect a
; call to @llvm.arm.smlad on the widened i32 loads with a constant 0 as the
; accumulator operand rather than %acc.
define i32 @multi_block(i16* %a, i16* %b, i32 %acc) {
entry:
; First product: a[0] * b[0], both operands sign-extended from i16 to i32.
%ld.a.0 = load i16, i16* %a
%sext.a.0 = sext i16 %ld.a.0 to i32
%ld.b.0 = load i16, i16* %b
%sext.b.0 = sext i16 %ld.b.0 to i32
%mul.0 = mul i32 %sext.a.0, %sext.b.0
; Second product: a[1] * b[1], read from the element directly after the first.
%addr.a.1 = getelementptr i16, i16* %a, i32 1
%addr.b.1 = getelementptr i16, i16* %b, i32 1
%ld.a.1 = load i16, i16* %addr.a.1
%sext.a.1 = sext i16 %ld.a.1 to i32
%ld.b.1 = load i16, i16* %addr.b.1
%sext.b.1 = sext i16 %ld.b.1 to i32
%mul.1 = mul i32 %sext.a.1, %sext.b.1
; Both products are summed here, in the same block as the multiplies.
%add = add i32 %mul.0, %mul.1
br label %bb.1

bb.1:
; The accumulator is only added after the branch, outside the entry block.
%res = add i32 %add, %acc
ret i32 %res
}

; CHECK-LABEL: multi_block_1
; CHECK-NOT: call i32 @llvm.arm.smlad
; Computes a[0]*b[0] + a[1]*b[1] + %acc on sign-extended i16 elements, with the
; two multiplies split across different blocks: the first in 'entry', the
; second (and both adds) in bb.1. The FileCheck directive preceding this
; function expects that no call to @llvm.arm.smlad is produced for this shape.
define i32 @multi_block_1(i16* %a, i16* %b, i32 %acc) {
entry:
; First product: a[0] * b[0], computed before the branch.
%ld.a.0 = load i16, i16* %a
%sext.a.0 = sext i16 %ld.a.0 to i32
%ld.b.0 = load i16, i16* %b
%sext.b.0 = sext i16 %ld.b.0 to i32
%mul.0 = mul i32 %sext.a.0, %sext.b.0
br label %bb.1

bb.1:
; Second product: a[1] * b[1], computed in a different block from the first.
%addr.a.1 = getelementptr i16, i16* %a, i32 1
%addr.b.1 = getelementptr i16, i16* %b, i32 1
%ld.a.1 = load i16, i16* %addr.a.1
%sext.a.1 = sext i16 %ld.a.1 to i32
%ld.b.1 = load i16, i16* %addr.b.1
%sext.b.1 = sext i16 %ld.b.1 to i32
%mul.1 = mul i32 %sext.a.1, %sext.b.1
; Reduction: sum the two products, then add the incoming accumulator.
%add = add i32 %mul.0, %mul.1
%res = add i32 %add, %acc
ret i32 %res
}

Loading

0 comments on commit a761ba0

Please sign in to comment.