-
Notifications
You must be signed in to change notification settings - Fork 733
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ARM][ParallelDSP] Change search for muls
rL369567 reverted a couple of recent changes made to ARMParallelDSP because of a miscompilation error: PR43073. The issue stemmed from an underlying bug that was caused by adding muls into a reduction before it was proved that they could be executed in parallel with another mul. Most of the changes here are from the previously reverted commits. The additional changes that have been made are: 1) The Search function now doesn't insert any muls into the Reduction object. That now happens once the search has successfully finished. 2) For any muls that were added into the reduction but weren't paired, we accumulate their values as an input into the smlad. Differential Revision: https://reviews.llvm.org/D66660 llvm-svn: 370171
- Loading branch information
1 parent
207c653
commit a761ba0
Showing
10 changed files
with
918 additions
and
175 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s | ||
|
||
; Positive test: both multiplies, their sum, and the add of %acc are all in | ||
; the entry block. The directives below expect each pair of i16 loads to be | ||
; replaced by one i32 load through a bitcast pointer, feeding a single smlad | ||
; call that takes %acc as its accumulator operand. | ||
; CHECK-LABEL: single_block | ||
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32* | ||
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]] | ||
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32* | ||
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]] | ||
; CHECK: call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 %acc) | ||
define i32 @single_block(i16* %a, i16* %b, i32 %acc) { | ||
entry: | ||
; Element 0 of each array: load i16, sign-extend to i32, multiply. | ||
%ld.a.0 = load i16, i16* %a | ||
%sext.a.0 = sext i16 %ld.a.0 to i32 | ||
%ld.b.0 = load i16, i16* %b | ||
%sext.b.0 = sext i16 %ld.b.0 to i32 | ||
%mul.0 = mul i32 %sext.a.0, %sext.b.0 | ||
; Element 1 of each array (index 1 from the same base pointers), same pattern. | ||
%addr.a.1 = getelementptr i16, i16* %a, i32 1 | ||
%addr.b.1 = getelementptr i16, i16* %b, i32 1 | ||
%ld.a.1 = load i16, i16* %addr.a.1 | ||
%sext.a.1 = sext i16 %ld.a.1 to i32 | ||
%ld.b.1 = load i16, i16* %addr.b.1 | ||
%sext.b.1 = sext i16 %ld.b.1 to i32 | ||
%mul.1 = mul i32 %sext.a.1, %sext.b.1 | ||
; Sum of the two products, then the incoming accumulator is added on top. | ||
%add = add i32 %mul.0, %mul.1 | ||
%res = add i32 %add, %acc | ||
ret i32 %res | ||
} | ||
|
||
; Positive test with the accumulate split off: both multiplies and their sum | ||
; are in the entry block, but the add of %acc is in the successor block bb.1. | ||
; The directives below still expect an smlad call, with the constant 0 as its | ||
; accumulator operand rather than %acc. | ||
; CHECK-LABEL: multi_block | ||
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32* | ||
; CHECK: [[A:%[^ ]+]] = load i32, i32* [[CAST_A]] | ||
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32* | ||
; CHECK: [[B:%[^ ]+]] = load i32, i32* [[CAST_B]] | ||
; CHECK: call i32 @llvm.arm.smlad(i32 [[A]], i32 [[B]], i32 0) | ||
define i32 @multi_block(i16* %a, i16* %b, i32 %acc) { | ||
entry: | ||
; Element 0 of each array: load i16, sign-extend to i32, multiply. | ||
%ld.a.0 = load i16, i16* %a | ||
%sext.a.0 = sext i16 %ld.a.0 to i32 | ||
%ld.b.0 = load i16, i16* %b | ||
%sext.b.0 = sext i16 %ld.b.0 to i32 | ||
%mul.0 = mul i32 %sext.a.0, %sext.b.0 | ||
; Element 1 of each array (index 1 from the same base pointers), same pattern. | ||
%addr.a.1 = getelementptr i16, i16* %a, i32 1 | ||
%addr.b.1 = getelementptr i16, i16* %b, i32 1 | ||
%ld.a.1 = load i16, i16* %addr.a.1 | ||
%sext.a.1 = sext i16 %ld.a.1 to i32 | ||
%ld.b.1 = load i16, i16* %addr.b.1 | ||
%sext.b.1 = sext i16 %ld.b.1 to i32 | ||
%mul.1 = mul i32 %sext.a.1, %sext.b.1 | ||
; The sum of the two products is the last computation in this block. | ||
%add = add i32 %mul.0, %mul.1 | ||
br label %bb.1 | ||
|
||
bb.1: | ||
; The incoming accumulator is only added here, outside the entry block. | ||
%res = add i32 %add, %acc | ||
ret i32 %res | ||
} | ||
|
||
; Negative test: the first multiply is in the entry block while the second | ||
; multiply and both adds are in bb.1. The directive below requires that no | ||
; smlad call appears in the output for this function. | ||
; CHECK-LABEL: multi_block_1 | ||
; CHECK-NOT: call i32 @llvm.arm.smlad | ||
define i32 @multi_block_1(i16* %a, i16* %b, i32 %acc) { | ||
entry: | ||
; Element 0 of each array: load i16, sign-extend to i32, multiply. | ||
%ld.a.0 = load i16, i16* %a | ||
%sext.a.0 = sext i16 %ld.a.0 to i32 | ||
%ld.b.0 = load i16, i16* %b | ||
%sext.b.0 = sext i16 %ld.b.0 to i32 | ||
%mul.0 = mul i32 %sext.a.0, %sext.b.0 | ||
br label %bb.1 | ||
|
||
bb.1: | ||
; Element 1 of each array is loaded and multiplied in this second block. | ||
%addr.a.1 = getelementptr i16, i16* %a, i32 1 | ||
%addr.b.1 = getelementptr i16, i16* %b, i32 1 | ||
%ld.a.1 = load i16, i16* %addr.a.1 | ||
%sext.a.1 = sext i16 %ld.a.1 to i32 | ||
%ld.b.1 = load i16, i16* %addr.b.1 | ||
%sext.b.1 = sext i16 %ld.b.1 to i32 | ||
%mul.1 = mul i32 %sext.a.1, %sext.b.1 | ||
; Combines the product from the entry block with the one computed here. | ||
%add = add i32 %mul.0, %mul.1 | ||
%res = add i32 %add, %acc | ||
ret i32 %res | ||
} | ||
|
Oops, something went wrong.