
This PR allows the loop vectorizer to handle in-loop sub reductions by forming a normal in-loop add reduction with a negated input. Stacked PRs: 1. -> https://github.com/llvm/llvm-project/pull/147026 2. https://github.com/llvm/llvm-project/pull/147255 3. https://github.com/llvm/llvm-project/pull/147302 4. https://github.com/llvm/llvm-project/pull/147513
35 lines
1.6 KiB
LLVM
35 lines
1.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s | FileCheck %s %}
; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux < %s | FileCheck %s %}

; Both RUN lines use the default CHECK prefix, so the same expected output
; applies to the x86_64 and the aarch64 invocation. Each invocation only runs
; when the corresponding target is registered in the opt binary under test.

; SLP doesn't currently support sub reductions
;
; @reduction_sub loads four consecutive i64 values starting at %ptr and folds
; them as ((ld0 - ld1) - ld2) - ld3 through a chain of scalar `sub nuw nsw`
; instructions. The CHECK lines below expect that chain to come out of the
; slp-vectorizer pass still scalar: the same four loads and three subs, with
; no vector instructions. The only visible difference from the input is the
; explicit `align 8` that appears on the printed loads.

define i64 @reduction_sub(ptr %ptr) {
; CHECK-LABEL: define i64 @reduction_sub(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[LD0:%.*]] = load i64, ptr [[PTR]], align 8
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 1
; CHECK-NEXT:    [[LD1:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT:    [[SUB_1:%.*]] = sub nuw nsw i64 [[LD0]], [[LD1]]
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 2
; CHECK-NEXT:    [[LD2:%.*]] = load i64, ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[SUB_2:%.*]] = sub nuw nsw i64 [[SUB_1]], [[LD2]]
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 3
; CHECK-NEXT:    [[LD3:%.*]] = load i64, ptr [[GEP_2]], align 8
; CHECK-NEXT:    [[SUB_3:%.*]] = sub nuw nsw i64 [[SUB_2]], [[LD3]]
; CHECK-NEXT:    ret i64 [[SUB_3]]
;
entry:
  %ld0 = load i64, ptr %ptr
  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
  %ld1 = load i64, ptr %gep
  %sub.1 = sub nuw nsw i64 %ld0, %ld1
  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
  %ld2 = load i64, ptr %gep.1
  %sub.2 = sub nuw nsw i64 %sub.1, %ld2
  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
  %ld3 = load i64, ptr %gep.2
  %sub.3 = sub nuw nsw i64 %sub.2, %ld3
  ret i64 %sub.3
}