Currently reductions can handle only same/alternate instructions, skipping potential support for copyables. This patch adds support for copyables in the reduced values. Recommit after revert in 1febc3f088ef444af378c0a90aaba2195c30472b.
43 lines
2.4 KiB
LLVM
43 lines
2.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=X86 %}
|
|
; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %}
|
|
|
|
; Integer add reduction over eight scalars: the four lanes extracted from %0
; plus the same four lanes each passed through `or i32 <lane>, 0`.
; The assertions below expect both targets to finish with a single
; @llvm.vector.reduce.add.v8i32 call, but build its operand differently:
;  - x86-64 checks: one <4 x i32> `or`, then the or'ed vector and the plain
;    shuffle result are each widened to <8 x i32> and concatenated.
;  - AArch64 checks: the four lanes are repeated twice into an <8 x i32>,
;    which then goes through one <8 x i32> `or` with zeroinitializer.
; NOTE(review): presumably the bare extracted lanes are the "copyable"
; reduced values this test targets -- confirm against the SLP vectorizer change.
define i32 @test() {
|
|
; X86-LABEL: @test(
|
|
; X86-NEXT: bb:
|
|
; X86-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
|
; X86-NEXT: [[TMP1:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer
|
|
; X86-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; X86-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; X86-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
|
|
; X86-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
|
|
; X86-NEXT: ret i32 [[TMP5]]
|
|
;
|
|
; AARCH64-LABEL: @test(
|
|
; AARCH64-NEXT: bb:
|
|
; AARCH64-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
|
; AARCH64-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
|
; AARCH64-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
|
|
; AARCH64-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
|
|
; AARCH64-NEXT: ret i32 [[TMP3]]
|
|
;
|
|
bb:
|
|
; Source vector for all reduced values; the expected output keeps this
; shufflevector unchanged on both targets.
%0 = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
|
; Scalarize every lane of %0, highest index first (lanes 3, 2, 1, 0).
%1 = extractelement <4 x i32> %0, i32 3
|
|
%2 = extractelement <4 x i32> %0, i32 2
|
|
%3 = extractelement <4 x i32> %0, i32 1
|
|
%4 = extractelement <4 x i32> %0, i32 0
|
|
; One `or <lane>, 0` per extracted lane; each result is summed below together
; with the bare lane it was computed from.
%inst514 = or i32 %4, 0
|
|
%inst494 = or i32 %3, 0
|
|
%inst474 = or i32 %2, 0
|
|
%inst454 = or i32 %1, 0
|
|
; Linear add chain: lane 3 and its `or`, then lane 2 and its `or`, and so on
; down to lane 0 -- eight addends in total.
%inst458 = add i32 %1, %inst454
|
|
%inst477 = add i32 %inst458, %2
|
|
%inst478 = add i32 %inst477, %inst474
|
|
%inst497 = add i32 %inst478, %3
|
|
%inst498 = add i32 %inst497, %inst494
|
|
%inst517 = add i32 %inst498, %4
|
|
%inst518 = add i32 %inst517, %inst514
|
|
; The final sum of the chain is the function result.
ret i32 %inst518
|
|
}
|