[NFC][PowerPC] Cleaning up test file and removing redundant front-end test (#151971)
NFC patch to clean up extra lines of code in the file `llvm/test/CodeGen/PowerPC/check-zero-vector.ll`, as the current version has the loop unrolled. It also removes the file `clang/test/CodeGen/PowerPC/check-zero-vector.c`, as the patch affects only the backend. Co-authored-by: himadhith <himadhith.v@ibm.com>
This commit is contained in:
parent
14cd133931
commit
1f1b903a64
@ -1,143 +0,0 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
|
||||
// RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64
|
||||
// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64LE
|
||||
// RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_32
|
||||
|
||||
// POWERPC_64-LABEL: define signext i32 @test_Greater_than(
|
||||
// POWERPC_64-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// POWERPC_64-NEXT: [[ENTRY:.*:]]
|
||||
// POWERPC_64-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8
|
||||
// POWERPC_64-NEXT: [[RESULT:%.*]] = alloca i16, align 2
|
||||
// POWERPC_64-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// POWERPC_64-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8
|
||||
// POWERPC_64-NEXT: store i16 0, ptr [[RESULT]], align 2
|
||||
// POWERPC_64-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// POWERPC_64-NEXT: br label %[[FOR_COND:.*]]
|
||||
// POWERPC_64: [[FOR_COND]]:
|
||||
// POWERPC_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
|
||||
// POWERPC_64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
|
||||
// POWERPC_64-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
|
||||
// POWERPC_64: [[FOR_BODY]]:
|
||||
// POWERPC_64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8
|
||||
// POWERPC_64-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
|
||||
// POWERPC_64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
|
||||
// POWERPC_64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]]
|
||||
// POWERPC_64-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
|
||||
// POWERPC_64-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
|
||||
// POWERPC_64-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
|
||||
// POWERPC_64-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
|
||||
// POWERPC_64: [[IF_THEN]]:
|
||||
// POWERPC_64-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
|
||||
// POWERPC_64-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
|
||||
// POWERPC_64-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
|
||||
// POWERPC_64-NEXT: br label %[[IF_END]]
|
||||
// POWERPC_64: [[IF_END]]:
|
||||
// POWERPC_64-NEXT: br label %[[FOR_INC:.*]]
|
||||
// POWERPC_64: [[FOR_INC]]:
|
||||
// POWERPC_64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
|
||||
// POWERPC_64-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
|
||||
// POWERPC_64-NEXT: store i32 [[INC3]], ptr [[I]], align 4
|
||||
// POWERPC_64-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
|
||||
// POWERPC_64: [[FOR_END]]:
|
||||
// POWERPC_64-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
|
||||
// POWERPC_64-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
|
||||
// POWERPC_64-NEXT: ret i32 [[CONV4]]
|
||||
//
|
||||
// POWERPC_64LE-LABEL: define dso_local signext i32 @test_Greater_than(
|
||||
// POWERPC_64LE-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// POWERPC_64LE-NEXT: [[ENTRY:.*:]]
|
||||
// POWERPC_64LE-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8
|
||||
// POWERPC_64LE-NEXT: [[RESULT:%.*]] = alloca i16, align 2
|
||||
// POWERPC_64LE-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// POWERPC_64LE-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8
|
||||
// POWERPC_64LE-NEXT: store i16 0, ptr [[RESULT]], align 2
|
||||
// POWERPC_64LE-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// POWERPC_64LE-NEXT: br label %[[FOR_COND:.*]]
|
||||
// POWERPC_64LE: [[FOR_COND]]:
|
||||
// POWERPC_64LE-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
|
||||
// POWERPC_64LE-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
|
||||
// POWERPC_64LE-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
|
||||
// POWERPC_64LE: [[FOR_BODY]]:
|
||||
// POWERPC_64LE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8
|
||||
// POWERPC_64LE-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
|
||||
// POWERPC_64LE-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
|
||||
// POWERPC_64LE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]]
|
||||
// POWERPC_64LE-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
|
||||
// POWERPC_64LE-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
|
||||
// POWERPC_64LE-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
|
||||
// POWERPC_64LE-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
|
||||
// POWERPC_64LE: [[IF_THEN]]:
|
||||
// POWERPC_64LE-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
|
||||
// POWERPC_64LE-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
|
||||
// POWERPC_64LE-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
|
||||
// POWERPC_64LE-NEXT: br label %[[IF_END]]
|
||||
// POWERPC_64LE: [[IF_END]]:
|
||||
// POWERPC_64LE-NEXT: br label %[[FOR_INC:.*]]
|
||||
// POWERPC_64LE: [[FOR_INC]]:
|
||||
// POWERPC_64LE-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
|
||||
// POWERPC_64LE-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
|
||||
// POWERPC_64LE-NEXT: store i32 [[INC3]], ptr [[I]], align 4
|
||||
// POWERPC_64LE-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
|
||||
// POWERPC_64LE: [[FOR_END]]:
|
||||
// POWERPC_64LE-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
|
||||
// POWERPC_64LE-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
|
||||
// POWERPC_64LE-NEXT: ret i32 [[CONV4]]
|
||||
//
|
||||
// POWERPC_32-LABEL: define i32 @test_Greater_than(
|
||||
// POWERPC_32-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// POWERPC_32-NEXT: [[ENTRY:.*:]]
|
||||
// POWERPC_32-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 4
|
||||
// POWERPC_32-NEXT: [[RESULT:%.*]] = alloca i16, align 2
|
||||
// POWERPC_32-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// POWERPC_32-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 4
|
||||
// POWERPC_32-NEXT: store i16 0, ptr [[RESULT]], align 2
|
||||
// POWERPC_32-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// POWERPC_32-NEXT: br label %[[FOR_COND:.*]]
|
||||
// POWERPC_32: [[FOR_COND]]:
|
||||
// POWERPC_32-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
|
||||
// POWERPC_32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
|
||||
// POWERPC_32-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
|
||||
// POWERPC_32: [[FOR_BODY]]:
|
||||
// POWERPC_32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 4
|
||||
// POWERPC_32-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
|
||||
// POWERPC_32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 [[TMP2]]
|
||||
// POWERPC_32-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
|
||||
// POWERPC_32-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
|
||||
// POWERPC_32-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
|
||||
// POWERPC_32-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
|
||||
// POWERPC_32: [[IF_THEN]]:
|
||||
// POWERPC_32-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
|
||||
// POWERPC_32-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
|
||||
// POWERPC_32-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
|
||||
// POWERPC_32-NEXT: br label %[[IF_END]]
|
||||
// POWERPC_32: [[IF_END]]:
|
||||
// POWERPC_32-NEXT: br label %[[FOR_INC:.*]]
|
||||
// POWERPC_32: [[FOR_INC]]:
|
||||
// POWERPC_32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
|
||||
// POWERPC_32-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
|
||||
// POWERPC_32-NEXT: store i32 [[INC3]], ptr [[I]], align 4
|
||||
// POWERPC_32-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
|
||||
// POWERPC_32: [[FOR_END]]:
|
||||
// POWERPC_32-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
|
||||
// POWERPC_32-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
|
||||
// POWERPC_32-NEXT: ret i32 [[CONV4]]
|
||||
//
|
||||
int test_Greater_than(unsigned short *colauths) {
|
||||
unsigned short result = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (colauths[i] > 0) {
|
||||
result++;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
//.
|
||||
// POWERPC_64: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
|
||||
// POWERPC_64: [[META3]] = !{!"llvm.loop.mustprogress"}
|
||||
//.
|
||||
// POWERPC_64LE: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
|
||||
// POWERPC_64LE: [[META3]] = !{!"llvm.loop.mustprogress"}
|
||||
//.
|
||||
// POWERPC_32: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
|
||||
// POWERPC_32: [[META3]] = !{!"llvm.loop.mustprogress"}
|
||||
//.
|
@ -1,3 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64LE
|
||||
|
||||
@ -7,240 +8,90 @@
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
|
||||
; RUN: < %s | FileCheck %s --check-prefix=POWERPC_32
|
||||
|
||||
define i32 @test_Greater_than(ptr %colauths, i32 signext %ncols) {
|
||||
; This testcase is manually reduced to isolate the critical code blocks.
|
||||
; It is designed to check for vector comparison specifically for zero vectors.
|
||||
; In the vector.body section, we are expecting a comparison instruction (vcmpequh),
|
||||
; merge instructions (vmrghh and vmrglh) which use exactly 2 vectors.
|
||||
; The output of the merge instruction is being used by xxland and finally
|
||||
; accumulated by vadduwm instruction.
|
||||
|
||||
define i32 @test_Greater_than(ptr %colauths) {
|
||||
; This testcase is for the special case of zero-vector comparisons.
|
||||
; Currently the generated code does a comparison (vcmpequh) and then a negation (xxlnor).
|
||||
; This pattern is expected to be optimized in a future patch.
|
||||
; POWERPC_64LE-LABEL: test_Greater_than:
|
||||
; POWERPC_64LE: .LBB0_6: # %vector.body
|
||||
; POWERPC_64LE-NEXT: #
|
||||
; POWERPC_64LE-NEXT: lxv [[R1:[0-9]+]], -64(4)
|
||||
; POWERPC_64LE-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
|
||||
; POWERPC_64LE-NEXT: xxlnor [[R1]], [[R1]], [[R1]]
|
||||
; POWERPC_64LE-NEXT: vmrghh [[R4:[0-9]+]], [[R2]], [[R2]]
|
||||
; POWERPC_64LE-NEXT: vmrglh [[R2]], [[R2]], [[R2]]
|
||||
; POWERPC_64LE-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
|
||||
; POWERPC_64LE-NEXT: xxland [[R1]], [[R1]], [[R6]]
|
||||
; POWERPC_64LE-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
|
||||
; POWERPC_64LE: .LBB0_10: # %vec.epilog.vector.body
|
||||
; POWERPC_64LE-NEXT: #
|
||||
; POWERPC_64LE-NEXT: lxv [[R8:[0-9]+]], 0(4)
|
||||
; POWERPC_64LE-NEXT: addi 4, 4, 16
|
||||
; POWERPC_64LE-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]]
|
||||
; POWERPC_64LE-NEXT: xxlnor [[R8]], [[R8]], [[R8]]
|
||||
; POWERPC_64LE-NEXT: vmrglh [[R11:[0-9]+]], [[R9]], [[R9]]
|
||||
; POWERPC_64LE-NEXT: vmrghh [[R9]], [[R9]], [[R9]]
|
||||
; POWERPC_64LE-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]]
|
||||
; POWERPC_64LE-NEXT: xxland [[R8]], [[R8]], [[R6]]
|
||||
; POWERPC_64LE-NEXT: vadduwm [[R7]], [[R7]], [[R9]]
|
||||
; POWERPC_64LE-NEXT: vadduwm [[R3]], [[R3]], [[R11]]
|
||||
; POWERPC_64LE-NEXT: bdnz .LBB0_10
|
||||
; POWERPC_64LE: blr
|
||||
; POWERPC_64LE: # %bb.0: # %entry
|
||||
; POWERPC_64LE-NEXT: lfd 0, 0(3)
|
||||
; POWERPC_64LE-NEXT: xxlxor 35, 35, 35
|
||||
; POWERPC_64LE-NEXT: li 4, 0
|
||||
; POWERPC_64LE-NEXT: li 3, 4
|
||||
; POWERPC_64LE-NEXT: xxswapd 34, 0
|
||||
; POWERPC_64LE-NEXT: vcmpequh 2, 2, 3
|
||||
; POWERPC_64LE-NEXT: xxlnor 34, 34, 34
|
||||
; POWERPC_64LE-NEXT: vmrglh 3, 2, 2
|
||||
; POWERPC_64LE-NEXT: vextuwrx 4, 4, 2
|
||||
; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3
|
||||
; POWERPC_64LE-NEXT: clrlwi 4, 4, 31
|
||||
; POWERPC_64LE-NEXT: rlwimi 4, 3, 1, 30, 30
|
||||
; POWERPC_64LE-NEXT: mfvsrwz 3, 35
|
||||
; POWERPC_64LE-NEXT: rlwimi 4, 3, 2, 29, 29
|
||||
; POWERPC_64LE-NEXT: li 3, 12
|
||||
; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3
|
||||
; POWERPC_64LE-NEXT: rlwimi 4, 3, 3, 28, 28
|
||||
; POWERPC_64LE-NEXT: stb 4, -1(1)
|
||||
; POWERPC_64LE-NEXT: lbz 3, -1(1)
|
||||
; POWERPC_64LE-NEXT: popcntd 3, 3
|
||||
; POWERPC_64LE-NEXT: blr
|
||||
;
|
||||
; POWERPC_64-LABEL: test_Greater_than:
|
||||
; POWERPC_64: L..BB0_6: # %vector.body
|
||||
; POWERPC_64-NEXT: #
|
||||
; POWERPC_64-NEXT: lxv [[R1:[0-9]+]], -64(4)
|
||||
; POWERPC_64-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
|
||||
; POWERPC_64-NEXT: xxlnor [[R1]], [[R1]], [[R1]]
|
||||
; POWERPC_64-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]]
|
||||
; POWERPC_64-NEXT: vmrghh [[R2]], [[R2]], [[R2]]
|
||||
; POWERPC_64-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
|
||||
; POWERPC_64-NEXT: xxland [[R1]], [[R1]], [[R6]]
|
||||
; POWERPC_64-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
|
||||
; POWERPC_64: L..BB0_10: # %vec.epilog.vector.body
|
||||
; POWERPC_64-NEXT: #
|
||||
; POWERPC_64-NEXT: lxv [[R8:[0-9]+]], 0(4)
|
||||
; POWERPC_64-NEXT: addi 4, 4, 16
|
||||
; POWERPC_64-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]]
|
||||
; POWERPC_64-NEXT: xxlnor [[R8]], [[R8]], [[R8]]
|
||||
; POWERPC_64-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]]
|
||||
; POWERPC_64-NEXT: vmrglh [[R9]], [[R9]], [[R9]]
|
||||
; POWERPC_64-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]]
|
||||
; POWERPC_64-NEXT: xxland [[R8]], [[R8]], [[R6]]
|
||||
; POWERPC_64-NEXT: vadduwm [[R7]], [[R7]], [[R9]]
|
||||
; POWERPC_64-NEXT: vadduwm [[R3]], [[R3]], [[R11]]
|
||||
; POWERPC_64-NEXT: bdnz L..BB0_10
|
||||
; POWERPC_64: blr
|
||||
; POWERPC_64: # %bb.0: # %entry
|
||||
; POWERPC_64-NEXT: lxsd 2, 0(3)
|
||||
; POWERPC_64-NEXT: xxlxor 35, 35, 35
|
||||
; POWERPC_64-NEXT: li 4, 12
|
||||
; POWERPC_64-NEXT: li 3, 8
|
||||
; POWERPC_64-NEXT: vcmpequh 2, 2, 3
|
||||
; POWERPC_64-NEXT: xxlnor 34, 34, 34
|
||||
; POWERPC_64-NEXT: vmrghh 2, 2, 2
|
||||
; POWERPC_64-NEXT: vextuwlx 4, 4, 2
|
||||
; POWERPC_64-NEXT: vextuwlx 3, 3, 2
|
||||
; POWERPC_64-NEXT: clrlwi 4, 4, 31
|
||||
; POWERPC_64-NEXT: rlwimi 4, 3, 1, 30, 30
|
||||
; POWERPC_64-NEXT: mfvsrwz 3, 34
|
||||
; POWERPC_64-NEXT: rlwimi 4, 3, 2, 29, 29
|
||||
; POWERPC_64-NEXT: li 3, 0
|
||||
; POWERPC_64-NEXT: vextuwlx 3, 3, 2
|
||||
; POWERPC_64-NEXT: rlwimi 4, 3, 3, 28, 28
|
||||
; POWERPC_64-NEXT: stb 4, -1(1)
|
||||
; POWERPC_64-NEXT: lbz 3, -1(1)
|
||||
; POWERPC_64-NEXT: popcntd 3, 3
|
||||
; POWERPC_64-NEXT: blr
|
||||
;
|
||||
; POWERPC_32-LABEL: test_Greater_than:
|
||||
; POWERPC_32: L..BB0_7: # %vector.body
|
||||
; POWERPC_32-NEXT: #
|
||||
; POWERPC_32-NEXT: lxv [[R1:[0-9]+]], 0(10)
|
||||
; POWERPC_32-NEXT: addic [[R13:[0-9]+]], [[R13]], 64
|
||||
; POWERPC_32-NEXT: addze [[R14:[0-9]+]], [[R14]]
|
||||
; POWERPC_32-NEXT: xor [[R15:[0-9]+]], [[R13]], [[R16:[0-9]+]]
|
||||
; POWERPC_32-NEXT: or. [[R15]], [[R15]], [[R14]]
|
||||
; POWERPC_32-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
|
||||
; POWERPC_32-NEXT: xxlnor [[R1]], [[R1]], [[R1]]
|
||||
; POWERPC_32-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]]
|
||||
; POWERPC_32-NEXT: vmrghh [[R2]], [[R2]], [[R2]]
|
||||
; POWERPC_32-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
|
||||
; POWERPC_32-NEXT: xxland [[R1]], [[R1]], [[R6]]
|
||||
; POWERPC_32-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
|
||||
; POWERPC_32: L..BB0_11: # %vec.epilog.vector.body
|
||||
; POWERPC_32-NEXT: #
|
||||
; POWERPC_32-NEXT: slwi [[R14]], [[R13]], 1
|
||||
; POWERPC_32-NEXT: addic [[R13]], [[R13]], 8
|
||||
; POWERPC_32-NEXT: addze [[R17:[0-9]+]], [[R17]]
|
||||
; POWERPC_32-NEXT: lxvx [[R8:[0-9]+]], [[R18:[0-9]+]], [[R14]]
|
||||
; POWERPC_32-NEXT: xor [[R14]], [[R13]], [[R16]]
|
||||
; POWERPC_32-NEXT: or. [[R14]], [[R14]], [[R17]]
|
||||
; POWERPC_32-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R3]]
|
||||
; POWERPC_32-NEXT: xxlnor [[R8]], [[R8]], [[R8]]
|
||||
; POWERPC_32-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]]
|
||||
; POWERPC_32-NEXT: vmrglh [[R9]], [[R9]], [[R9]]
|
||||
; POWERPC_32-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]]
|
||||
; POWERPC_32-NEXT: xxland [[R8]], [[R8]], [[R6]]
|
||||
; POWERPC_32-NEXT: vadduwm [[R7]], [[R7]], [[R9]]
|
||||
; POWERPC_32-NEXT: vadduwm [[R19:[0-9]+]], [[R19]], [[R11]]
|
||||
; POWERPC_32-NEXT: bne 0, L..BB0_11
|
||||
; POWERPC_32: blr
|
||||
entry:
|
||||
%cmp5 = icmp sgt i32 %ncols, 0
|
||||
br i1 %cmp5, label %iter.check, label %for.cond.cleanup
|
||||
|
||||
iter.check: ; preds = %entry
|
||||
%wide.trip.count = zext nneg i32 %ncols to i64
|
||||
%min.iters.check = icmp ult i32 %ncols, 8
|
||||
br i1 %min.iters.check, label %for.body.preheader, label %vector.main.loop.iter.check
|
||||
|
||||
for.body.preheader: ; preds = %vec.epilog.iter.check, %vec.epilog.middle.block, %iter.check
|
||||
%indvars.iv.ph = phi i64 [ 0, %iter.check ], [ %n.vec, %vec.epilog.iter.check ], [ %n.vec31, %vec.epilog.middle.block ]
|
||||
%num_cols_needed.06.ph = phi i32 [ 0, %iter.check ], [ %33, %vec.epilog.iter.check ], [ %40, %vec.epilog.middle.block ]
|
||||
br label %for.body
|
||||
|
||||
vector.main.loop.iter.check: ; preds = %iter.check
|
||||
%min.iters.check9 = icmp ult i32 %ncols, 64
|
||||
br i1 %min.iters.check9, label %vec.epilog.ph, label %vector.ph
|
||||
|
||||
vector.ph: ; preds = %vector.main.loop.iter.check
|
||||
%n.vec = and i64 %wide.trip.count, 2147483584
|
||||
br label %vector.body
|
||||
|
||||
vector.body: ; preds = %vector.body, %vector.ph
|
||||
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
|
||||
%vec.phi = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %24, %vector.body ]
|
||||
%vec.phi10 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %25, %vector.body ]
|
||||
%vec.phi11 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %26, %vector.body ]
|
||||
%vec.phi12 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %27, %vector.body ]
|
||||
%vec.phi13 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %28, %vector.body ]
|
||||
%vec.phi14 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %29, %vector.body ]
|
||||
%vec.phi15 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %30, %vector.body ]
|
||||
%vec.phi16 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %31, %vector.body ]
|
||||
%0 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index
|
||||
%1 = getelementptr inbounds nuw i8, ptr %0, i64 16
|
||||
%2 = getelementptr inbounds nuw i8, ptr %0, i64 32
|
||||
%3 = getelementptr inbounds nuw i8, ptr %0, i64 48
|
||||
%4 = getelementptr inbounds nuw i8, ptr %0, i64 64
|
||||
%5 = getelementptr inbounds nuw i8, ptr %0, i64 80
|
||||
%6 = getelementptr inbounds nuw i8, ptr %0, i64 96
|
||||
%7 = getelementptr inbounds nuw i8, ptr %0, i64 112
|
||||
%wide.load = load <8 x i16>, ptr %0, align 2, !tbaa !5
|
||||
%wide.load17 = load <8 x i16>, ptr %1, align 2, !tbaa !5
|
||||
%wide.load18 = load <8 x i16>, ptr %2, align 2, !tbaa !5
|
||||
%wide.load19 = load <8 x i16>, ptr %3, align 2, !tbaa !5
|
||||
%wide.load20 = load <8 x i16>, ptr %4, align 2, !tbaa !5
|
||||
%wide.load21 = load <8 x i16>, ptr %5, align 2, !tbaa !5
|
||||
%wide.load22 = load <8 x i16>, ptr %6, align 2, !tbaa !5
|
||||
%wide.load23 = load <8 x i16>, ptr %7, align 2, !tbaa !5
|
||||
%8 = icmp ne <8 x i16> %wide.load, zeroinitializer
|
||||
%9 = icmp ne <8 x i16> %wide.load17, zeroinitializer
|
||||
%10 = icmp ne <8 x i16> %wide.load18, zeroinitializer
|
||||
%11 = icmp ne <8 x i16> %wide.load19, zeroinitializer
|
||||
%12 = icmp ne <8 x i16> %wide.load20, zeroinitializer
|
||||
%13 = icmp ne <8 x i16> %wide.load21, zeroinitializer
|
||||
%14 = icmp ne <8 x i16> %wide.load22, zeroinitializer
|
||||
%15 = icmp ne <8 x i16> %wide.load23, zeroinitializer
|
||||
%16 = zext <8 x i1> %8 to <8 x i32>
|
||||
%17 = zext <8 x i1> %9 to <8 x i32>
|
||||
%18 = zext <8 x i1> %10 to <8 x i32>
|
||||
%19 = zext <8 x i1> %11 to <8 x i32>
|
||||
%20 = zext <8 x i1> %12 to <8 x i32>
|
||||
%21 = zext <8 x i1> %13 to <8 x i32>
|
||||
%22 = zext <8 x i1> %14 to <8 x i32>
|
||||
%23 = zext <8 x i1> %15 to <8 x i32>
|
||||
%24 = add <8 x i32> %vec.phi, %16
|
||||
%25 = add <8 x i32> %vec.phi10, %17
|
||||
%26 = add <8 x i32> %vec.phi11, %18
|
||||
%27 = add <8 x i32> %vec.phi12, %19
|
||||
%28 = add <8 x i32> %vec.phi13, %20
|
||||
%29 = add <8 x i32> %vec.phi14, %21
|
||||
%30 = add <8 x i32> %vec.phi15, %22
|
||||
%31 = add <8 x i32> %vec.phi16, %23
|
||||
%index.next = add nuw i64 %index, 64
|
||||
%32 = icmp eq i64 %index.next, %n.vec
|
||||
br i1 %32, label %middle.block, label %vector.body, !llvm.loop !9
|
||||
|
||||
middle.block: ; preds = %vector.body
|
||||
%bin.rdx = add <8 x i32> %25, %24
|
||||
%bin.rdx24 = add <8 x i32> %26, %bin.rdx
|
||||
%bin.rdx25 = add <8 x i32> %27, %bin.rdx24
|
||||
%bin.rdx26 = add <8 x i32> %28, %bin.rdx25
|
||||
%bin.rdx27 = add <8 x i32> %29, %bin.rdx26
|
||||
%bin.rdx28 = add <8 x i32> %30, %bin.rdx27
|
||||
%bin.rdx29 = add <8 x i32> %31, %bin.rdx28
|
||||
%33 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %bin.rdx29)
|
||||
%cmp.n = icmp eq i64 %n.vec, %wide.trip.count
|
||||
br i1 %cmp.n, label %for.cond.cleanup, label %vec.epilog.iter.check
|
||||
|
||||
vec.epilog.iter.check: ; preds = %middle.block
|
||||
%n.vec.remaining = and i64 %wide.trip.count, 56
|
||||
%min.epilog.iters.check = icmp eq i64 %n.vec.remaining, 0
|
||||
br i1 %min.epilog.iters.check, label %for.body.preheader, label %vec.epilog.ph
|
||||
|
||||
vec.epilog.ph: ; preds = %vec.epilog.iter.check, %vector.main.loop.iter.check
|
||||
%vec.epilog.resume.val = phi i64 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ]
|
||||
%bc.merge.rdx = phi i32 [ %33, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ]
|
||||
%n.vec31 = and i64 %wide.trip.count, 2147483640
|
||||
%34 = insertelement <8 x i32> <i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %bc.merge.rdx, i64 0
|
||||
br label %vec.epilog.vector.body
|
||||
|
||||
vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph
|
||||
%index32 = phi i64 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next35, %vec.epilog.vector.body ]
|
||||
%vec.phi33 = phi <8 x i32> [ %34, %vec.epilog.ph ], [ %38, %vec.epilog.vector.body ]
|
||||
%35 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index32
|
||||
%wide.load34 = load <8 x i16>, ptr %35, align 2, !tbaa !5
|
||||
%36 = icmp ne <8 x i16> %wide.load34, zeroinitializer
|
||||
%37 = zext <8 x i1> %36 to <8 x i32>
|
||||
%38 = add <8 x i32> %vec.phi33, %37
|
||||
%index.next35 = add nuw i64 %index32, 8
|
||||
%39 = icmp eq i64 %index.next35, %n.vec31
|
||||
br i1 %39, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !llvm.loop !13
|
||||
|
||||
vec.epilog.middle.block: ; preds = %vec.epilog.vector.body
|
||||
%40 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %38)
|
||||
%cmp.n36 = icmp eq i64 %n.vec31, %wide.trip.count
|
||||
br i1 %cmp.n36, label %for.cond.cleanup, label %for.body.preheader
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body, %middle.block, %vec.epilog.middle.block, %entry
|
||||
%num_cols_needed.0.lcssa = phi i32 [ 0, %entry ], [ %33, %middle.block ], [ %40, %vec.epilog.middle.block ], [ %spec.select, %for.body ]
|
||||
ret i32 %num_cols_needed.0.lcssa
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
|
||||
%num_cols_needed.06 = phi i32 [ %spec.select, %for.body ], [ %num_cols_needed.06.ph, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds nuw i16, ptr %colauths, i64 %indvars.iv
|
||||
%41 = load i16, ptr %arrayidx, align 2, !tbaa !5
|
||||
%tobool.not = icmp ne i16 %41, 0
|
||||
%inc = zext i1 %tobool.not to i32
|
||||
%spec.select = add nuw nsw i32 %num_cols_needed.06, %inc
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !14
|
||||
; POWERPC_32: # %bb.0: # %entry
|
||||
; POWERPC_32-NEXT: li 4, 4
|
||||
; POWERPC_32-NEXT: lxvwsx 1, 0, 3
|
||||
; POWERPC_32-NEXT: xxlxor 35, 35, 35
|
||||
; POWERPC_32-NEXT: lxvwsx 0, 3, 4
|
||||
; POWERPC_32-NEXT: xxmrghw 34, 1, 0
|
||||
; POWERPC_32-NEXT: vcmpequh 2, 2, 3
|
||||
; POWERPC_32-NEXT: xxlnor 34, 34, 34
|
||||
; POWERPC_32-NEXT: vmrghh 2, 2, 2
|
||||
; POWERPC_32-NEXT: stxv 34, -32(1)
|
||||
; POWERPC_32-NEXT: lwz 3, -20(1)
|
||||
; POWERPC_32-NEXT: lwz 4, -24(1)
|
||||
; POWERPC_32-NEXT: clrlwi 3, 3, 31
|
||||
; POWERPC_32-NEXT: rlwimi 3, 4, 1, 30, 30
|
||||
; POWERPC_32-NEXT: lwz 4, -28(1)
|
||||
; POWERPC_32-NEXT: rlwimi 3, 4, 2, 29, 29
|
||||
; POWERPC_32-NEXT: lwz 4, -32(1)
|
||||
; POWERPC_32-NEXT: rlwimi 3, 4, 3, 28, 28
|
||||
; POWERPC_32-NEXT: popcntw 3, 3
|
||||
; POWERPC_32-NEXT: blr
|
||||
entry:
|
||||
%0 = load <4 x i16>, ptr %colauths, align 2, !tbaa !5
|
||||
%1 = icmp ne <4 x i16> %0, zeroinitializer
|
||||
%2 = bitcast <4 x i1> %1 to i4
|
||||
%3 = tail call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 %2)
|
||||
%4 = zext nneg i4 %3 to i32
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
declare i4 @llvm.ctpop.i4(i4) #1
|
||||
|
||||
!5 = !{!6, !6, i64 0}
|
||||
!6 = !{!"short", !7, i64 0}
|
||||
!7 = !{!"omnipotent char", !8, i64 0}
|
||||
!8 = !{!"Simple C/C++ TBAA"}
|
||||
!9 = distinct !{!9, !10, !11, !12}
|
||||
!10 = !{!"llvm.loop.mustprogress"}
|
||||
!11 = !{!"llvm.loop.isvectorized", i32 1}
|
||||
!12 = !{!"llvm.loop.unroll.runtime.disable"}
|
||||
!13 = distinct !{!13, !10, !11, !12}
|
||||
!14 = distinct !{!14, !10, !12, !11}
|
||||
|
Loading…
x
Reference in New Issue
Block a user