602 lines
29 KiB
LLVM
602 lines
29 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -p loop-unroll -unroll-allow-partial -unroll-max-count=4 -S %s | FileCheck %s
|
|
|
|
define i32 @test_add(ptr %src, i64 %n, i32 %start) {
|
|
; CHECK-LABEL: define i32 @test_add(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]]
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_24]]
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
|
|
%iv.next = add i64 %iv, 1
|
|
%gep.src = getelementptr i32, ptr %src, i64 %iv
|
|
%l = load i32 , ptr %gep.src, align 1
|
|
%rdx.next = add i32 %rdx, %l
|
|
%ec = icmp ne i64 %iv.next, 1000
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret i32 %rdx.next
|
|
}
|
|
|
|
define i32 @test_add_tc_not_multiple_of_4(ptr %src, i64 %n, i32 %start) {
|
|
; CHECK-LABEL: define i32 @test_add_tc_not_multiple_of_4(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP_1:.*]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP_1]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
|
|
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 1001
|
|
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_1]], label %[[EXIT:.*]]
|
|
; CHECK: [[LOOP_1]]:
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]]
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_12:%.*]] = load i32, ptr [[GEP_SRC_12]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_12]]
|
|
; CHECK-NEXT: br label %[[LOOP]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
|
|
%iv.next = add i64 %iv, 1
|
|
%gep.src = getelementptr i32, ptr %src, i64 %iv
|
|
%l = load i32 , ptr %gep.src, align 1
|
|
%rdx.next = add i32 %rdx, %l
|
|
%ec = icmp ne i64 %iv.next, 1001
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret i32 %rdx.next
|
|
}
|
|
|
|
define i32 @test_add_rdx_used_in_loop(ptr %src, i64 %n, i32 %start) {
|
|
; CHECK-LABEL: define i32 @test_add_rdx_used_in_loop(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_24:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
|
|
; CHECK-NEXT: store i32 [[RDX_NEXT]], ptr [[GEP_SRC]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: store i32 [[RDX_NEXT_1]], ptr [[GEP_SRC_1]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]]
|
|
; CHECK-NEXT: store i32 [[RDX_NEXT_2]], ptr [[GEP_SRC_2]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_24]] = add i32 [[RDX_NEXT_2]], [[L_24]]
|
|
; CHECK-NEXT: store i32 [[RDX_NEXT_24]], ptr [[GEP_SRC_24]], align 4
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_24]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
|
|
%iv.next = add i64 %iv, 1
|
|
%gep.src = getelementptr i32, ptr %src, i64 %iv
|
|
%l = load i32 , ptr %gep.src, align 1
|
|
%rdx.next = add i32 %rdx, %l
|
|
store i32 %rdx.next, ptr %gep.src
|
|
%ec = icmp ne i64 %iv.next, 1000
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret i32 %rdx.next
|
|
}
|
|
|
|
define i32 @test_add_phi_used_outside_loop(ptr %src, i64 %n, i32 %start) {
|
|
; CHECK-LABEL: define i32 @test_add_phi_used_outside_loop(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]]
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_24]]
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_2]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i32 [[RDX_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
|
|
%iv.next = add i64 %iv, 1
|
|
%gep.src = getelementptr i32, ptr %src, i64 %iv
|
|
%l = load i32 , ptr %gep.src, align 1
|
|
%rdx.next = add i32 %rdx, %l
|
|
%ec = icmp ne i64 %iv.next, 1000
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret i32 %rdx
|
|
}
|
|
|
|
define i32 @test_add_and_mul_reduction(ptr %src, i64 %n, i32 %start) {
|
|
; CHECK-LABEL: define i32 @test_add_and_mul_reduction(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_1_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX_2:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_2_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
|
|
; CHECK-NEXT: [[RDX_1_NEXT:%.*]] = add i32 [[RDX_1]], [[L]]
|
|
; CHECK-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[L]]
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
|
|
; CHECK-NEXT: [[RDX_1_2:%.*]] = add i32 [[RDX_1_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: [[RDX_2_2:%.*]] = mul i32 [[RDX_2_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
|
|
; CHECK-NEXT: [[RDX_1_NEXT_2:%.*]] = add i32 [[RDX_1_2]], [[L_2]]
|
|
; CHECK-NEXT: [[RDX_2_NEXT_2:%.*]] = mul i32 [[RDX_2_2]], [[L_2]]
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
|
|
; CHECK-NEXT: [[RDX_1_NEXT_3]] = add i32 [[RDX_1_NEXT_2]], [[L_24]]
|
|
; CHECK-NEXT: [[RDX_2_NEXT_3]] = mul i32 [[RDX_2_NEXT_2]], [[L_24]]
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[RDX_1_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_1_NEXT_3]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[BIN_RDX5:%.*]] = phi i32 [ [[RDX_2_NEXT_3]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RES:%.*]] = add i32 [[RDX_1_NEXT_LCSSA]], [[BIN_RDX5]]
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%rdx.1 = phi i32 [ %start, %entry ], [ %rdx.1.next, %loop ]
|
|
%rdx.2 = phi i32 [ %start, %entry ], [ %rdx.2.next, %loop ]
|
|
%iv.next = add i64 %iv, 1
|
|
%gep.src = getelementptr i32, ptr %src, i64 %iv
|
|
%l = load i32 , ptr %gep.src, align 1
|
|
%rdx.1.next = add i32 %rdx.1, %l
|
|
%rdx.2.next = mul i32 %rdx.2, %l
|
|
%ec = icmp ne i64 %iv.next, 1000
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
%res = add i32 %rdx.1.next, %rdx.2.next
|
|
ret i32 %res
|
|
}
|
|
|
|
define float @test_fadd_no_fmfs(ptr %src, i64 %n, float %start) {
|
|
; CHECK-LABEL: define float @test_fadd_no_fmfs(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], float [[START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[L]]
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = fadd float [[RDX_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = fadd float [[RDX_NEXT_1]], [[L_2]]
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_24:%.*]] = load float, ptr [[GEP_SRC_24]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_3]] = fadd float [[RDX_NEXT_2]], [[L_24]]
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret float [[RDX_NEXT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi float [ %start, %entry ], [ %rdx.next, %loop ]
|
|
%iv.next = add i64 %iv, 1
|
|
%gep.src = getelementptr float, ptr %src, i64 %iv
|
|
%l = load float, ptr %gep.src, align 1
|
|
%rdx.next = fadd float %rdx, %l
|
|
%ec = icmp ne i64 %iv.next, 1000
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret float %rdx.next
|
|
}
|
|
|
|
define float @test_fadd_with_ressaoc(ptr %src, i64 %n, float %start) {
|
|
; CHECK-LABEL: define float @test_fadd_with_ressaoc(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], float [[START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[L]]
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = fadd float [[RDX_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = fadd float [[RDX_NEXT_1]], [[L_2]]
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_24:%.*]] = load float, ptr [[GEP_SRC_24]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_3]] = fadd float [[RDX_NEXT_2]], [[L_24]]
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret float [[RDX_NEXT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi float [ %start, %entry ], [ %rdx.next, %loop ]
|
|
%iv.next = add i64 %iv, 1
|
|
%gep.src = getelementptr float, ptr %src, i64 %iv
|
|
%l = load float, ptr %gep.src, align 1
|
|
%rdx.next = fadd float %rdx, %l
|
|
%ec = icmp ne i64 %iv.next, 1000
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret float %rdx.next
|
|
}
|
|
define i32 @test_smin(ptr %src, i64 %n) {
|
|
; CHECK-LABEL: define i32 @test_smin(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[MIN:%.*]] = phi i32 [ 1000, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT:%.*]] = call i32 @llvm.smin.i32(i32 [[MIN]], i32 [[L]])
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT]], i32 [[L_1]])
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT_1]], i32 [[L_2]])
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
|
|
; CHECK-NEXT: [[RDX_NEXT_3]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT_2]], i32 [[L_24]])
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%min = phi i32 [ 1000, %entry ], [ %rdx.next, %loop ]
|
|
%iv.next = add i64 %iv, 1
|
|
%gep.src = getelementptr i32, ptr %src, i64 %iv
|
|
%l = load i32 , ptr %gep.src, align 1
|
|
%rdx.next = call i32 @llvm.smin(i32 %min, i32 %l)
|
|
%ec = icmp ne i64 %iv.next, 1000
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret i32 %rdx.next
|
|
}
|
|
|
|
define i64 @test_any_of_reduction(ptr %src, i64 %n) {
|
|
; CHECK-LABEL: define i64 @test_any_of_reduction(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[ANY_OF_RDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
|
|
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
|
|
; CHECK-NEXT: [[RDX_NEXT:%.*]] = select i1 [[C]], i64 [[ANY_OF_RDX]], i64 0
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1
|
|
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L_1]], 0
|
|
; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = select i1 [[C_1]], i64 [[RDX_NEXT]], i64 0
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load i8, ptr [[GEP_SRC_2]], align 1
|
|
; CHECK-NEXT: [[C_2:%.*]] = icmp eq i8 [[L_2]], 0
|
|
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = select i1 [[C_2]], i64 [[RDX_NEXT_1]], i64 0
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_24:%.*]] = load i8, ptr [[GEP_SRC_24]], align 1
|
|
; CHECK-NEXT: [[C_24:%.*]] = icmp eq i8 [[L_24]], 0
|
|
; CHECK-NEXT: [[RDX_NEXT_3]] = select i1 [[C_24]], i64 [[RDX_NEXT_2]], i64 0
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i64 [ [[RDX_NEXT_3]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i64 [[RDX_NEXT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
|
|
%any.of.rdx = phi i64 [ %rdx.next, %loop ], [ 0, %entry ]
|
|
%iv.next = add i64 %iv, 1
|
|
%gep.src = getelementptr i8, ptr %src, i64 %iv
|
|
%l = load i8, ptr %gep.src, align 1
|
|
%c = icmp eq i8 %l, 0
|
|
%rdx.next = select i1 %c, i64 %any.of.rdx, i64 0
|
|
%ec = icmp ne i64 %iv.next, 1000
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret i64 %rdx.next
|
|
}
|
|
|
|
define void @reduction_with_intermediate_store(ptr %src, ptr %sum) {
|
|
; CHECK-LABEL: define void @reduction_with_intermediate_store(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[SUM:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[SUM_PROMOTED:%.*]] = load i32, ptr [[SUM]], align 4
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[SUM_PROMOTED]], %[[ENTRY]] ], [ [[RED_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
|
|
; CHECK-NEXT: [[RED_NEXT:%.*]] = add nsw i32 [[RED]], [[L]]
|
|
; CHECK-NEXT: store i32 [[RED_NEXT]], ptr [[SUM]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
|
|
; CHECK-NEXT: [[RED_NEXT_1:%.*]] = add nsw i32 [[RED_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: store i32 [[RED_NEXT_1]], ptr [[SUM]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
|
|
; CHECK-NEXT: [[RED_NEXT_2:%.*]] = add nsw i32 [[RED_NEXT_1]], [[L_2]]
|
|
; CHECK-NEXT: store i32 [[RED_NEXT_2]], ptr [[SUM]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_SRC_3]], align 4
|
|
; CHECK-NEXT: [[RED_NEXT_3]] = add nsw i32 [[RED_NEXT_2]], [[L_3]]
|
|
; CHECK-NEXT: store i32 [[RED_NEXT_3]], ptr [[SUM]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 10000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%sum.promoted = load i32, ptr %sum, align 4
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%red = phi i32 [ %sum.promoted, %entry ], [ %red.next, %loop ]
|
|
%gep.src = getelementptr inbounds nuw i32, ptr %src, i64 %iv
|
|
%l = load i32, ptr %gep.src, align 4
|
|
%red.next = add nsw i32 %red, %l
|
|
store i32 %red.next, ptr %sum, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%ec = icmp eq i64 %iv.next, 10000
|
|
br i1 %ec, label %exit, label %loop
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
declare i32 @foo()
|
|
|
|
; Loop with a call cannot be handled by LoopVectorize, introducing additional
|
|
; accumulators when unrolling increases throughput.
|
|
define i32 @test_add_with_call(i64 %n, i32 %start) {
|
|
; CHECK-LABEL: define i32 @test_add_with_call(
|
|
; CHECK-SAME: i64 [[N:%.*]], i32 [[START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[L:%.*]] = call i32 @foo()
|
|
; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = call i32 @foo()
|
|
; CHECK-NEXT: [[RDX_2:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = call i32 @foo()
|
|
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_2]], [[L_2]]
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[L_3:%.*]] = call i32 @foo()
|
|
; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_3]]
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i32 [[BIN_RDX2]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
|
|
%iv.next = add i64 %iv, 1
|
|
%l = call i32 @foo()
|
|
%rdx.next = add i32 %rdx, %l
|
|
%ec = icmp ne i64 %iv.next, 1000
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret i32 %rdx.next
|
|
}
|
|
|
|
; Loop with backward dependence cannot be handled LoopVectorize, introducing additional
|
|
; accumulators when unrolling increases throughput.
|
|
define i32 @test_add_with_backward_dep(ptr %p, i64 %n, i32 %start) {
|
|
; CHECK-LABEL: define i32 @test_add_with_backward_dep(
|
|
; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
|
|
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: store i32 0, ptr [[GEP_1]], align 4
|
|
; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_11]], align 4
|
|
; CHECK-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: store i32 0, ptr [[GEP_1_1]], align 4
|
|
; CHECK-NEXT: [[RDX_2:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_2]], align 4
|
|
; CHECK-NEXT: [[GEP_1_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: store i32 0, ptr [[GEP_1_2]], align 4
|
|
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_2]], [[L_2]]
|
|
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_3]], align 4
|
|
; CHECK-NEXT: [[GEP_1_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_3]]
|
|
; CHECK-NEXT: store i32 0, ptr [[GEP_1_3]], align 4
|
|
; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_3]]
|
|
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
|
|
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[BIN_RDX3:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i32 [[BIN_RDX3]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
|
|
%iv.next = add i64 %iv, 1
|
|
%gep = getelementptr inbounds nuw i32, ptr %p, i64 %iv
|
|
%l = load i32, ptr %gep
|
|
%gep.1 = getelementptr inbounds nuw i32, ptr %p, i64 %iv.next
|
|
store i32 0, ptr %gep.1
|
|
%rdx.next = add i32 %rdx, %l
|
|
%ec = icmp ne i64 %iv.next, 1000
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret i32 %rdx.next
|
|
}
|