[VPlan] Check Def2LaneDefs first in cloneForLane. (NFC)
If we have entries in Def2LaneDefs, we always have to use them. Move the Def2LaneDefs lookup ahead of the isSingleScalar check; otherwise we may not pick the correct operand, e.g. if Op was a replicate recipe that became single-scalar after it was replicated. Fixes https://github.com/llvm/llvm-project/issues/154330.
This commit is contained in:
parent
b20c291bae
commit
d67dba5e88
@ -473,8 +473,11 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
|
|||||||
// Collect the operands at Lane, creating extracts as needed.
|
// Collect the operands at Lane, creating extracts as needed.
|
||||||
SmallVector<VPValue *> NewOps;
|
SmallVector<VPValue *> NewOps;
|
||||||
for (VPValue *Op : RepR->operands()) {
|
for (VPValue *Op : RepR->operands()) {
|
||||||
if (vputils::isSingleScalar(Op)) {
|
// If Op is a definition that has been unrolled, directly use the clone for
|
||||||
NewOps.push_back(Op);
|
// the corresponding lane.
|
||||||
|
auto LaneDefs = Def2LaneDefs.find(Op);
|
||||||
|
if (LaneDefs != Def2LaneDefs.end()) {
|
||||||
|
NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (Lane.getKind() == VPLane::Kind::ScalableLast) {
|
if (Lane.getKind() == VPLane::Kind::ScalableLast) {
|
||||||
@ -482,11 +485,8 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
|
|||||||
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
|
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// If Op is a definition that has been unrolled, directly use the clone for
|
if (vputils::isSingleScalar(Op)) {
|
||||||
// the corresponding lane.
|
NewOps.push_back(Op);
|
||||||
auto LaneDefs = Def2LaneDefs.find(Op);
|
|
||||||
if (LaneDefs != Def2LaneDefs.end()) {
|
|
||||||
NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
|
||||||
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s
|
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s
|
||||||
|
|
||||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
||||||
@ -215,11 +215,98 @@ loop.latch:
|
|||||||
exit:
|
exit:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
;.
|
|
||||||
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) {
|
||||||
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
; CHECK-LABEL: define float @uniform_load_replicating_select(
|
||||||
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[TMP0:%.*]]) {
|
||||||
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||||
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
|
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 1
|
||||||
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 8
|
||||||
;.
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
||||||
|
; CHECK: [[VECTOR_PH]]:
|
||||||
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8
|
||||||
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
|
||||||
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||||
|
; CHECK: [[VECTOR_BODY]]:
|
||||||
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 5
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 6
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 7
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[A]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00
|
||||||
|
; CHECK-NEXT: [[TMP8:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00
|
||||||
|
; CHECK-NEXT: [[TMP9:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00
|
||||||
|
; CHECK-NEXT: [[TMP10:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00
|
||||||
|
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i1> [[TMP11]], i1 [[TMP8]], i32 1
|
||||||
|
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP12]], i1 [[TMP9]], i32 2
|
||||||
|
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 3
|
||||||
|
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]]
|
||||||
|
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]]
|
||||||
|
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
|
||||||
|
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]]
|
||||||
|
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP7]], ptr [[A]], ptr [[TMP15]]
|
||||||
|
; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP8]], ptr [[A]], ptr [[TMP16]]
|
||||||
|
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP9]], ptr [[A]], ptr [[TMP17]]
|
||||||
|
; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP10]], ptr [[A]], ptr [[TMP18]]
|
||||||
|
; CHECK-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP14]], <4 x float> splat (float 1.000000e+01), <4 x float> splat (float 1.000000e+00)
|
||||||
|
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP19]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP20]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP21]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP22]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x float> poison, float [[TMP24]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP25]], i32 1
|
||||||
|
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP26]], i32 2
|
||||||
|
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP27]], i32 3
|
||||||
|
; CHECK-NEXT: [[TMP32:%.*]] = fdiv <4 x float> splat (float 4.000000e+00), [[TMP31]]
|
||||||
|
; CHECK-NEXT: [[TMP33:%.*]] = call <4 x float> @llvm.pow.v4f32(<4 x float> [[TMP23]], <4 x float> [[TMP32]])
|
||||||
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
||||||
|
; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||||
|
; CHECK-NEXT: br i1 [[TMP34]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
||||||
|
; CHECK: [[MIDDLE_BLOCK]]:
|
||||||
|
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x float> [[TMP33]], i32 3
|
||||||
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
|
||||||
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
||||||
|
; CHECK: [[SCALAR_PH]]:
|
||||||
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
||||||
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
||||||
|
; CHECK: [[LOOP]]:
|
||||||
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
|
||||||
|
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[A]], align 4
|
||||||
|
; CHECK-NEXT: [[C:%.*]] = fcmp ogt float [[L]], 0.000000e+00
|
||||||
|
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
|
||||||
|
; CHECK-NEXT: [[SEL_PTR:%.*]] = select i1 [[C]], ptr [[A]], ptr [[GEP_B]]
|
||||||
|
; CHECK-NEXT: [[BASE:%.*]] = select i1 [[C]], float 1.000000e+01, float 1.000000e+00
|
||||||
|
; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[SEL_PTR]], align 4
|
||||||
|
; CHECK-NEXT: [[DIV:%.*]] = fdiv float 4.000000e+00, [[L_2]]
|
||||||
|
; CHECK-NEXT: [[POW:%.*]] = tail call float @llvm.pow.f32(float [[BASE]], float [[DIV]])
|
||||||
|
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
|
||||||
|
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
|
||||||
|
; CHECK: [[EXIT]]:
|
||||||
|
; CHECK-NEXT: [[POW_LCSSA:%.*]] = phi float [ [[POW]], %[[LOOP]] ], [ [[TMP35]], %[[MIDDLE_BLOCK]] ]
|
||||||
|
; CHECK-NEXT: ret float [[POW_LCSSA]]
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br label %loop
|
||||||
|
|
||||||
|
loop:
|
||||||
|
%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
|
||||||
|
%l = load float, ptr %A, align 4
|
||||||
|
%c = fcmp ogt float %l, 0.000000e+00
|
||||||
|
%gep.B = getelementptr inbounds float, ptr %B, i64 %iv
|
||||||
|
%sel.ptr = select i1 %c, ptr %A, ptr %gep.B
|
||||||
|
%base = select i1 %c, float 10.000000e+00, float 1.000000e+00
|
||||||
|
%l.2 = load float, ptr %sel.ptr, align 4
|
||||||
|
%div = fdiv float 4.000000e+00, %l.2
|
||||||
|
%pow = tail call float @llvm.pow.f32(float %base, float %div)
|
||||||
|
%iv.next = add i64 %iv, 1
|
||||||
|
%ec = icmp eq i64 %iv, %1
|
||||||
|
br i1 %ec, label %exit, label %loop
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret float %pow
|
||||||
|
}
|
||||||
|
|
||||||
|
declare float @llvm.pow.f32(float, float)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user