When reviewing another change, I noticed that we were failing to infer samesign for two cases: 1) an unsigned comparison, and 2) when both arguments were known negative. Using CVP and InstCombine as a reference, we need to be careful not to allow eq/ne comparisons. I'm a bit unclear on why that is, and for now am going with the low-risk change. I may return to investigate that in a follow-up. Compile-time results look like noise to me; see: https://llvm-compile-time-tracker.com/compare.php?from=49a978712893fcf9e5f40ac488315d029cf15d3d&to=2ddb263604fd7d538e09dc1f805ebc30eb3ffab0&stat=instructions:u
65 lines
2.5 KiB
LLVM
65 lines
2.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=indvars -indvars-predicate-loops=1 -S | FileCheck %s

; A loop whose body uses a loop convergence token should be skipped by IndVarSimplify.

; Intrinsic called first in the entry block of @loop. Its token result is
; passed to @llvm.experimental.convergence.loop through a "convergencectrl"
; operand bundle.
declare token @llvm.experimental.convergence.entry() #0

; @loop(i32 %tid, ptr %array)
;
; Input for the indvars run above. The function:
;   * entry:         obtains a convergence entry token (%0).
;   * for.cond.i:    loop header; %i.0.i starts at 0, a loop convergence token
;                    (%2) is created from %0, and the loop continues while
;                    %i.0.i is unsigned-less-than 8.
;   * for.body.i:    increments the counter and leaves the loop through
;                    if.then.i as soon as the counter equals %tid.
;   * if.then.i:     calls the wave umax reduction on %tid under the loop
;                    token %2 and stores the result to %array[%tid].
;
; The assertions below expect the same control flow and instructions as the
; input; the only visible difference is that the loop-exit compare in
; for.cond.i carries the `samesign` flag.
;
; NOTE(review): unnamed values are numbered %0, %2, %3 (there is no %1). Older
; IR parsers required consecutive numbering - confirm the targeted LLVM
; version accepts this.
define void @loop(i32 %tid, ptr %array) #0 {
; CHECK-LABEL: @loop(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.experimental.convergence.entry()
; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
; CHECK: for.cond.i:
; CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ]
; CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 8
; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: for.body.i:
; CHECK-NEXT: [[CMP1_I:%.*]] = icmp eq i32 [[I_0_I]], [[TID:%.*]]
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
; CHECK-NEXT: br i1 [[CMP1_I]], label [[IF_THEN_I:%.*]], label [[FOR_COND_I]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: if.then.i:
; CHECK-NEXT: [[HLSL_WAVE_ACTIVE_MAX2_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 [[TID]]) [ "convergencectrl"(token [[TMP1]]) ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i32 [[TID]]
; CHECK-NEXT: store i32 [[HLSL_WAVE_ACTIVE_MAX2_I]], ptr [[TMP2]], align 4
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  ; Token consumed by the loop header's convergence.loop call.
  %0 = tail call token @llvm.experimental.convergence.entry()
  br label %for.cond.i

for.cond.i:
  ; Counter: 0 on entry, %inc.i on the back edge from for.body.i.
  %i.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ]
  ; Loop convergence token, tied to the entry token %0; used in if.then.i.
  %2 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ]
  ; Exit test; the expected output adds `samesign` to this compare.
  %cmp.i = icmp ult i32 %i.0.i, 8
  br i1 %cmp.i, label %for.body.i, label %exit.loopexit

for.body.i:
  ; Early-exit test against the function argument %tid.
  %cmp1.i = icmp eq i32 %i.0.i, %tid
  %inc.i = add nuw nsw i32 %i.0.i, 1
  br i1 %cmp1.i, label %if.then.i, label %for.cond.i

exit.loopexit:
  br label %exit

if.then.i:
  ; Runs outside the loop but is controlled by the loop token %2.
  %hlsl.wave.active.max2.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 %tid) [ "convergencectrl"(token %2) ]
  %3 = getelementptr inbounds i32, ptr %array, i32 %tid
  store i32 %hlsl.wave.active.max2.i, ptr %3, align 4
  br label %exit

exit:
  ret void
}

; Intrinsic called in the loop header (for.cond.i) of @loop with a
; "convergencectrl" operand bundle that carries the entry token.
declare token @llvm.experimental.convergence.loop() #0

; Intrinsic called in if.then.i of @loop on %tid, with a "convergencectrl"
; operand bundle that carries the loop token.
; NOTE(review): the call site in @loop uses the spir_func calling convention
; while this declaration does not name one - confirm this is intended.
declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0

; Attribute group #0 is referenced by @loop and by the three intrinsic
; declarations visible in this file.
attributes #0 = { convergent }