Fix a problem exposed by #166483, which uses AV classes in more places. `isVectorRegister` only accepts registers of the VGPR or AGPR classes, whereas `hasVectorRegisters` additionally accepts the combined AV classes. Fixes: #168761
38 lines
1.3 KiB
LLVM
38 lines
1.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck %s

; Check that the copy from s[2:3] to v[0:1] occurs inside the loop, not after it.
;
; %i is defined by a phi in %loop (0 on the first iteration, 1 on every later
; one) but is only consumed by the return in %end, i.e. outside the loop. The
; loop is left once %count equals %arg.
;
; In the expected assembly below, s[2:3] is set to 0 before the loop and to 1
; inside it, and v[0:1] is written from s[2:3] on every iteration *before*
; s[2:3] is overwritten. s[0:1] accumulates vcc from the exit compare and is
; then cleared out of exec, so lanes whose compare succeeded stop executing
; the loop body while the remaining lanes continue.
; NOTE(review): v[0:1] is presumably the i64 return value and v0 the incoming
; %arg; each lane therefore has to capture s[2:3] in the iteration in which it
; exits, which is why a single copy placed after the loop would be wrong.
; Confirm against the calling convention.

define i64 @test_temporal_divergence(i32 %arg) #0 {
; CHECK-LABEL: test_temporal_divergence:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_u32_e32 v2, 1, v0
; CHECK-NEXT: s_mov_b64 s[2:3], 0
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: .LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_add_u32_e32 v2, -1, v2
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; CHECK-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; CHECK-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; CHECK-NEXT: s_mov_b64 s[2:3], 1
; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_cbranch_execnz .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %end
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  br label %loop

loop:
  ; %i: 0 when entered from %entry, 1 when entered from the back edge.
  ; %count: iteration counter, starting at 0 and advanced by %inc.
  %i = phi i64 [ 1, %loop ], [ 0, %entry ]
  %count = phi i32 [ %inc, %loop ], [ 0, %entry ]
  %inc = add i32 %count, 1
  ; Exit once the counter reaches %arg; otherwise take the back edge.
  %cond = icmp eq i32 %count, %arg
  br i1 %cond, label %end, label %loop

end:
  ; The only use of %i, and it sits outside the loop that defines it.
  ret i64 %i
}