This patch implements a custom printer/parser for the immediate operand of `s_wait_alu` that prints and parses the decoded counter values.

Format:
```
.<counter1>_<value1>_<counter2>_<value2>
```

Example: `s_wait_alu .VaVdst_1_VmVsrc_1`, which is equivalent to `s_wait_alu 8167`.

Features:
- A counter that is at its maximum value is not printed.
- The parser reports an error if a counter is greater than or equal to its maximum value.
- If all counters are disabled, 'AllOff' can be used.
- For now, numeric values are also accepted for backwards compatibility with older MIR.

Note: This is similar to https://github.com/llvm/llvm-project/pull/96004 but for `s_wait_alu`.
1152 lines
47 KiB
YAML
1152 lines
47 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s
|
|
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec,amdgpu-wait-sgpr-hazards -o - %s | FileCheck -check-prefixes=GCN,GFX12 %s
|
|
|
|
# Embedded LLVM IR module. It defines the @mem constant that mask_hazard_getpc2
# references through target-flags(amdgpu-rel32-lo), plus one empty amdgpu_gs
# stub function per test name; presumably each stub has a matching MIR
# document further down in this file.
--- |
|
|
@mem = internal unnamed_addr addrspace(4) constant [4 x <4 x i32>] [<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>]
|
|
|
|
define amdgpu_gs void @mask_hazard_getpc1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_getpc2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_vcc1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_vcc2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_cndmask_dpp1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_cndmask_dpp2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_cndmask_dpp3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_addc1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_addc2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_addc3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_addc4() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subb1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subb2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subb3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subb4() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subbrev1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subbrev2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subbrev3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subbrev4() { ret void }
|
|
define amdgpu_gs void @mask_hazard_div_fmas_f32() { ret void }
|
|
define amdgpu_gs void @mask_hazard_div_fmas_f64() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subreg1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subreg2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subreg3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subreg4() { ret void }
|
|
define amdgpu_gs void @mask_hazard_subreg5() { ret void }
|
|
define amdgpu_gs void @mask_hazard_waitcnt() { ret void }
|
|
define amdgpu_gs void @mask_hazard_gap1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_gap2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_gap3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_no_hazard1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_no_hazard2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_no_hazard3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_cancel_hazard1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_cancel_hazard2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_cancel_hazard3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_cancel_hazard4() { ret void }
|
|
define amdgpu_gs void @mask_hazard_partial_cancel1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_partial_cancel2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_partial_cancel3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_partial_cancel4() { ret void }
|
|
define amdgpu_gs void @mask_hazard_valu_readlane1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_valu_readlane2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_valu_readlane3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_valu_readfirstlane() { ret void }
|
|
define amdgpu_gs void @mask_hazard_valu_vcmp_vcc() { ret void }
|
|
define amdgpu_gs void @mask_hazard_valu_vcmp_sgpr() { ret void }
|
|
define amdgpu_gs void @mask_hazard_combine1() { ret void }
|
|
define amdgpu_gs void @mask_hazard_combine2() { ret void }
|
|
define amdgpu_gs void @mask_hazard_combine3() { ret void }
|
|
define amdgpu_gs void @mask_hazard_combine4() { ret void }
|
|
define amdgpu_gs void @mask_hazard_combine5() { ret void }
|
|
...
|
|
|
|
# V_CNDMASK_B32_e64 reads $sgpr0_sgpr1, S_GETPC_B64 then overwrites it and
# S_ADD_U32 reads $sgpr0. Both RUN lines (GCN prefix) expect
# S_WAITCNT_DEPCTR .SaSdst_0 directly after the S_GETPC_B64.
---
|
|
name: mask_hazard_getpc1
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_getpc1
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$sgpr0_sgpr1 = S_GETPC_B64
|
|
$sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# S_GETPC_B64 and its S_ADD_U32/S_ADDC_U32 users sit inside a BUNDLE that
# redefines $sgpr0_sgpr1 after V_CNDMASK_B32_e64 read it. The checks expect
# S_WAITCNT_DEPCTR .SaSdst_0 inside the bundle, right after S_GETPC_B64, and
# expect the @mem offsets to be 4 larger than in the input (+4 -> +8,
# +12 -> +16).
---
|
|
name: mask_hazard_getpc2
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_getpc2
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GCN-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 {
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @mem + 8, implicit-def $scc
|
|
; GCN-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-lo) @mem + 16, implicit-def $scc, implicit $scc
|
|
; GCN-NEXT: }
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
BUNDLE implicit-def $sgpr0_sgpr1 {
|
|
$sgpr0_sgpr1 = S_GETPC_B64
|
|
$sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @mem + 4, implicit-def $scc
|
|
$sgpr1 = S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-lo) @mem + 12, implicit-def $scc, implicit $scc
|
|
}
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_CNDMASK_B32_e64 reads $sgpr2_sgpr3, which S_CSELECT_B64 then overwrites.
# GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12 expects the
# instruction sequence to stay as in the input.
---
|
|
name: mask_hazard_vcc1
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_vcc1
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_vcc1
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_CNDMASK_B32_e32 reads $vcc implicitly, then S_CSELECT_B64 overwrites $vcc.
# GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12 expects the
# instruction sequence to stay as in the input.
---
|
|
name: mask_hazard_vcc2
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_vcc2
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_vcc2
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# DPP form: V_CNDMASK_B32_dpp reads $vcc implicitly, then S_CSELECT_B64
# overwrites $vcc. GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0 after the write;
# GFX12 expects the instruction sequence to stay as in the input.
---
|
|
name: mask_hazard_cndmask_dpp1
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_cndmask_dpp1
|
|
; GFX11: $vgpr0 = V_CNDMASK_B32_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_cndmask_dpp1
|
|
; GFX12: $vgpr0 = V_CNDMASK_B32_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0 = V_CNDMASK_B32_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec
|
|
$vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# e64 DPP form: V_CNDMASK_B32_e64_dpp reads $sgpr2_sgpr3 as an explicit
# operand, then S_CSELECT_B64 overwrites it. GFX11 expects
# S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12 expects the instruction
# sequence to stay as in the input.
---
|
|
name: mask_hazard_cndmask_dpp2
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_cndmask_dpp2
|
|
; GFX11: $vgpr0 = V_CNDMASK_B32_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_cndmask_dpp2
|
|
; GFX12: $vgpr0 = V_CNDMASK_B32_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0 = V_CNDMASK_B32_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
|
|
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# 16-bit variant: V_CNDMASK_B16_fake16_e64_dpp reads $sgpr2_sgpr3 as an
# explicit operand, then S_CSELECT_B64 overwrites it. GFX11 expects
# S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12 expects the instruction
# sequence to stay as in the input.
---
|
|
name: mask_hazard_cndmask_dpp3
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_cndmask_dpp3
|
|
; GFX11: $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_cndmask_dpp3
|
|
; GFX12: $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
|
|
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_ADDC_U32_e64 takes $sgpr2_sgpr3 as an explicit source operand (and defines
# $vcc), then S_CSELECT_B64 overwrites $sgpr2_sgpr3. GFX11 expects
# S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12 expects the instruction
# sequence to stay as in the input.
---
|
|
name: mask_hazard_addc1
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_addc1
|
|
; GFX11: $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_addc1
|
|
; GFX12: $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
|
|
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_ADDC_U32_e32 implicitly reads and defines $vcc, then S_CSELECT_B64
# overwrites $vcc. GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0 after the write;
# GFX12 expects the instruction sequence to stay as in the input.
---
|
|
name: mask_hazard_addc2
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_addc2
|
|
; GFX11: $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_addc2
|
|
; GFX12: $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
$vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# DPP form: V_ADDC_U32_dpp implicitly reads and defines $vcc, then
# S_CSELECT_B64 overwrites $vcc. GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0
# after the write; GFX12 expects the instruction sequence to stay as in the
# input.
---
|
|
name: mask_hazard_addc3
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_addc3
|
|
; GFX11: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_addc3
|
|
; GFX12: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
$vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# e64 DPP form: V_ADDC_U32_e64_dpp both defines $sgpr2_sgpr3 and takes it as
# an explicit source operand, then S_CSELECT_B64 overwrites it. GFX11 expects
# S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12 expects the instruction
# sequence to stay as in the input.
---
|
|
name: mask_hazard_addc4
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_addc4
|
|
; GFX11: $vgpr0, $sgpr2_sgpr3 = V_ADDC_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_addc4
|
|
; GFX12: $vgpr0, $sgpr2_sgpr3 = V_ADDC_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0, $sgpr2_sgpr3 = V_ADDC_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
|
|
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_SUBB_U32_e64 takes $sgpr2_sgpr3 as an explicit source operand (and defines
# $vcc), then S_CSELECT_B64 overwrites $sgpr2_sgpr3. GFX11 expects
# S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12 expects the instruction
# sequence to stay as in the input.
---
|
|
name: mask_hazard_subb1
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subb1
|
|
; GFX11: $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subb1
|
|
; GFX12: $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
|
|
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_SUBB_U32_e32 implicitly reads and defines $vcc, then S_CSELECT_B64
# overwrites $vcc. GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0 after the write;
# GFX12 expects the instruction sequence to stay as in the input.
---
|
|
name: mask_hazard_subb2
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subb2
|
|
; GFX11: $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subb2
|
|
; GFX12: $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
$vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# DPP form: V_SUBB_U32_dpp implicitly reads and defines $vcc, then
# S_CSELECT_B64 overwrites $vcc. GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0
# after the write; GFX12 expects the instruction sequence to stay as in the
# input.
---
|
|
name: mask_hazard_subb3
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subb3
|
|
; GFX11: $vgpr0 = V_SUBB_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subb3
|
|
; GFX12: $vgpr0 = V_SUBB_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0 = V_SUBB_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
$vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# e64 DPP form: V_SUBB_U32_e64_dpp both defines $sgpr2_sgpr3 and takes it as
# an explicit source operand, then S_CSELECT_B64 overwrites it. GFX11 expects
# S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12 expects the instruction
# sequence to stay as in the input.
---
|
|
name: mask_hazard_subb4
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subb4
|
|
; GFX11: $vgpr0, $sgpr2_sgpr3 = V_SUBB_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subb4
|
|
; GFX12: $vgpr0, $sgpr2_sgpr3 = V_SUBB_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0, $sgpr2_sgpr3 = V_SUBB_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
|
|
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_SUBBREV_U32_e64 takes $sgpr2_sgpr3 as an explicit source operand (and
# defines $vcc), then S_CSELECT_B64 overwrites $sgpr2_sgpr3. GFX11 expects
# S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12 expects the instruction
# sequence to stay as in the input.
---
|
|
name: mask_hazard_subbrev1
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subbrev1
|
|
; GFX11: $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subbrev1
|
|
; GFX12: $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
|
|
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_SUBBREV_U32_e32 implicitly reads and defines $vcc, then S_CSELECT_B64
# overwrites $vcc. GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0 after the write;
# GFX12 expects the instruction sequence to stay as in the input.
---
|
|
name: mask_hazard_subbrev2
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subbrev2
|
|
; GFX11: $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subbrev2
|
|
; GFX12: $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
$vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# DPP form: V_SUBBREV_U32_dpp implicitly reads and defines $vcc, then
# S_CSELECT_B64 overwrites $vcc. GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0
# after the write; GFX12 expects the instruction sequence to stay as in the
# input.
---
|
|
name: mask_hazard_subbrev3
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subbrev3
|
|
; GFX11: $vgpr0 = V_SUBBREV_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subbrev3
|
|
; GFX12: $vgpr0 = V_SUBBREV_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0 = V_SUBBREV_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
|
|
$vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# e64 DPP form: V_SUBBREV_U32_e64_dpp both defines $sgpr2_sgpr3 and takes it
# as an explicit source operand, then S_CSELECT_B64 overwrites it. GFX11
# expects S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12 expects the
# instruction sequence to stay as in the input.
---
|
|
name: mask_hazard_subbrev4
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subbrev4
|
|
; GFX11: $vgpr0, $sgpr2_sgpr3 = V_SUBBREV_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subbrev4
|
|
; GFX12: $vgpr0, $sgpr2_sgpr3 = V_SUBBREV_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0, $sgpr2_sgpr3 = V_SUBBREV_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
|
|
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_DIV_FMAS_F32_e64 reads $vcc implicitly, then S_CSELECT_B64 overwrites
# $vcc. GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0 after the write; GFX12
# expects the instruction sequence to stay as in the input.
---
|
|
name: mask_hazard_div_fmas_f32
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_div_fmas_f32
|
|
; GFX11: $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_div_fmas_f32
|
|
; GFX12: $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
|
|
$vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# 64-bit variant: V_DIV_FMAS_F64_e64 reads $vcc implicitly, then S_CSELECT_B64
# overwrites $vcc. GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0 after the write;
# GFX12 expects the instruction sequence to stay as in the input.
---
|
|
name: mask_hazard_div_fmas_f64
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_div_fmas_f64
|
|
; GFX11: $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_div_fmas_f64
|
|
; GFX12: $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec
|
|
$vcc = S_CSELECT_B64 -1, 0, implicit $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Check low word overlap: V_CNDMASK_B32_e64 reads $sgpr2_sgpr3 and only $sgpr2
# is overwritten (S_MOV_B32). GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0 after
# the write; GFX12 expects the instruction sequence to stay as in the input.
|
|
---
|
|
name: mask_hazard_subreg1
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subreg1
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: $sgpr2 = S_MOV_B32 0
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subreg1
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: $sgpr2 = S_MOV_B32 0
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
$sgpr2 = S_MOV_B32 0
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Check high word overlap: V_CNDMASK_B32_e64 reads $sgpr2_sgpr3 and only
# $sgpr3 is overwritten (S_MOV_B32). GFX11 expects S_WAITCNT_DEPCTR .SaSdst_0
# after the write; GFX12 expects the instruction sequence to stay as in the
# input.
|
|
---
|
|
name: mask_hazard_subreg2
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subreg2
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: $sgpr3 = S_MOV_B32 0
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subreg2
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: $sgpr3 = S_MOV_B32 0
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
$sgpr3 = S_MOV_B32 0
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Check multiple subreg overlap: V_CNDMASK_B32_e64 reads $sgpr2_sgpr3, then
# $sgpr2 and $sgpr3 are overwritten by two separate S_MOV_B32. GFX11 expects a
# single S_WAITCNT_DEPCTR .SaSdst_0 after the second write; GFX12 expects the
# instruction sequence to stay as in the input.
|
|
---
|
|
name: mask_hazard_subreg3
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_subreg3
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: $sgpr2 = S_MOV_B32 0
|
|
; GFX11-NEXT: $sgpr3 = S_MOV_B32 0
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_subreg3
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: $sgpr2 = S_MOV_B32 0
|
|
; GFX12-NEXT: $sgpr3 = S_MOV_B32 0
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
$sgpr2 = S_MOV_B32 0
|
|
$sgpr3 = S_MOV_B32 0
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Check vcc_lo overlap: V_CNDMASK_B32_e32 reads $vcc implicitly, $vcc_lo is
# overwritten and then read back by S_MOV_B32. Both RUN lines (GCN prefix)
# expect S_WAITCNT_DEPCTR .SaSdst_0 between the write and the read.
|
|
---
|
|
name: mask_hazard_subreg4
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_subreg4
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GCN-NEXT: $vcc_lo = S_MOV_B32 0
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr2 = S_MOV_B32 $vcc_lo
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vcc_lo = S_MOV_B32 0
|
|
$sgpr2 = S_MOV_B32 $vcc_lo
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Check vcc_hi overlap: V_CNDMASK_B32_e32 reads $vcc implicitly, $vcc_hi is
# overwritten and then read back by S_MOV_B32. Both RUN lines (GCN prefix)
# expect S_WAITCNT_DEPCTR .SaSdst_0 between the write and the read.
|
|
---
|
|
name: mask_hazard_subreg5
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_subreg5
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GCN-NEXT: $vcc_hi = S_MOV_B32 0
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr2 = S_MOV_B32 $vcc_hi
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vcc_hi = S_MOV_B32 0
|
|
$sgpr2 = S_MOV_B32 $vcc_hi
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# S_WAITCNT does not mitigate hazard: an S_WAITCNT 0 sits between the
# V_CNDMASK_B32_e64 read of $sgpr0_sgpr1 and the S_GETPC_B64 write, yet both
# RUN lines (GCN prefix) still expect S_WAITCNT_DEPCTR .SaSdst_0 after the
# write.
|
|
---
|
|
name: mask_hazard_waitcnt
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_waitcnt
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GCN-NEXT: S_WAITCNT 0
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
S_WAITCNT 0
|
|
$sgpr0_sgpr1 = S_GETPC_B64
|
|
$sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Check implicit $exec: two V_MOV_B32_e32 (each with implicit $exec) sit
# between the V_CNDMASK_B32_e64 read of $sgpr0_sgpr1 and the S_GETPC_B64
# write; both RUN lines (GCN prefix) still expect S_WAITCNT_DEPCTR .SaSdst_0
# after the write.
|
|
---
|
|
name: mask_hazard_gap1
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_gap1
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
|
|
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
|
|
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
|
$sgpr0_sgpr1 = S_GETPC_B64
|
|
$sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Check implicit $mode: a V_MOV_B32_e32 carrying implicit $exec and implicit
# $mode sits between the V_CNDMASK_B32_e64 read of $sgpr0_sgpr1 and the
# S_GETPC_B64 write; both RUN lines (GCN prefix) still expect
# S_WAITCNT_DEPCTR .SaSdst_0 after the write.
|
|
---
|
|
name: mask_hazard_gap2
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_gap2
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit $mode
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit $mode
|
|
$sgpr0_sgpr1 = S_GETPC_B64
|
|
$sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Check explicit $exec: a V_WRITELANE_B32 taking $exec_lo as an explicit
# operand sits between the V_CNDMASK_B32_e64 read of $sgpr0_sgpr1 and the
# S_GETPC_B64 write; both RUN lines (GCN prefix) still expect
# S_WAITCNT_DEPCTR .SaSdst_0 after the write.
|
|
---
|
|
name: mask_hazard_gap3
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_gap3
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GCN-NEXT: $vgpr2 = V_WRITELANE_B32 $exec_lo, 0, $vgpr2
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$vgpr2 = V_WRITELANE_B32 $exec_lo, 0, $vgpr2
|
|
$sgpr0_sgpr1 = S_GETPC_B64
|
|
$sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Different SGPR write: V_CNDMASK_B32_e64 reads $sgpr2_sgpr3 but the following
# S_MOV_B32 writes $sgpr0. Both RUN lines (GCN prefix) expect no
# S_WAITCNT_DEPCTR to be inserted.
|
|
---
|
|
name: mask_hazard_no_hazard1
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_no_hazard1
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GCN-NEXT: $sgpr0 = S_MOV_B32 0
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
$sgpr0 = S_MOV_B32 0
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Different SGPR write with mask read overlap: V_CNDMASK_B32_e32 reads $vcc
# implicitly; the following S_MOV_B64 also reads $vcc but writes
# $sgpr0_sgpr1. Both RUN lines (GCN prefix) expect no S_WAITCNT_DEPCTR to be
# inserted.
|
|
---
|
|
name: mask_hazard_no_hazard2
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_no_hazard2
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $vcc
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$sgpr0_sgpr1 = S_MOV_B64 $vcc
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Overlapping VGPR write: V_CNDMASK_B32_e32 reads $vgpr2 and the following
# V_MOV_B32_e32 overwrites $vgpr2; no SGPR is written. Both RUN lines (GCN
# prefix) expect no S_WAITCNT_DEPCTR to be inserted.
|
|
---
|
|
name: mask_hazard_no_hazard3
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_no_hazard3
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_CNDMASK_B32_e32 reads $vcc implicitly; $vcc_lo and $vcc_hi are then
# written separately, $vcc_lo is read back, and finally $vcc is written again.
# Both RUN lines (GCN prefix) expect one S_WAITCNT_DEPCTR .SaSdst_0 after the
# $vcc_hi write (before the read) and none after the final $vcc write.
---
|
|
name: mask_hazard_cancel_hazard1
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_cancel_hazard1
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GCN-NEXT: $vcc_lo = S_MOV_B32 0
|
|
; GCN-NEXT: $vcc_hi = S_MOV_B32 0
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
|
|
; GCN-NEXT: $vcc = S_MOV_B64 1
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vcc_lo = S_MOV_B32 0
|
|
$vcc_hi = S_MOV_B32 0
|
|
$sgpr0 = S_MOV_B32 $vcc_lo
|
|
$vcc = S_MOV_B64 1
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_CNDMASK_B32_e32 reads $vcc implicitly; $vcc is then written as a whole by
# S_MOV_B64, $vcc_lo is read back, and $vcc is written again. Both RUN lines
# (GCN prefix) expect one S_WAITCNT_DEPCTR .SaSdst_0 after the first $vcc
# write and none after the second.
---
|
|
name: mask_hazard_cancel_hazard2
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_cancel_hazard2
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GCN-NEXT: $vcc = S_MOV_B64 0
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
|
|
; GCN-NEXT: $vcc = S_MOV_B64 1
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vcc = S_MOV_B64 0
|
|
$sgpr0 = S_MOV_B32 $vcc_lo
|
|
$vcc = S_MOV_B64 1
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_CNDMASK_B32_e64 reads $sgpr0_sgpr1; the pair is then written as a whole by
# S_MOV_B64, $sgpr0 is read back, and the pair is written again. Both RUN
# lines (GCN prefix) expect one S_WAITCNT_DEPCTR .SaSdst_0 after the first
# write and none after the second.
---
|
|
name: mask_hazard_cancel_hazard3
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_cancel_hazard3
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 0
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr4 = S_MOV_B32 $sgpr0
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$sgpr0_sgpr1 = S_MOV_B64 0
|
|
$sgpr4 = S_MOV_B32 $sgpr0
|
|
$sgpr0_sgpr1 = S_MOV_B64 1
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_CNDMASK_B32_e64 reads $sgpr0_sgpr1; $sgpr0 and $sgpr1 are then written
# separately, $sgpr0 is read back, and the pair is written again. Both RUN
# lines (GCN prefix) expect one S_WAITCNT_DEPCTR .SaSdst_0 after the $sgpr1
# write (before the read) and none after the final pair write.
---
|
|
name: mask_hazard_cancel_hazard4
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: mask_hazard_cancel_hazard4
|
|
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GCN-NEXT: $sgpr0 = S_MOV_B32 0
|
|
; GCN-NEXT: $sgpr1 = S_MOV_B32 0
|
|
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GCN-NEXT: $sgpr4 = S_MOV_B32 $sgpr0
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$sgpr0 = S_MOV_B32 0
|
|
$sgpr1 = S_MOV_B32 0
|
|
$sgpr4 = S_MOV_B32 $sgpr0
|
|
$sgpr0_sgpr1 = S_MOV_B64 1
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# V_CNDMASK_B32_e32 reads $vcc implicitly; only $vcc_lo is written before
# $vcc_lo is read back, and $vcc is written as a whole afterwards. GFX11
# expects S_WAITCNT_DEPCTR .SaSdst_0 after the $vcc_lo write and a second one
# after the final $vcc write; GFX12 expects only the first.
---
|
|
name: mask_hazard_partial_cancel1
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_partial_cancel1
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc_lo = S_MOV_B32 0
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
|
|
; GFX11-NEXT: $vcc = S_MOV_B64 1
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_partial_cancel1
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc_lo = S_MOV_B32 0
|
|
; GFX12-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX12-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
|
|
; GFX12-NEXT: $vcc = S_MOV_B64 1
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vcc_lo = S_MOV_B32 0
|
|
$sgpr0 = S_MOV_B32 $vcc_lo
|
|
$vcc = S_MOV_B64 1
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: V_CNDMASK_B32_e32 implicitly uses $vcc; $vcc_hi is overwritten
# (S_MOV_B32), then $vcc_lo (the other half) is read, and finally the full
# $vcc is written by S_MOV_B64.
# Expected GFX11: S_WAITCNT_DEPCTR .SaSdst_0 before the read of $vcc_lo and a
# second .SaSdst_0 wait after the S_MOV_B64 write of $vcc.
# Expected GFX12: only the first wait (before the read of $vcc_lo).
|
|
name: mask_hazard_partial_cancel2
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_partial_cancel2
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vcc_hi = S_MOV_B32 0
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
|
|
; GFX11-NEXT: $vcc = S_MOV_B64 1
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_partial_cancel2
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vcc_hi = S_MOV_B32 0
|
|
; GFX12-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX12-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
|
|
; GFX12-NEXT: $vcc = S_MOV_B64 1
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vcc_hi = S_MOV_B32 0
|
|
$sgpr0 = S_MOV_B32 $vcc_lo
|
|
$vcc = S_MOV_B64 1
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: V_CNDMASK_B32_e64 takes $sgpr0_sgpr1 as an operand; only $sgpr0 is
# overwritten (S_MOV_B32) and then read, after which the full $sgpr0_sgpr1
# pair is written by S_MOV_B64.
# Expected GFX11: S_WAITCNT_DEPCTR .SaSdst_0 before the read of $sgpr0 and a
# second .SaSdst_0 wait after the S_MOV_B64 write of $sgpr0_sgpr1.
# Expected GFX12: only the first wait (before the read of $sgpr0).
|
|
name: mask_hazard_partial_cancel3
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_partial_cancel3
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX11-NEXT: $sgpr0 = S_MOV_B32 0
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: $sgpr3 = S_MOV_B32 $sgpr0
|
|
; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_partial_cancel3
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX12-NEXT: $sgpr0 = S_MOV_B32 0
|
|
; GFX12-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX12-NEXT: $sgpr3 = S_MOV_B32 $sgpr0
|
|
; GFX12-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$sgpr0 = S_MOV_B32 0
|
|
$sgpr3 = S_MOV_B32 $sgpr0
|
|
$sgpr0_sgpr1 = S_MOV_B64 1
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: V_CNDMASK_B32_e64 takes $sgpr0_sgpr1 as an operand; only $sgpr1 is
# overwritten (S_MOV_B32) and then read, after which the full $sgpr0_sgpr1
# pair is written by S_MOV_B64.
# Expected GFX11: S_WAITCNT_DEPCTR .SaSdst_0 before the read of $sgpr1 and a
# second .SaSdst_0 wait after the S_MOV_B64 write of $sgpr0_sgpr1.
# Expected GFX12: only the first wait (before the read of $sgpr1).
|
|
name: mask_hazard_partial_cancel4
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_partial_cancel4
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX11-NEXT: $sgpr1 = S_MOV_B32 0
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: $sgpr3 = S_MOV_B32 $sgpr1
|
|
; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_partial_cancel4
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX12-NEXT: $sgpr1 = S_MOV_B32 0
|
|
; GFX12-NEXT: S_WAITCNT_DEPCTR .SaSdst_0
|
|
; GFX12-NEXT: $sgpr3 = S_MOV_B32 $sgpr1
|
|
; GFX12-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$sgpr1 = S_MOV_B32 0
|
|
$sgpr3 = S_MOV_B32 $sgpr1
|
|
$sgpr0_sgpr1 = S_MOV_B64 1
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: V_CNDMASK_B32_e64 takes $sgpr2_sgpr3 as an operand; $sgpr2 is then
# written by V_READLANE_B32 (lane 0).
# Expected GFX11: S_WAITCNT_DEPCTR .VaSdst_0 is inserted after the
# V_READLANE_B32, before S_ENDPGM.
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_valu_readlane1
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_valu_readlane1
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_valu_readlane1
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
$sgpr2 = V_READLANE_B32 $vgpr3, 0
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: V_CNDMASK_B32_e64 takes $sgpr2_sgpr3 as an operand; $sgpr3 (the
# other half compared to the readlane1 test) is then written by
# V_READLANE_B32 (lane 1).
# Expected GFX11: S_WAITCNT_DEPCTR .VaSdst_0 is inserted after the
# V_READLANE_B32, before S_ENDPGM.
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_valu_readlane2
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_valu_readlane2
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_valu_readlane2
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
$sgpr3 = V_READLANE_B32 $vgpr3, 1
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: V_CNDMASK_B32_e64 takes $sgpr2_sgpr3 as an operand; both halves are
# then written by two consecutive V_READLANE_B32 instructions.
# Expected GFX11: a single S_WAITCNT_DEPCTR .VaSdst_0 is inserted after the
# second V_READLANE_B32 (none between the two readlanes).
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_valu_readlane3
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_valu_readlane3
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0
|
|
; GFX11-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_valu_readlane3
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0
|
|
; GFX12-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
$sgpr2 = V_READLANE_B32 $vgpr3, 0
|
|
$sgpr3 = V_READLANE_B32 $vgpr3, 1
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: V_CNDMASK_B32_e64 takes $sgpr2_sgpr3 as an operand; $sgpr2 is then
# written by V_READFIRSTLANE_B32.
# Expected GFX11: S_WAITCNT_DEPCTR .VaSdst_0 is inserted after the
# V_READFIRSTLANE_B32, before S_ENDPGM.
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_valu_readfirstlane
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_valu_readfirstlane
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: $sgpr2 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_valu_readfirstlane
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: $sgpr2 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
$sgpr2 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: V_CNDMASK_B32_e32 implicitly uses $vcc; $vcc is then redefined by
# V_CMP_NE_U32_e32 (implicit-def $vcc).
# Expected GFX11: S_WAITCNT_DEPCTR .VaVcc_0 is inserted after the V_CMP,
# before S_ENDPGM (note the VaVcc counter, not VaSdst).
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_valu_vcmp_vcc
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_valu_vcmp_vcc
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVcc_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_valu_vcmp_vcc
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: V_CNDMASK_B32_e64 takes $sgpr2_sgpr3 as an operand; the same SGPR
# pair is then written by V_CMP_EQ_U32_e64.
# Expected GFX11: S_WAITCNT_DEPCTR .VaSdst_0 is inserted after the V_CMP,
# before S_ENDPGM.
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_valu_vcmp_sgpr
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_valu_vcmp_sgpr
|
|
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_valu_vcmp_sgpr
|
|
; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
$sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: three V_CNDMASK instructions use $vcc, $sgpr0_sgpr1 and $sgpr2_sgpr3
# respectively. Afterwards $vcc is redefined by V_CMP_NE_U32_e32, $sgpr0 and
# $sgpr1 are written by S_MOV_B32, and $sgpr2_sgpr3 is written by
# V_CMP_EQ_U32_e64, with no intervening reads of those registers.
# Expected GFX11: one combined wait, printed as
# S_WAITCNT_DEPCTR .VaSdst_0_VaVcc_0_SaSdst_0, is inserted after the last
# write, before S_ENDPGM.
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_combine1
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_combine1
|
|
; GFX11: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX11-NEXT: $sgpr0 = S_MOV_B32 0
|
|
; GFX11-NEXT: $sgpr1 = S_MOV_B32 0
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_VaVcc_0_SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_combine1
|
|
; GFX12: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX12-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX12-NEXT: $sgpr0 = S_MOV_B32 0
|
|
; GFX12-NEXT: $sgpr1 = S_MOV_B32 0
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
$sgpr0 = S_MOV_B32 0
|
|
$sgpr1 = S_MOV_B32 0
|
|
$sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: same three V_CNDMASK users as the combine1 test ($vcc, $sgpr0_sgpr1,
# $sgpr2_sgpr3), followed by V_CMP_NE_U32_e32 redefining $vcc, an S_MOV_B32
# write of $sgpr0, an S_MOV_B32 that writes $sgpr1 from $sgpr4, and a
# V_CMP_EQ_U32_e64 write of $sgpr2_sgpr3.
# Expected GFX11: two waits. S_WAITCNT_DEPCTR .VaVcc_0_SaSdst_0 is placed
# after the $sgpr0 write (before the "$sgpr1 = S_MOV_B32 $sgpr4"), and
# S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 is placed after the V_CMP_EQ_U32_e64.
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_combine2
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_combine2
|
|
; GFX11: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX11-NEXT: $sgpr0 = S_MOV_B32 0
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVcc_0_SaSdst_0
|
|
; GFX11-NEXT: $sgpr1 = S_MOV_B32 $sgpr4
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_combine2
|
|
; GFX12: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX12-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX12-NEXT: $sgpr0 = S_MOV_B32 0
|
|
; GFX12-NEXT: $sgpr1 = S_MOV_B32 $sgpr4
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
$sgpr0 = S_MOV_B32 0
|
|
$sgpr1 = S_MOV_B32 $sgpr4
|
|
$sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: three V_CNDMASK instructions use $vcc, $sgpr0_sgpr1 and $sgpr2_sgpr3.
# Afterwards V_CMP_NE_U32_e32 redefines $vcc, and two V_CMP_EQ_U32_e64
# instructions (each with $sgpr10 as a source operand) write $sgpr0_sgpr1 and
# $sgpr2_sgpr3.
# Expected GFX11: three separate waits rather than one combined wait:
# .VaVcc_0 after the V_CMP_NE, then .VaSdst_0 after each V_CMP_EQ_U32_e64.
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_combine3
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_combine3
|
|
; GFX11: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVcc_0
|
|
; GFX11-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_combine3
|
|
; GFX12: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX12-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX12-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec
|
|
$sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: same as the combine3 test, but with "$sgpr4_sgpr5 = S_MOV_B64 $vcc"
# (a read of $vcc) placed between the two V_CMP_EQ_U32_e64 writes.
# Expected GFX11: .VaVcc_0 after the V_CMP_NE that redefines $vcc, .VaSdst_0
# after the first V_CMP_EQ_U32_e64 (before the S_MOV_B64), and another
# .VaSdst_0 after the second V_CMP_EQ_U32_e64.
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_combine4
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_combine4
|
|
; GFX11: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVcc_0
|
|
; GFX11-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $vcc
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_combine4
|
|
; GFX12: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX12-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX12-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec
|
|
; GFX12-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $vcc
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec
|
|
$sgpr4_sgpr5 = S_MOV_B64 $vcc
|
|
$sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
# Input: same as the combine3 test, but with "$sgpr5 = S_MOV_B32 $sgpr1"
# (a read of the just-written $sgpr1) placed between the two
# V_CMP_EQ_U32_e64 writes.
# Expected GFX11: .VaVcc_0 after the V_CMP_NE that redefines $vcc, .VaSdst_0
# after the first V_CMP_EQ_U32_e64 (before the read of $sgpr1), and another
# .VaSdst_0 after the second V_CMP_EQ_U32_e64.
# Expected GFX12: the instruction stream is unchanged (no wait inserted).
|
|
name: mask_hazard_combine5
|
|
body: |
|
|
bb.0:
|
|
; GFX11-LABEL: name: mask_hazard_combine5
|
|
; GFX11: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVcc_0
|
|
; GFX11-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: $sgpr5 = S_MOV_B32 $sgpr1
|
|
; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec
|
|
; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0
|
|
; GFX11-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX12-LABEL: name: mask_hazard_combine5
|
|
; GFX12: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
; GFX12-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
; GFX12-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
; GFX12-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec
|
|
; GFX12-NEXT: $sgpr5 = S_MOV_B32 $sgpr1
|
|
; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec
|
|
; GFX12-NEXT: S_ENDPGM 0
|
|
$vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
|
|
$vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
|
|
$vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
|
|
V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
|
|
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec
|
|
$sgpr5 = S_MOV_B32 $sgpr1
|
|
$sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|