llvm-project/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
Vikash Gupta 352c48f278
[SelectionDAG] Utilizing target hook convertSelectOfConstantsToMath for SelectwithConstant (#127599)
The Target hook convertSelectOfConstantsToMath() needs to be used within
SimplifySelectCC helper combine function in SelectionDAG Isel, where
generic select folding with constants is happening into simple maths op
using the condition as it is.

It necessarily fixes #121145.
2025-02-25 20:32:24 +05:30

1739 lines
63 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
declare void @llvm.set.rounding(i32)
declare i32 @llvm.get.rounding()
; Rounding mode supplied at run time in an SGPR (i32 inreg, amdgpu_gfx).
; Every target is expected to form min_u32(x, x - 4), scale it by 4 into a bit
; offset, shift the 64-bit constant 0xb73e62d9_1c84a50f right by that offset
; and write the result to hwreg(HW_REG_MODE, 0, 4) with s_setreg_b32.
define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
; GFX678-LABEL: s_set_rounding:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_add_i32 s34, s4, -4
; GFX678-NEXT: s_min_u32 s34, s4, s34
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_add_i32 s34, s4, -4
; GFX9-NEXT: s_min_u32 s34, s4, s34
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_add_i32 s34, s4, -4
; GFX10-NEXT: s_min_u32 s36, s4, s34
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_add_i32 s0, s4, -4
; GFX11-NEXT: s_min_u32 s2, s4, s0
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Same run-time lookup sequence inside an amdgpu_kernel. The argument is first
; loaded through s[4:5] (s_load_dword, or s_load_b32 on gfx1100) and the
; function ends in s_endpgm. The empty side-effecting inline asm that follows
; the call in the IR is expected to appear as the ASMSTART/ASMEND markers ahead
; of the s_waitcnt and the s_setreg_b32.
define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
; GFX6-LABEL: s_set_rounding_kernel:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s2, s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX6-NEXT: ;;#ASMSTART
; GFX6-NEXT: ;;#ASMEND
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s3, s2, -4
; GFX6-NEXT: s_min_u32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s2, s2, 2
; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: s_set_rounding_kernel:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x9
; GFX7-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s3, s2, -4
; GFX7-NEXT: s_min_u32 s2, s2, s3
; GFX7-NEXT: s_lshl_b32 s2, s2, 2
; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_set_rounding_kernel:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dword s2, s[4:5], 0x24
; GFX8-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_add_i32 s3, s2, -4
; GFX8-NEXT: s_min_u32 s2, s2, s3
; GFX8-NEXT: s_lshl_b32 s2, s2, 2
; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: s_set_rounding_kernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x24
; GFX9-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_add_i32 s3, s2, -4
; GFX9-NEXT: s_min_u32 s2, s2, s3
; GFX9-NEXT: s_lshl_b32 s2, s2, 2
; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: s_set_rounding_kernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24
; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_i32 s3, s2, -4
; GFX10-NEXT: s_min_u32 s2, s2, s3
; GFX10-NEXT: s_lshl_b32 s2, s2, 2
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: s_set_rounding_kernel:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_add_i32 s3, s2, -4
; GFX11-NEXT: s_min_u32 s2, s2, s3
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_endpgm
call void @llvm.set.rounding(i32 %rounding)
call void asm sideeffect "",""()
ret void
}
; Default calling convention without inreg, so the argument arrives in v0.
; The add/min/shift lookup is expected to be done with VALU instructions and
; the result copied to an SGPR with v_readfirstlane_b32 before the
; s_setreg_b32 write to hwreg(HW_REG_MODE, 0, 4).
define void @v_set_rounding(i32 %rounding) {
; GFX6-LABEL: v_set_rounding:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_add_i32_e32 v1, vcc, -4, v0
; GFX6-NEXT: v_min_u32_e32 v0, v0, v1
; GFX6-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v0
; GFX6-NEXT: v_readfirstlane_b32 s4, v0
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_set_rounding:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_add_i32_e32 v1, vcc, -4, v0
; GFX7-NEXT: v_min_u32_e32 v0, v0, v1
; GFX7-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX7-NEXT: v_lshr_b64 v[0:1], s[4:5], v0
; GFX7-NEXT: v_readfirstlane_b32 s4, v0
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_set_rounding:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v1, vcc, -4, v0
; GFX8-NEXT: v_min_u32_e32 v0, v0, v1
; GFX8-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_set_rounding:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, -4, v0
; GFX9-NEXT: v_min_u32_e32 v0, v0, v1
; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_set_rounding:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v1, -4, v0
; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX10-NEXT: v_min_u32_e32 v0, v0, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
; GFX10-NEXT: v_readfirstlane_b32 s4, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_set_rounding:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v1, -4, v0
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: v_min_u32_e32 v0, v0, v1
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Feeds the result of llvm.get.rounding straight back into llvm.set.rounding.
; The checks expect the complete s_getreg_b32 decode sequence (table
; 0x0c96f385_eb24da71, mask with 15, conditional add of 4) followed by the
; complete encode sequence; the round trip is not expected to be folded away.
define void @set_rounding_get_rounding() {
; GFX678-LABEL: set_rounding_get_rounding:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
; GFX678-NEXT: s_lshl_b32 s6, s4, 2
; GFX678-NEXT: s_mov_b32 s4, 0xeb24da71
; GFX678-NEXT: s_mov_b32 s5, 0xc96f385
; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX678-NEXT: s_and_b32 s4, s4, 15
; GFX678-NEXT: s_add_i32 s5, s4, 4
; GFX678-NEXT: s_cmp_lt_u32 s4, 4
; GFX678-NEXT: s_cselect_b32 s4, s4, s5
; GFX678-NEXT: s_add_i32 s5, s4, -4
; GFX678-NEXT: s_min_u32 s4, s4, s5
; GFX678-NEXT: s_lshl_b32 s6, s4, 2
; GFX678-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: set_rounding_get_rounding:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
; GFX9-NEXT: s_lshl_b32 s6, s4, 2
; GFX9-NEXT: s_mov_b32 s4, 0xeb24da71
; GFX9-NEXT: s_mov_b32 s5, 0xc96f385
; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX9-NEXT: s_and_b32 s4, s4, 15
; GFX9-NEXT: s_add_i32 s5, s4, 4
; GFX9-NEXT: s_cmp_lt_u32 s4, 4
; GFX9-NEXT: s_cselect_b32 s4, s4, s5
; GFX9-NEXT: s_add_i32 s5, s4, -4
; GFX9-NEXT: s_min_u32 s4, s4, s5
; GFX9-NEXT: s_lshl_b32 s6, s4, 2
; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: set_rounding_get_rounding:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4)
; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71
; GFX10-NEXT: s_mov_b32 s5, 0xc96f385
; GFX10-NEXT: s_lshl_b32 s6, s6, 2
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX10-NEXT: s_and_b32 s4, s4, 15
; GFX10-NEXT: s_add_i32 s5, s4, 4
; GFX10-NEXT: s_cmp_lt_u32 s4, 4
; GFX10-NEXT: s_cselect_b32 s4, s4, s5
; GFX10-NEXT: s_add_i32 s5, s4, -4
; GFX10-NEXT: s_min_u32 s6, s4, s5
; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s6, s6, 2
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: set_rounding_get_rounding:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_and_b32 s0, s0, 15
; GFX11-NEXT: s_add_i32 s1, s0, 4
; GFX11-NEXT: s_cmp_lt_u32 s0, 4
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%rounding = call i32 @llvm.get.rounding()
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; TowardZero = 0 (standard llvm.set.rounding encoding per the LangRef)
; Constant argument, so the lookup is expected to fold to an immediate write
; of 15 to hwreg(HW_REG_MODE, 0, 4), or s_round_mode 0xf on gfx10/gfx11.
define void @s_set_rounding_0() {
; GFX678-LABEL: s_set_rounding_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_0:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xf
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 0)
ret void
}
; NearestTiesToEven = 1 (standard llvm.set.rounding encoding per the LangRef)
; Constant argument, so the lookup is expected to fold to an immediate write
; of 0 to hwreg(HW_REG_MODE, 0, 4), or s_round_mode 0x0 on gfx10/gfx11.
define void @s_set_rounding_1() {
; GFX678-LABEL: s_set_rounding_1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_1:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x0
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 1)
ret void
}
; TowardPositive = 2 (standard llvm.set.rounding encoding per the LangRef)
; Constant argument, so the lookup is expected to fold to an immediate write
; of 5 to hwreg(HW_REG_MODE, 0, 4), or s_round_mode 0x5 on gfx10/gfx11.
define void @s_set_rounding_2() {
; GFX678-LABEL: s_set_rounding_2:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_2:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x5
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 2)
ret void
}
; TowardNegative = 3 (standard llvm.set.rounding encoding per the LangRef)
; Constant argument, so the lookup is expected to fold to an immediate write
; of 10 to hwreg(HW_REG_MODE, 0, 4), or s_round_mode 0xa on gfx10/gfx11.
define void @s_set_rounding_3() {
; GFX678-LABEL: s_set_rounding_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_3:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xa
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 3)
ret void
}
; Unsupported mode.
; The checks expect the same immediate as for constant input 0 (15, or
; s_round_mode 0xf on gfx10/gfx11).
define void @s_set_rounding_4() {
; GFX678-LABEL: s_set_rounding_4:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_4:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xf
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 4)
ret void
}
; undefined
; The checks expect the same immediate as for constant input 1 (0, or
; s_round_mode 0x0 on gfx10/gfx11).
define void @s_set_rounding_5() {
; GFX678-LABEL: s_set_rounding_5:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_5:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x0
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 5)
ret void
}
; undefined
; The checks expect the same immediate as for constant input 2 (5, or
; s_round_mode 0x5 on gfx10/gfx11).
define void @s_set_rounding_6() {
; GFX678-LABEL: s_set_rounding_6:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_6:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x5
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 6)
ret void
}
; "Dynamic"
; Constant input 7. The checks expect the same immediate as for constant
; input 3 (10, or s_round_mode 0xa on gfx10/gfx11).
define void @s_set_rounding_7() {
; GFX678-LABEL: s_set_rounding_7:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_7:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xa
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 7)
ret void
}
; Invalid
; Constant input -1. The checks pin the folded immediate to 11 (s_round_mode
; 0xb on gfx10/gfx11).
define void @s_set_rounding_neg1() {
; GFX678-LABEL: s_set_rounding_neg1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_neg1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_neg1:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 -1)
ret void
}
; --------------------------------------------------------------------
; Test extended values
; --------------------------------------------------------------------
; NearestTiesToEvenF32_TowardPositiveF64 = 8
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 4 (s_round_mode
; 0x4 on gfx10/gfx11).
define void @s_set_rounding_8() {
; GFX678-LABEL: s_set_rounding_8:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_8:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x4
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 8)
ret void
}
; NearestTiesToEvenF32_TowardNegativeF64 = 9
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 8 (s_round_mode
; 0x8 on gfx10/gfx11).
define void @s_set_rounding_9() {
; GFX678-LABEL: s_set_rounding_9:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_9:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x8
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 9)
ret void
}
; NearestTiesToEvenF32_TowardZeroF64 = 10
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 12 (s_round_mode
; 0xc on gfx10/gfx11).
define void @s_set_rounding_10() {
; GFX678-LABEL: s_set_rounding_10:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_10:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_10:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xc
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 10)
ret void
}
; TowardPositiveF32_NearestTiesToEvenF64 = 11
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 1 (s_round_mode
; 0x1 on gfx10/gfx11).
define void @s_set_rounding_11() {
; GFX678-LABEL: s_set_rounding_11:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_11:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_11:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x1
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 11)
ret void
}
; TowardPositiveF32_TowardNegativeF64 = 12
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 9 (s_round_mode
; 0x9 on gfx10/gfx11).
define void @s_set_rounding_12() {
; GFX678-LABEL: s_set_rounding_12:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_12:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_12:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x9
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 12)
ret void
}
; TowardPositiveF32_TowardZeroF64 = 13
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 13 (s_round_mode
; 0xd on gfx10/gfx11).
define void @s_set_rounding_13() {
; GFX678-LABEL: s_set_rounding_13:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_13:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_13:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xd
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 13)
ret void
}
; TowardNegativeF32_NearestTiesToEvenF64 = 14
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 2 (s_round_mode
; 0x2 on gfx10/gfx11).
define void @s_set_rounding_14() {
; GFX678-LABEL: s_set_rounding_14:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_14:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_14:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x2
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 14)
ret void
}
; TowardNegativeF32_TowardPositiveF64 = 15
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 6 (s_round_mode
; 0x6 on gfx10/gfx11).
define void @s_set_rounding_15() {
; GFX678-LABEL: s_set_rounding_15:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_15:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x6
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 15)
ret void
}
; TowardNegativeF32_TowardZeroF64 = 16
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 14 (s_round_mode
; 0xe on gfx10/gfx11).
define void @s_set_rounding_16() {
; GFX678-LABEL: s_set_rounding_16:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_16:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xe
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 16)
ret void
}
; TowardZeroF32_NearestTiesToEvenF64 = 17
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 3 (s_round_mode
; 0x3 on gfx10/gfx11).
define void @s_set_rounding_17() {
; GFX678-LABEL: s_set_rounding_17:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_17:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_17:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x3
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 17)
ret void
}
; TowardZeroF32_TowardPositiveF64 = 18
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 7 (s_round_mode
; 0x7 on gfx10/gfx11).
define void @s_set_rounding_18() {
; GFX678-LABEL: s_set_rounding_18:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_18:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_18:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x7
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 18)
ret void
}
; TowardZeroF32_TowardNegativeF64 = 19
; Expected immediate written to hwreg(HW_REG_MODE, 0, 4) is 11 (s_round_mode
; 0xb on gfx10/gfx11).
define void @s_set_rounding_19() {
; GFX678-LABEL: s_set_rounding_19:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_19:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_19:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 19)
ret void
}
; Invalid, out of bounds
; The checks pin the folded immediate to 11 (s_round_mode 0xb on gfx10/gfx11),
; the same value expected for the constant inputs -1 and 65535.
define void @s_set_rounding_20() {
; GFX678-LABEL: s_set_rounding_20:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_20:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_20:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 20)
ret void
}
; Constant input 65535 (0xffff). The checks pin the folded immediate to 11
; (s_round_mode 0xb on gfx10/gfx11), the same value expected for the constant
; inputs -1 and 20.
define void @s_set_rounding_0xffff() {
; GFX678-LABEL: s_set_rounding_0xffff:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_0xffff:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_0xffff:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 65535)
ret void
}
; --------------------------------------------------------------------
; Test optimization knowing the value can only be in the standard
; range
; --------------------------------------------------------------------
; The argument is an i2 zero-extended to i32. The checks expect the short
; form of the lookup, a 32-bit right shift of the 16-bit table 0xa50f by
; 4 * x, with no add/min step and no 64-bit table. The fiji, gfx900, gfx1030
; and gfx1100 runs additionally expect the incoming register to be masked
; with 0xffff first.
define amdgpu_gfx void @s_set_rounding_i2_zeroext(i2 zeroext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i2_zeroext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_lshl_b32 s34, s4, 2
; GFX6-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i2_zeroext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_lshl_b32 s34, s4, 2
; GFX7-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i2_zeroext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s34, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s34, s34, 2
; GFX8-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i2_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s34, 0xffff, s4
; GFX9-NEXT: s_lshl_b32 s34, s34, 2
; GFX9-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i2_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
; GFX10-NEXT: s_lshl_b32 s34, s34, 2
; GFX10-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i2_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshr_b32 s0, 0xa50f, s0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%zext.rounding = zext i2 %rounding to i32
call void @llvm.set.rounding(i32 %zext.rounding)
ret void
}
; The argument is an i2 sign-extended to i32. Unlike the zero-extended case,
; the checks expect the general sequence here (add of -4, unsigned min, 64-bit
; table shift). The fiji, gfx900, gfx1030 and gfx1100 runs additionally expect
; an s_sext_i32_i16 of the incoming register first.
define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i2_signext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s34, s4, -4
; GFX6-NEXT: s_min_u32 s34, s4, s34
; GFX6-NEXT: s_lshl_b32 s36, s34, 2
; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i2_signext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s34, s4, -4
; GFX7-NEXT: s_min_u32 s34, s4, s34
; GFX7-NEXT: s_lshl_b32 s36, s34, 2
; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i2_signext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_sext_i32_i16 s34, s4
; GFX8-NEXT: s_add_i32 s35, s34, -4
; GFX8-NEXT: s_min_u32 s34, s34, s35
; GFX8-NEXT: s_lshl_b32 s36, s34, 2
; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i2_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_sext_i32_i16 s34, s4
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i2_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_sext_i32_i16 s34, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i2_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s0, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%sext.rounding = sext i2 %rounding to i32
call void @llvm.set.rounding(i32 %sext.rounding)
ret void
}
; llvm.set.rounding fed by an i3 argument that is sign-extended to i32, so the
; intrinsic operand ranges over [-4, 3]. The argument arrives in s4.
; Every target is expected to use the run-time 64-bit table lookup
; (s_add_i32 -4, s_min_u32, s_lshl_b32 by 2, s_lshr_b64). tahiti and hawaii
; consume s4 directly, while fiji and newer first widen it with s_sext_i32_i16.
define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i3_signext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s34, s4, -4
; GFX6-NEXT: s_min_u32 s34, s4, s34
; GFX6-NEXT: s_lshl_b32 s36, s34, 2
; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i3_signext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s34, s4, -4
; GFX7-NEXT: s_min_u32 s34, s4, s34
; GFX7-NEXT: s_lshl_b32 s36, s34, 2
; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i3_signext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_sext_i32_i16 s34, s4
; GFX8-NEXT: s_add_i32 s35, s34, -4
; GFX8-NEXT: s_min_u32 s34, s34, s35
; GFX8-NEXT: s_lshl_b32 s36, s34, 2
; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i3_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_sext_i32_i16 s34, s4
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i3_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_sext_i32_i16 s34, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i3_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s0, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%sext.rounding = sext i3 %rounding to i32
call void @llvm.set.rounding(i32 %sext.rounding)
ret void
}
; llvm.set.rounding fed by an i3 argument that is zero-extended to i32, so the
; intrinsic operand ranges over [0, 7]. The argument arrives in s4.
; Every target is expected to use the run-time 64-bit table lookup
; (s_add_i32 -4, s_min_u32, s_lshl_b32 by 2, s_lshr_b64). tahiti and hawaii
; consume s4 directly, while fiji and newer first mask it with s_and_b32 0xffff.
define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i3_zeroext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s34, s4, -4
; GFX6-NEXT: s_min_u32 s34, s4, s34
; GFX6-NEXT: s_lshl_b32 s36, s34, 2
; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i3_zeroext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s34, s4, -4
; GFX7-NEXT: s_min_u32 s34, s4, s34
; GFX7-NEXT: s_lshl_b32 s36, s34, 2
; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i3_zeroext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s34, 0xffff, s4
; GFX8-NEXT: s_add_i32 s35, s34, -4
; GFX8-NEXT: s_min_u32 s34, s34, s35
; GFX8-NEXT: s_lshl_b32 s36, s34, 2
; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i3_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s34, 0xffff, s4
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i3_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i3_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%zext.rounding = zext i3 %rounding to i32
call void @llvm.set.rounding(i32 %zext.rounding)
ret void
}
; llvm.set.rounding with an operand chosen by a select between the constants
; 0 (when %cond == 0) and 1 (otherwise); %cond arrives in s4.
; The expected output does not pick between two precomputed mode values.
; It compares with s_cmp_lg_u32, materializes a 0/1 value with v_cndmask_b32,
; shifts 0xa50f right by four times that value on the VALU, and moves the
; result back to an SGPR with v_readfirstlane_b32 before s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_select_0_1(i32 inreg %cond) {
; GFX6-LABEL: s_set_rounding_select_0_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_cmp_lg_u32 s4, 0
; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX6-NEXT: v_readfirstlane_b32 s34, v0
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_select_0_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_cmp_lg_u32 s4, 0
; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX7-NEXT: v_readfirstlane_b32 s34, v0
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_select_0_1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_cmp_lg_u32 s4, 0
; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: s_mov_b32 s34, 0xa50f
; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34
; GFX8-NEXT: v_readfirstlane_b32 s34, v0
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_0_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_lg_u32 s4, 0
; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_mov_b32 s34, 0xa50f
; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34
; GFX9-NEXT: v_readfirstlane_b32 s34, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_0_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_lg_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, -1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_0_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_lg_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 0, i32 1
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; llvm.set.rounding with an operand chosen by a select between the constants
; 1 (when %cond == 0) and 3 (otherwise); %cond arrives in s4.
; The expected output needs no run-time shift: s_cselect_b32 picks directly
; between 0xa50 (0xa50f >> 4) and 10 (0xa50f >> 12), and that value is written
; with s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_select_1_3(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_1_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_1_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_1_3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_1_3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 0xa50, 10
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 1, i32 3
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Same select between 1 (when %cond == 0) and 3 (otherwise) as
; s_set_rounding_select_1_3, but with the default calling convention and a
; non-inreg argument, so %cond arrives in v0.
; The expected output compares and selects on the VALU (v_cmp_eq_u32 plus
; v_cndmask_b32 between 10 and 0xa50) and then copies the result to an SGPR
; with v_readfirstlane_b32 before s_setreg_b32.
define void @v_set_rounding_select_1_3(i32 %cond) {
; GFX678-LABEL: v_set_rounding_select_1_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: v_mov_b32_e32 v1, 0xa50
; GFX678-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX678-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc
; GFX678-NEXT: v_readfirstlane_b32 s4, v0
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_set_rounding_select_1_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, 0xa50
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_set_rounding_select_1_3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo
; GFX10-NEXT: v_readfirstlane_b32 s4, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_set_rounding_select_1_3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 1, i32 3
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; llvm.set.rounding with an operand chosen by a select between the constants
; 2 (when %cond == 0) and 0 (otherwise); %cond arrives in s4.
; The expected output needs no run-time shift: s_cselect_b32 picks directly
; between 0xa5 (0xa50f >> 8, loaded with s_movk_i32) and 0xa50f.
define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_2_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_movk_i32 s34, 0xa5
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50f
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_2_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_movk_i32 s34, 0xa5
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50f
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_2_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_movk_i32 s34, 0xa5
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50f
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_2_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_movk_i32 s0, 0xa5
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50f
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 2, i32 0
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; llvm.set.rounding with an operand chosen by a select between the constants
; 2 (when %cond == 0) and 1 (otherwise); %cond arrives in s4.
; The expected output needs no run-time shift: s_cselect_b32 picks directly
; between 0xa5 (0xa50f >> 8, loaded with s_movk_i32) and 0xa50 (0xa50f >> 4).
define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_2_1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_movk_i32 s34, 0xa5
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_2_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_movk_i32 s34, 0xa5
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_2_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_movk_i32 s34, 0xa5
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_2_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_movk_i32 s0, 0xa5
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 2, i32 1
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; llvm.set.rounding with an operand chosen by a select between the constants
; 1 (when %cond == 0) and 2 (otherwise); %cond arrives in s4. This is
; s_set_rounding_select_2_1 with the select operands swapped.
; The expected output needs no run-time shift: s_cselect_b32 picks directly
; between 0xa50 (0xa50f >> 4, loaded with s_movk_i32) and 0xa5 (0xa50f >> 8).
define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_1_2:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_movk_i32 s34, 0xa50
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_1_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_movk_i32 s34, 0xa50
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_1_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_movk_i32 s34, 0xa50
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_1_2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_movk_i32 s0, 0xa50
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 1, i32 2
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; llvm.set.rounding with an operand chosen by a select between the constants
; 3 (when %cond == 0) and 0 (otherwise); %cond arrives in s4.
; The expected output needs no run-time shift: s_cselect_b32 picks directly
; between 10 (0xa50f >> 12) and 0xa50f.
define amdgpu_gfx void @s_set_rounding_select_3_0(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_3_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 10, 0xa50f
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_3_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 10, 0xa50f
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_3_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 10, 0xa50f
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_3_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 10, 0xa50f
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 3, i32 0
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; llvm.set.rounding with an operand chosen by a select between the constants
; 4 (when %cond == 0) and 0 (otherwise); %cond arrives in s4.
; Unlike the select tests whose operands are both in 0-3, the expected output
; keeps the select on the raw values (s_cselect_b32 4, 0) and then performs the
; run-time 64-bit table lookup (s_add_i32 -4, s_min_u32, s_lshl_b32 by 2,
; s_lshr_b64) before s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_4_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 4, 0
; GFX678-NEXT: s_add_i32 s35, s34, -4
; GFX678-NEXT: s_min_u32 s34, s34, s35
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_4_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 4, 0
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_4_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 4, 0
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_4_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 4, 0
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 4, i32 0
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; llvm.set.rounding with an operand chosen by a select between the constants
; 3 (when %cond == 0) and 5 (otherwise); %cond arrives in s4.
; Unlike the select tests whose operands are both in 0-3, the expected output
; keeps the select on the raw values (s_cselect_b32 3, 5) and then performs the
; run-time 64-bit table lookup (s_add_i32 -4, s_min_u32, s_lshl_b32 by 2,
; s_lshr_b64) before s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_3_5:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 3, 5
; GFX678-NEXT: s_add_i32 s35, s34, -4
; GFX678-NEXT: s_min_u32 s34, s34, s35
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_3_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 3, 5
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_3_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 3, 5
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_3_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 3, 5
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 3, i32 5
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Kernel that sets the rounding mode to the constant 1, immediately reads it
; back with llvm.get.rounding, and stores the result with a volatile store to a
; null addrspace(1) pointer.
; The expected output writes the mode with an immediate (s_setreg_imm32_b32 of 0
; on tahiti through gfx900, s_round_mode 0x0 on gfx1030 and gfx1100) and then
; still issues s_getreg_b32 followed by the table decode (s_lshr_b64 of
; 0xc96f385:0xeb24da71, mask with 15, conditional add of 4); the read-back is
; not folded to a constant.
define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
; GFX6-LABEL: get_rounding_after_set_rounding_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_nop 0
; GFX6-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
; GFX6-NEXT: s_lshl_b32 s2, s0, 2
; GFX6-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX6-NEXT: s_mov_b32 s1, 0xc96f385
; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX6-NEXT: s_and_b32 s0, s0, 15
; GFX6-NEXT: s_add_i32 s1, s0, 4
; GFX6-NEXT: s_cmp_lt_u32 s0, 4
; GFX6-NEXT: s_cselect_b32 s4, s0, s1
; GFX6-NEXT: s_mov_b64 s[0:1], 0
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: get_rounding_after_set_rounding_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_nop 0
; GFX7-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
; GFX7-NEXT: s_lshl_b32 s2, s0, 2
; GFX7-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX7-NEXT: s_mov_b32 s1, 0xc96f385
; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX7-NEXT: s_and_b32 s0, s0, 15
; GFX7-NEXT: s_add_i32 s1, s0, 4
; GFX7-NEXT: s_cmp_lt_u32 s0, 4
; GFX7-NEXT: s_cselect_b32 s4, s0, s1
; GFX7-NEXT: s_mov_b64 s[0:1], 0
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: get_rounding_after_set_rounding_1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
; GFX8-NEXT: s_lshl_b32 s2, s0, 2
; GFX8-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX8-NEXT: s_mov_b32 s1, 0xc96f385
; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX8-NEXT: s_and_b32 s0, s0, 15
; GFX8-NEXT: s_add_i32 s1, s0, 4
; GFX8-NEXT: s_cmp_lt_u32 s0, 4
; GFX8-NEXT: s_cselect_b32 s0, s0, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: get_rounding_after_set_rounding_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
; GFX9-NEXT: s_lshl_b32 s2, s0, 2
; GFX9-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX9-NEXT: s_mov_b32 s1, 0xc96f385
; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX9-NEXT: s_and_b32 s0, s0, 15
; GFX9-NEXT: s_add_i32 s1, s0, 4
; GFX9-NEXT: s_cmp_lt_u32 s0, 4
; GFX9-NEXT: s_cselect_b32 s0, s0, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: get_rounding_after_set_rounding_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
; GFX10-NEXT: s_mov_b32 s1, 0xc96f385
; GFX10-NEXT: s_lshl_b32 s2, s2, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_and_b32 s0, s0, 15
; GFX10-NEXT: s_add_i32 s1, s0, 4
; GFX10-NEXT: s_cmp_lt_u32 s0, 4
; GFX10-NEXT: s_cselect_b32 s0, s0, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: get_rounding_after_set_rounding_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_round_mode 0x0
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_and_b32 s0, s0, 15
; GFX11-NEXT: s_add_i32 s1, s0, 4
; GFX11-NEXT: s_cmp_lt_u32 s0, 4
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
tail call void @llvm.set.rounding(i32 1)
%set.mode = tail call i32 @llvm.get.rounding()
store volatile i32 %set.mode, ptr addrspace(1) null
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}