2025-07-29 19:13:51 -07:00

193 lines
6.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1250 < %s | FileCheck %s
; Integer min/max/abs intrinsics exercised by the tests in this file:
; scalar i64 forms first, then the <4 x i64> vector forms of min/max.
; llvm.abs takes an extra i1 flag operand (is_int_min_poison in the LangRef).
declare i64 @llvm.umin.i64(i64, i64)
declare i64 @llvm.umax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.abs.i64(i64, i1)
declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
; Scalar unsigned 64-bit minimum with the default calling convention.
; Operands are in v[0:1] and v[2:3]; the expected output is a single
; v_min_u64 followed by the return.
define i64 @test_umin_i64(i64 %a, i64 %b) {
; CHECK-LABEL: test_umin_i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_min_u64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT: s_set_pc_i64 s[30:31]
%r = call i64 @llvm.umin.i64(i64 %a, i64 %b)
ret i64 %r
}
; Scalar unsigned 64-bit maximum with the default calling convention.
; Operands are in v[0:1] and v[2:3]; the expected output is a single
; v_max_u64 followed by the return.
define i64 @test_umax_i64(i64 %a, i64 %b) {
; CHECK-LABEL: test_umax_i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_max_u64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT: s_set_pc_i64 s[30:31]
%r = call i64 @llvm.umax.i64(i64 %a, i64 %b)
ret i64 %r
}
; Scalar signed 64-bit minimum with the default calling convention.
; Operands are in v[0:1] and v[2:3]; the expected output is a single
; v_min_i64 followed by the return.
define i64 @test_smin_i64(i64 %a, i64 %b) {
; CHECK-LABEL: test_smin_i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_min_i64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT: s_set_pc_i64 s[30:31]
%r = call i64 @llvm.smin.i64(i64 %a, i64 %b)
ret i64 %r
}
; Scalar signed 64-bit maximum with the default calling convention.
; Operands are in v[0:1] and v[2:3]; the expected output is a single
; v_max_i64 followed by the return.
define i64 @test_smax_i64(i64 %a, i64 %b) {
; CHECK-LABEL: test_smax_i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_max_i64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT: s_set_pc_i64 s[30:31]
%r = call i64 @llvm.smax.i64(i64 %a, i64 %b)
ret i64 %r
}
; Vector unsigned minimum on <4 x i64>.
; %a occupies v[0:7] and %b occupies v[8:15]; the expected output is one
; v_min_u64 per element (four in total), each writing back over the
; corresponding register pair of %a.
define <4 x i64> @test_umin_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_umin_v4i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_min_u64 v[0:1], v[0:1], v[8:9]
; CHECK-NEXT: v_min_u64 v[2:3], v[2:3], v[10:11]
; CHECK-NEXT: v_min_u64 v[4:5], v[4:5], v[12:13]
; CHECK-NEXT: v_min_u64 v[6:7], v[6:7], v[14:15]
; CHECK-NEXT: s_set_pc_i64 s[30:31]
%r = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b)
ret <4 x i64> %r
}
; Vector unsigned maximum on <4 x i64>.
; %a occupies v[0:7] and %b occupies v[8:15]; the expected output is one
; v_max_u64 per element (four in total), each writing back over the
; corresponding register pair of %a.
define <4 x i64> @test_umax_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_umax_v4i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_max_u64 v[0:1], v[0:1], v[8:9]
; CHECK-NEXT: v_max_u64 v[2:3], v[2:3], v[10:11]
; CHECK-NEXT: v_max_u64 v[4:5], v[4:5], v[12:13]
; CHECK-NEXT: v_max_u64 v[6:7], v[6:7], v[14:15]
; CHECK-NEXT: s_set_pc_i64 s[30:31]
%r = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b)
ret <4 x i64> %r
}
; Vector signed minimum on <4 x i64>.
; %a occupies v[0:7] and %b occupies v[8:15]; the expected output is one
; v_min_i64 per element (four in total), each writing back over the
; corresponding register pair of %a.
define <4 x i64> @test_smin_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_smin_v4i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_min_i64 v[0:1], v[0:1], v[8:9]
; CHECK-NEXT: v_min_i64 v[2:3], v[2:3], v[10:11]
; CHECK-NEXT: v_min_i64 v[4:5], v[4:5], v[12:13]
; CHECK-NEXT: v_min_i64 v[6:7], v[6:7], v[14:15]
; CHECK-NEXT: s_set_pc_i64 s[30:31]
%r = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b)
ret <4 x i64> %r
}
; Vector signed maximum on <4 x i64>.
; %a occupies v[0:7] and %b occupies v[8:15]; the expected output is one
; v_max_i64 per element (four in total), each writing back over the
; corresponding register pair of %a.
define <4 x i64> @test_smax_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_smax_v4i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_max_i64 v[0:1], v[0:1], v[8:9]
; CHECK-NEXT: v_max_i64 v[2:3], v[2:3], v[10:11]
; CHECK-NEXT: v_max_i64 v[4:5], v[4:5], v[12:13]
; CHECK-NEXT: v_max_i64 v[6:7], v[6:7], v[14:15]
; CHECK-NEXT: s_set_pc_i64 s[30:31]
%r = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b)
ret <4 x i64> %r
}
; Scalar 64-bit absolute value with the default calling convention; the
; flag operand of llvm.abs is passed as i1 0.
; The expected output has no dedicated abs instruction. It builds a 64-bit
; sign mask in v[2:3] (arithmetic shift of the high half by 31, copied to
; both halves), adds the mask with v_add_nc_u64, then XORs each 32-bit half
; of the sum with the mask.
define i64 @test_abs_i64(i64 %a) {
; CHECK-LABEL: test_abs_i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; CHECK-NEXT: v_mov_b32_e32 v3, v2
; CHECK-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2
; CHECK-NEXT: s_set_pc_i64 s[30:31]
%r = call i64 @llvm.abs.i64(i64 %a, i1 0)
ret i64 %r
}
; Unsigned 64-bit minimum with inreg arguments in an amdgpu_ps function.
; Inputs are in s[0:1] and s[2:3]. The expected output still uses the
; vector instruction v_min_u64 reading the SGPR pairs directly, then copies
; each half of the result back to s0/s1 with v_readfirstlane_b32.
define amdgpu_ps i64 @test_umin_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_umin_i64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_min_u64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: v_readfirstlane_b32 s1, v1
; CHECK-NEXT: ; return to shader part epilog
%r = call i64 @llvm.umin.i64(i64 %a, i64 %b)
ret i64 %r
}
; Unsigned 64-bit maximum with inreg arguments in an amdgpu_ps function.
; Inputs are in s[0:1] and s[2:3]. The expected output still uses the
; vector instruction v_max_u64 reading the SGPR pairs directly, then copies
; each half of the result back to s0/s1 with v_readfirstlane_b32.
define amdgpu_ps i64 @test_umax_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_umax_i64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_max_u64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: v_readfirstlane_b32 s1, v1
; CHECK-NEXT: ; return to shader part epilog
%r = call i64 @llvm.umax.i64(i64 %a, i64 %b)
ret i64 %r
}
; Signed 64-bit minimum with inreg arguments in an amdgpu_ps function.
; Inputs are in s[0:1] and s[2:3]. The expected output still uses the
; vector instruction v_min_i64 reading the SGPR pairs directly, then copies
; each half of the result back to s0/s1 with v_readfirstlane_b32.
define amdgpu_ps i64 @test_smin_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_smin_i64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_min_i64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: v_readfirstlane_b32 s1, v1
; CHECK-NEXT: ; return to shader part epilog
%r = call i64 @llvm.smin.i64(i64 %a, i64 %b)
ret i64 %r
}
; Signed 64-bit maximum with inreg arguments in an amdgpu_ps function.
; Inputs are in s[0:1] and s[2:3]. The expected output still uses the
; vector instruction v_max_i64 reading the SGPR pairs directly, then copies
; each half of the result back to s0/s1 with v_readfirstlane_b32.
define amdgpu_ps i64 @test_smax_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_smax_i64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_max_i64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: v_readfirstlane_b32 s1, v1
; CHECK-NEXT: ; return to shader part epilog
%r = call i64 @llvm.smax.i64(i64 %a, i64 %b)
ret i64 %r
}
; 64-bit absolute value with an inreg argument in an amdgpu_ps function; the
; flag operand of llvm.abs is passed as i1 0.
; Unlike the *_s min/max tests, the expected output uses only scalar
; instructions. It builds a sign mask in s[2:3] (s_ashr_i32 of the high half
; by 31, copied to both halves), adds it with s_add_nc_u64, then applies a
; single 64-bit s_xor_b64 with the mask.
define amdgpu_ps i64 @test_abs_i64_s(i64 inreg %a) {
; CHECK-LABEL: test_abs_i64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_ashr_i32 s2, s1, 31
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; CHECK-NEXT: s_mov_b32 s3, s2
; CHECK-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; CHECK-NEXT: ; return to shader part epilog
%r = call i64 @llvm.abs.i64(i64 %a, i1 0)
ret i64 %r
}