Detected on targets older than gfx10 (e.g. gfx9) for constants that are too large to be inlined (constants are mapped to sgpr by default). In the med3 combine it is expected that regbankselect maps all operands of the min/max instructions we try to match to vgpr. However, constants are mapped to sgpr, so there will be an sgpr-to-vgpr copy. The matchers look through sgpr-to-vgpr copies and return the sgpr, and using those sgprs directly breaks the constant bus restriction. Build med3 with all vgpr operands. Use the existing sgpr-to-vgpr copies for matched sgprs. If there is no such copy (not expected), build one. Differential Revision: https://reviews.llvm.org/D114700
209 lines
7.3 KiB
LLVM
209 lines
7.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; umin(umax(%a, 12), 17): both constants are the second operand of their
; intrinsic call. Per the checks below, gfx900 and gfx1010 are both expected
; to select the pair as a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
; GFX9-LABEL: test_min_max_ValK0_K1_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_ValK0_K1_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
  ret i32 %umed
}
; umin(umax(12, %a), 17): the low constant is the FIRST operand of umax.
; Per the checks below, both targets are still expected to select a single
; v_med3_u32 v0, v0, 12, 17.
; NOTE(review): the name says "_i32" and "ValK0" and lacks the "test_" prefix
; used by the sibling functions, but the body uses the unsigned intrinsics with
; the constant first. This looks like a copy-paste leftover; the name is kept
; here so the autogenerated -LABEL lines stay valid. Consider renaming to
; test_min_max_K0Val_K1_u32 and regenerating the checks.
define i32 @min_max_ValK0_K1_i32(i32 %a) {
; GFX9-LABEL: min_max_ValK0_K1_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
  ret i32 %umed
}
; umin(17, umax(%a, 12)): the high constant is the first operand of umin, the
; low constant is the second operand of umax. Per the checks below, both
; targets are expected to select a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_min_K1max_ValK0__u32(i32 %a) {
; GFX9-LABEL: test_min_K1max_ValK0__u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_ValK0__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
  ret i32 %umed
}
; umin(17, umax(12, %a)): both constants are the first operand of their
; intrinsic call. Per the checks below, both targets are expected to select a
; single v_med3_u32 v0, v0, 12, 17.
define i32 @test_min_K1max_K0Val__u32(i32 %a) {
; GFX9-LABEL: test_min_K1max_K0Val__u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_K0Val__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
  %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
  ret i32 %umed
}
; umax(umin(%a, 17), 12): the min is applied first, and both constants are the
; second operand of their intrinsic call. Per the checks below, both targets
; are expected to select a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
; GFX9-LABEL: test_max_min_ValK1_K0_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_ValK1_K0_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
  %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
  ret i32 %umed
}
; umax(umin(17, %a), 12): the high constant is the first operand of umin, the
; low constant is the second operand of umax. Per the checks below, both
; targets are expected to select a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
; GFX9-LABEL: test_max_min_K1Val_K0_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_K1Val_K0_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
  %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
  ret i32 %umed
}
; umax(12, umin(%a, 17)): the low constant is the first operand of umax, the
; high constant is the second operand of umin. Per the checks below, both
; targets are expected to select a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_K0min_ValK1__u32(i32 %a) {
; GFX9-LABEL: test_max_K0min_ValK1__u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_ValK1__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
  %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
  ret i32 %umed
}
; umax(12, umin(17, %a)): both constants are the first operand of their
; intrinsic call. Per the checks below, both targets are expected to select a
; single v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_K0min_K1Val__u32(i32 %a) {
; GFX9-LABEL: test_max_K0min_K1Val__u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_K1Val__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
  %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
  ret i32 %umed
}
; Vector variant: umax(splat 12, umin(splat 17, %a)) on a two-element i16
; vector. Per the checks below, no med3 is expected here: both targets keep
; the pair as v_pk_min_u16 followed by v_pk_max_u16.
define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
; GFX9-LABEL: test_max_K0min_K1Val__v2u16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umin = call <2 x i16> @llvm.umin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
  %umed = call <2 x i16> @llvm.umax.v2i16(<2 x i16> <i16 12, i16 12>, <2 x i16> %umin)
  ret <2 x i16> %umed
}
; Same umin(umax(%a, 12), 17) pattern, but in an amdgpu_ps function whose
; argument is marked inreg. Per the checks below, the input is read from s2 and
; no med3 is expected: both targets emit s_max_u32 followed by s_min_u32.
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
; GFX9-LABEL: test_uniform_min_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_max_u32 s0, s2, 12
; GFX9-NEXT: s_min_u32 s0, s0, 17
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: test_uniform_min_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_max_u32 s0, s2, 12
; GFX10-NEXT: s_min_u32 s0, s0, 17
; GFX10-NEXT: ; return to shader part epilog
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
  ret i32 %umed
}
; umin(umax(%a, 12), 65): the upper bound 65 (0x41) is the constant the
; function name calls "non-inline". Per the checks below, the two targets are
; expected to differ:
;   - gfx900 first moves 0x41 into v1 with v_mov_b32 and passes v1 as the
;     third med3 source operand;
;   - gfx1010 passes 0x41 directly as a literal operand of v_med3_u32.
define i32 @test_non_inline_constant_u32(i32 %a) {
; GFX9-LABEL: test_non_inline_constant_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, 0x41
; GFX9-NEXT: v_med3_u32 v0, v0, 12, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_non_inline_constant_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 0x41
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 65)
  ret i32 %umed
}
; Declarations of the unsigned min/max intrinsics called by the functions in
; this file: scalar i32 forms and two-element i16 vector forms.
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)