Matt Arsenault b356aa3e2d
AMDGPU/GlobalISel: Partially move constant selection to patterns (#100786)

This still relies on the manual code for splitting 64-bit constants and
for handling pointers.

Tablegen patterns were missing for some immediate types, so this also
brings some incidental improvements on the DAG path, and it reduces the
diff between the outputs of the two selectors.
2024-07-30 18:18:16 +04:00
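
For example, the 64-bit floating-point constant used by the GFX6 expansion
below is still materialized as a split pair of 32-bit moves:
0x3FEFFFFFFFFFFFFF = (0x3FEFFFFF << 32) | 0xFFFFFFFF, which shows up in the
checks as a v_mov_b32 of 0x3fefffff for the high half and a v_mov_b32 of -1
(0xFFFFFFFF) for the low half.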

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefix=GFX78 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX78 %s
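;
; GFX6 (SI) has no v_floor_f64, so llvm.floor.f64 is expanded through
; v_fract_f64: the fract result is clamped with v_min_f64 against
; 0x3FEFFFFFFFFFFFFF (the largest double below 1.0, materialized as the
; -1 / 0x3fefffff move pair), a v_cmp_o_f64 + v_cndmask_b32 sequence handles
; the NaN case, and the final value is formed as x + (-fract). The nnan
; variants omit the NaN handling, and fneg/fabs fold into source modifiers.
; GFX7/GFX8 (GFX78) select the native v_floor_f64 directly.
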
define double @v_floor_f64_ieee(double %x) {
; GFX6-LABEL: v_floor_f64_ieee:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.floor.f64(double %x)
ret double %result
}

define double @v_floor_f64_ieee_nnan(double %x) {
; GFX6-LABEL: v_floor_f64_ieee_nnan:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee_nnan:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
%result = call nnan double @llvm.floor.f64(double %x)
ret double %result
}

define double @v_floor_f64_ieee_fneg(double %x) {
; GFX6-LABEL: v_floor_f64_ieee_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee_fneg:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg double %x
%result = call double @llvm.floor.f64(double %neg.x)
ret double %result
}

define double @v_floor_f64_nonieee(double %x) #1 {
; GFX6-LABEL: v_floor_f64_nonieee:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_nonieee:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.floor.f64(double %x)
ret double %result
}

define double @v_floor_f64_nonieee_nnan(double %x) #1 {
; GFX6-LABEL: v_floor_f64_nonieee_nnan:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_nonieee_nnan:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
%result = call nnan double @llvm.floor.f64(double %x)
ret double %result
}

define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
; GFX6-LABEL: v_floor_f64_non_ieee_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_non_ieee_fneg:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg double %x
%result = call double @llvm.floor.f64(double %neg.x)
ret double %result
}

define double @v_floor_f64_fabs(double %x) {
; GFX6-LABEL: v_floor_f64_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], |v[0:1]|
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], |v[0:1]|, -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], |v[0:1]|
; GFX78-NEXT: s_setpc_b64 s[30:31]
%abs.x = call double @llvm.fabs.f64(double %x)
%result = call double @llvm.floor.f64(double %abs.x)
ret double %result
}

define double @v_floor_f64_fneg_fabs(double %x) {
; GFX6-LABEL: v_floor_f64_fneg_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -|v[0:1]|
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -|v[0:1]|, -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_fneg_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -|v[0:1]|
; GFX78-NEXT: s_setpc_b64 s[30:31]
%abs.x = call double @llvm.fabs.f64(double %x)
%neg.abs.x = fneg double %abs.x
%result = call double @llvm.floor.f64(double %neg.abs.x)
ret double %result
}

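; The s_* variants take the input in SGPRs (inreg); the floor expansion is
; VALU-only, so the value is still processed in VGPRs.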
define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
; GFX6-LABEL: s_floor_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], s[2:3], -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e32 v[0:1], s[2:3]
; GFX78-NEXT: ; return to shader part epilog
%result = call double @llvm.floor.f64(double %x)
%cast = bitcast double %result to <2 x float>
ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -s[2:3], -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fneg:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -s[2:3]
; GFX78-NEXT: ; return to shader part epilog
%neg.x = fneg double %x
%result = call double @llvm.floor.f64(double %neg.x)
%cast = bitcast double %result to <2 x float>
ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]|
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], |s[2:3]|, -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e64 v[0:1], |s[2:3]|
; GFX78-NEXT: ; return to shader part epilog
%abs.x = call double @llvm.fabs.f64(double %x)
%result = call double @llvm.floor.f64(double %abs.x)
%cast = bitcast double %result to <2 x float>
ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]|
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -|s[2:3]|, -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fneg_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -|s[2:3]|
; GFX78-NEXT: ; return to shader part epilog
%abs.x = call double @llvm.fabs.f64(double %x)
%neg.abs.x = fneg double %abs.x
%result = call double @llvm.floor.f64(double %neg.abs.x)
%cast = bitcast double %result to <2 x float>
ret <2 x float> %cast
}

declare double @llvm.floor.f64(double) #0
declare double @llvm.fabs.f64(double) #0

attributes #0 = { nounwind readnone speculatable willreturn }
attributes #1 = { "amdgpu-ieee"="false" }
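
; Attribute #1 ("amdgpu-ieee"="false") runs the *_nonieee functions with IEEE
; mode disabled, so both the IEEE and non-IEEE flavors of this lowering are
; covered.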