llvm-project/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
Kevin P. Neal 76c22b18ea [FPEnv][AMDGPU] Correct strictfp tests.
Correct AMDGPU strictfp tests to follow the rules documented in the LangRef:
https://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics

Mostly these tests just needed the strictfp attribute on function
definitions.  I've also removed the strictfp attribute from uses
of the constrained intrinsics because it comes by default since
D154991, but I only did this in tests I was changing anyway.

I also removed attributes added to declare lines of intrinsics. The
attributes of intrinsics cannot be changed in a test so I eliminated
attempts to do so.

Test changes verified with D146845.
2023-07-25 13:24:46 -04:00

229 lines
10 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
define float @v_constained_fmul_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
define float @v_constained_fmul_f32_fpexcept_ignore(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_ignore:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
ret float %val
}
define float @v_constained_fmul_f32_fpexcept_maytrap(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_maytrap:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
ret float %val
}
define <2 x float> @v_constained_fmul_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v2
; GCN-NEXT: v_mul_f32_e32 v1, v1, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <2 x float> %val
}
define <2 x float> @v_constained_fmul_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v2
; GCN-NEXT: v_mul_f32_e32 v1, v1, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
ret <2 x float> %val
}
define <2 x float> @v_constained_fmul_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v2
; GCN-NEXT: v_mul_f32_e32 v1, v1, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
ret <2 x float> %val
}
define <3 x float> @v_constained_fmul_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v3
; GCN-NEXT: v_mul_f32_e32 v1, v1, v4
; GCN-NEXT: v_mul_f32_e32 v2, v2, v5
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v3
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v4
; GFX10-NEXT: v_mul_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mul_f32 v0, v0, v3 :: v_dual_mul_f32 v1, v1, v4
; GFX11-NEXT: v_mul_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <3 x float> %val
}
define amdgpu_ps float @s_constained_fmul_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
; GCN-LABEL: s_constained_fmul_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_mul_f32_e32 v0, s2, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fmul_f32_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_mul_f32_e64 v0, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
define float @v_constained_fmul_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e64 v0, |v0|, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_lhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_mul_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x) #0
%val = call float @llvm.experimental.constrained.fmul.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
define float @v_constained_fmul_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_rhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e64 v0, v0, |v1|
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_rhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_mul_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y) #0
%val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
define float @v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e64 v0, -|v0|, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_mul_f32_e64 v0, -|v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x) #0
%neg.fabs.x = fneg float %fabs.x
%val = call float @llvm.experimental.constrained.fmul.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
declare float @llvm.fabs.f32(float)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
attributes #0 = { strictfp }