[FPEnv][AMDGPU] Correct strictfp tests.

Correct AMDGPU strictfp tests to follow the rules documented in the
LangRef:
https://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics

These tests needed the strictfp attribute added to function calls and
some declarations.

Some of the tests now pass with D146845; others get farther along but
then fail with D146845. The tests revealed that further work is required,
mostly in AMDGPU atomics, to get the tests passing.

Since I was here anyway, I removed the strictfp attribute from some
constrained intrinsic declarations. They have this attribute by default.

Test changes verified with D146845.
This commit is contained in:
Kevin P. Neal 2024-02-05 09:19:34 -05:00
parent 1af05363d6
commit d15c454bed
6 changed files with 32 additions and 30 deletions

View File

@ -943,7 +943,7 @@ define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1
ret <2 x half> %mul
}
define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #3 {
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_ieee:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -955,7 +955,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1)
ret float %mul
}
define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #4 {
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_daz:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -967,7 +967,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1)
ret float %mul
}
define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #5 {
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_dynamic:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -1316,3 +1316,6 @@ declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
attributes #0 = { "denormal-fp-math"="ieee,ieee" }
attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }
attributes #3 = { "denormal-fp-math"="ieee,ieee" strictfp }
attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" strictfp }
attributes #5 = { "denormal-fp-math"="dynamic,dynamic" strictfp }

View File

@ -2276,7 +2276,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB3_2
; GFX1132-DPP-NEXT: .LBB3_3:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic
ret void
}
@ -4174,7 +4174,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: global_atomic_add_f32 v4, v0, s[0:1]
; GFX1132-DPP-NEXT: .LBB6_2:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic
ret void
}
@ -5403,7 +5403,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_defalut_scop
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB8_2
; GFX1132-DPP-NEXT: .LBB8_3:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue monotonic, align 4
ret void
}

View File

@ -2380,7 +2380,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB3_2
; GFX1132-DPP-NEXT: .LBB3_3:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic
ret void
}
@ -4382,7 +4382,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1132-DPP-NEXT: .LBB6_3:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic
ret void
}
@ -5611,7 +5611,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_defalut_scop
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB8_2
; GFX1132-DPP-NEXT: .LBB8_3:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue monotonic, align 4
ret void
}

View File

@ -76,7 +76,7 @@ define i32 @strictfp_func_fpmode_i32() strictfp {
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%fpmode = call i32 @llvm.get.fpmode.i32() strictfp
ret i32 %fpmode
}

View File

@ -374,24 +374,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, |v0|
; GFX1011-NEXT: s_setpc_b64 s[30:31]
%abs.arg = call float @llvm.fabs.f32(float %arg)
%abs.arg = call float @llvm.fabs.f32(float %arg) #0
%result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret void
}
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #1
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #1
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #1
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) #1
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) #1
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) #1
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) #1
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata) #1
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata) #1
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata)
declare float @llvm.fabs.f32(float) #1
declare float @llvm.fabs.f32(float)
attributes #0 = { strictfp }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }

View File

@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
declare void @f16_user(half)
declare half @f16_result()
declare void @f16_user(half) #0
declare half @f16_result() #0
declare void @v2f16_user(<2 x half>)
declare <2 x half> @v2f16_result()
declare void @v2f16_user(<2 x half>) #0
declare <2 x half> @v2f16_result() #0
declare void @v4f16_user(<4 x half>)
declare <4 x half> @v4f16_result()
declare void @v4f16_user(<4 x half>) #0
declare <4 x half> @v4f16_result() #0
declare void @v8f16_user(<8 x half>)
declare <8 x half> @v8f16_result()
declare void @v8f16_user(<8 x half>) #0
declare <8 x half> @v8f16_result() #0
define void @f16_arg(half %arg, ptr %ptr) #0 {
; GFX7-LABEL: f16_arg: