From c9e0cf139cc93f2f531900b401bf7b8cd51468fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20Brku=C5=A1anin?= Date: Mon, 26 Jan 2026 19:33:00 +0100 Subject: [PATCH] [AMDGPU] Update patterns for v_cvt_flr and v_cvt_rpi (#177962) Support GlobalISel and switch to checking `nnan` flag on instruction instead of TargetOptions. Instructions are renamed to v_cvt_floor and v_cvt_nearest on gfx11+ so add gfx11 tests as well. --- .../include/llvm/Target/TargetSelectionDAG.td | 8 + llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 20 +- llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll | 328 +++++++++++++++--- llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll | 284 +++++++++++++-- .../GlobalISelEmitter/CustomPredicate.td | 3 + .../GlobalISelEmitter/GlobalISelEmitter.td | 3 +- 6 files changed, 561 insertions(+), 85 deletions(-) diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index bcb450094211..b297fd06711a 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -1201,6 +1201,14 @@ def sext_like : PatFrags<(ops node:$src), [(zext_nneg node:$src), (sext node:$src)]>; +def ffloor_nnan : PatFrag<(ops node:$src), (ffloor node:$src), [{ + return N->getFlags().hasNoNaNs(); +}]> { + let GISelPredicateCode = [{ + return MI.getFlag(MachineInstr::FmNoNans); + }]; +} + // null_frag - The null pattern operator is used in multiclass instantiations // which accept an SDPatternOperator for use in matching patterns for internal // definitions. 
When expanding a pattern, if the null fragment is referenced diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 2a99dacba52a..2d649c2b7c5e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -758,8 +758,11 @@ def FP_ONE : PatLeaf < def FP_HALF : PatLeaf < (fpimm), - [{return N->isExactlyValue(0.5);}] ->; + [{return N->isExactlyValue(0.5);}]> { + let GISelPredicateCode = [{ + return MI.getOperand(1).getFPImm()->isExactlyValue(0.5); + }]; +} /* Generic helper patterns for intrinsics */ /* -------------------------------------- */ @@ -806,16 +809,15 @@ class DwordAddrPat : AMDGPUPat < // Special conversion patterns -def cvt_rpi_i32_f32 : PatFrag < +let GIIgnoreCopies = 1 in +def cvt_rpi_i32_f32 : PatFrag< (ops node:$src), - (fp_to_sint (ffloor (fadd $src, FP_HALF))), - [{ (void) N; return TM.Options.NoNaNsFPMath; }] ->; + (fp_to_sint (ffloor_nnan (fadd $src, FP_HALF))) +>, GISelFlags; -def cvt_flr_i32_f32 : PatFrag < +def cvt_flr_i32_f32 : PatFrag< (ops node:$src), - (fp_to_sint (ffloor $src)), - [{ (void)N; return TM.Options.NoNaNsFPMath; }] + (fp_to_sint (ffloor_nnan $src)) >; let AddedComplexity = 2 in { diff --git a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll index 0974ce99aee3..9592e39114ea 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll @@ -1,82 +1,332 @@ -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn < %s | FileCheck 
-check-prefix=SI-SDAG %s +; RUN: llc -mtriple=amdgcn -global-isel < %s | FileCheck -check-prefix=SI-GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel < %s | FileCheck -check-prefix=GFX11-GISEL %s declare float @llvm.fabs.f32(float) #1 declare float @llvm.floor.f32(float) #1 -; FUNC-LABEL: {{^}}cvt_flr_i32_f32_0: -; SI-SAFE-NOT: v_cvt_flr_i32_f32 -; SI-NOT: add -; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}} -; SI: s_endpgm define amdgpu_kernel void @cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0 { - %floor = call float @llvm.floor.f32(float %x) #1 +; SI-SDAG-LABEL: cvt_flr_i32_f32_0: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_flr_i32_f32_e32 v0, s6 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: cvt_flr_i32_f32_0: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_flr_i32_f32_e32 v0, s3 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: cvt_flr_i32_f32_0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_floor_i32_f32_e32 v1, s2 +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: cvt_flr_i32_f32_0: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; 
GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_floor_i32_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm + %floor = call nnan float @llvm.floor.f32(float %x) #1 %cvt = fptosi float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1: -; SI: v_add_f32_e64 [[TMP:v[0-9]+]], s{{[0-9]+}}, 1.0 -; SI-SAFE-NOT: v_cvt_flr_i32_f32 -; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]] -; SI: s_endpgm define amdgpu_kernel void @cvt_flr_i32_f32_1(ptr addrspace(1) %out, float %x) #0 { +; SI-SDAG-LABEL: cvt_flr_i32_f32_1: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_add_f32_e64 v0, s6, 1.0 +; SI-SDAG-NEXT: v_cvt_flr_i32_f32_e32 v0, v0 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: cvt_flr_i32_f32_1: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_add_f32_e64 v0, s3, 1.0 +; SI-GISEL-NEXT: v_cvt_flr_i32_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: cvt_flr_i32_f32_1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_f32_e64 v0, s2, 1.0 +; GFX11-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cvt_floor_i32_f32_e32 v0, v0 +; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: cvt_flr_i32_f32_1: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_add_f32_e64 v0, s2, 1.0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cvt_floor_i32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm %fadd = fadd float %x, 1.0 - %floor = call float @llvm.floor.f32(float %fadd) #1 + %floor = call nnan float @llvm.floor.f32(float %fadd) #1 %cvt = fptosi float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs: -; SI-NOT: add -; SI-SAFE-NOT: v_cvt_flr_i32_f32 -; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}| -; SI: s_endpgm define amdgpu_kernel void @cvt_flr_i32_f32_fabs(ptr addrspace(1) %out, float %x) #0 { +; SI-SDAG-LABEL: cvt_flr_i32_f32_fabs: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_flr_i32_f32_e64 v0, |s6| +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: cvt_flr_i32_f32_fabs: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_flr_i32_f32_e64 v0, |s3| +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: 
cvt_flr_i32_f32_fabs: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_floor_i32_f32_e64 v1, |s2| +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: cvt_flr_i32_f32_fabs: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_floor_i32_f32_e64 v0, |s2| +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm %x.fabs = call float @llvm.fabs.f32(float %x) #1 - %floor = call float @llvm.floor.f32(float %x.fabs) #1 + %floor = call nnan float @llvm.floor.f32(float %x.fabs) #1 %cvt = fptosi float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fneg: -; SI-NOT: add -; SI-SAFE-NOT: v_cvt_flr_i32_f32 -; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}} -; SI: s_endpgm +; FIXME: GlobalISel selecting modifier fails because of G_FCANONICALIZE define amdgpu_kernel void @cvt_flr_i32_f32_fneg(ptr addrspace(1) %out, float %x) #0 { +; SI-SDAG-LABEL: cvt_flr_i32_f32_fneg: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_flr_i32_f32_e64 v0, -s6 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: cvt_flr_i32_f32_fneg: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; 
SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, -s3 +; SI-GISEL-NEXT: v_cvt_flr_i32_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: cvt_flr_i32_f32_fneg: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_floor_i32_f32_e64 v1, -s2 +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: cvt_flr_i32_f32_fneg: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v0, -s2, -s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cvt_floor_i32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm %x.fneg = fsub float -0.000000e+00, %x - %floor = call float @llvm.floor.f32(float %x.fneg) #1 + %floor = call nnan float @llvm.floor.f32(float %x.fneg) #1 %cvt = fptosi float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs_fneg: -; SI-NOT: add -; SI-SAFE-NOT: v_cvt_flr_i32_f32 -; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}| -; SI: s_endpgm define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(ptr addrspace(1) %out, float %x) #0 { +; SI-SDAG-LABEL: cvt_flr_i32_f32_fabs_fneg: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: 
v_cvt_flr_i32_f32_e64 v0, -|s6| +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: cvt_flr_i32_f32_fabs_fneg: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, -|s3| +; SI-GISEL-NEXT: v_cvt_flr_i32_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: cvt_flr_i32_f32_fabs_fneg: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_floor_i32_f32_e64 v1, -|s2| +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: cvt_flr_i32_f32_fabs_fneg: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v0, -|s2|, -|s2| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cvt_floor_i32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm %x.fabs = call float @llvm.fabs.f32(float %x) #1 %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs - %floor = call float @llvm.floor.f32(float %x.fabs.fneg) #1 + %floor = call nnan float @llvm.floor.f32(float %x.fabs.fneg) #1 %cvt = fptosi float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}no_cvt_flr_i32_f32_0: -; SI-NOT: v_cvt_flr_i32_f32 -; SI: v_floor_f32 -; SI: v_cvt_u32_f32_e32 -; SI: s_endpgm define amdgpu_kernel void 
@no_cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0 { - %floor = call float @llvm.floor.f32(float %x) #1 +; +; SI-SDAG-LABEL: no_cvt_flr_i32_f32_0: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_floor_f32_e32 v0, s6 +; SI-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v0 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: no_cvt_flr_i32_f32_0: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_floor_f32_e32 v0, s3 +; SI-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: no_cvt_flr_i32_f32_0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_floor_f32_e32 v0, s2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: no_cvt_flr_i32_f32_0: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_floor_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; 
GFX11-GISEL-NEXT: s_endpgm + %floor = call nnan float @llvm.floor.f32(float %x) #1 %cvt = fptoui float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void diff --git a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll index 0203b2d4f896..95d2fe363067 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll @@ -1,79 +1,291 @@ -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI-SDAG %s +; RUN: llc -mtriple=amdgcn -global-isel < %s | FileCheck -check-prefix=SI-GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel < %s | FileCheck -check-prefix=GFX11-GISEL %s declare float @llvm.fabs.f32(float) #1 declare float @llvm.floor.f32(float) #1 -; FUNC-LABEL: {{^}}cvt_rpi_i32_f32: -; SI-SAFE-NOT: v_cvt_rpi_i32_f32 -; SI-NONAN: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}} -; SI: s_endpgm define amdgpu_kernel void @cvt_rpi_i32_f32(ptr addrspace(1) %out, float %x) #0 { +; SI-SDAG-LABEL: cvt_rpi_i32_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_rpi_i32_f32_e32 v0, s6 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: cvt_rpi_i32_f32: +; SI-GISEL: ; %bb.0: +; 
SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_rpi_i32_f32_e32 v0, s3 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: cvt_rpi_i32_f32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_nearest_i32_f32_e32 v1, s2 +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: cvt_rpi_i32_f32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_nearest_i32_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm %fadd = fadd float %x, 0.5 - %floor = call float @llvm.floor.f32(float %fadd) #1 + %floor = call nnan float @llvm.floor.f32(float %fadd) #1 %cvt = fptosi float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs: -; SI-SAFE-NOT: v_cvt_rpi_i32_f32 -; SI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} -; SI: s_endpgm define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(ptr addrspace(1) %out, float %x) #0 { +; SI-SDAG-LABEL: cvt_rpi_i32_f32_fabs: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_rpi_i32_f32_e64 v0, |s6| +; SI-SDAG-NEXT: buffer_store_dword 
v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: cvt_rpi_i32_f32_fabs: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_rpi_i32_f32_e64 v0, |s3| +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: cvt_rpi_i32_f32_fabs: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_nearest_i32_f32_e64 v1, |s2| +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: cvt_rpi_i32_f32_fabs: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_nearest_i32_f32_e64 v0, |s2| +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm %x.fabs = call float @llvm.fabs.f32(float %x) #1 %fadd = fadd float %x.fabs, 0.5 - %floor = call float @llvm.floor.f32(float %fadd) #1 + %floor = call nnan float @llvm.floor.f32(float %fadd) #1 %cvt = fptosi float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void } ; FIXME: This doesn't work because it forms fsub 0.5, x -; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fneg: -; XSI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}} -; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, s{{[0-9]+}} -; SI-SAFE-NOT: v_cvt_flr_i32_f32 -; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]] -; SI: s_endpgm define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(ptr addrspace(1) %out, float %x) #0 { +; 
SI-SDAG-LABEL: cvt_rpi_i32_f32_fneg: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_sub_f32_e64 v0, 0.5, s6 +; SI-SDAG-NEXT: v_cvt_flr_i32_f32_e32 v0, v0 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: cvt_rpi_i32_f32_fneg: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, -s3 +; SI-GISEL-NEXT: v_cvt_rpi_i32_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: cvt_rpi_i32_f32_fneg: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_sub_f32_e64 v0, 0.5, s2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cvt_floor_i32_f32_e32 v0, v0 +; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: cvt_rpi_i32_f32_fneg: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v0, -s2, -s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cvt_nearest_i32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm %x.fneg = fsub float -0.000000e+00, %x %fadd = fadd float %x.fneg, 
0.5 - %floor = call float @llvm.floor.f32(float %fadd) #1 + %floor = call nnan float @llvm.floor.f32(float %fadd) #1 %cvt = fptosi float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void } ; FIXME: This doesn't work for same reason as above -; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs_fneg: -; SI-SAFE-NOT: v_cvt_rpi_i32_f32 -; XSI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}| - -; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, |s{{[0-9]+}}| -; SI-SAFE-NOT: v_cvt_flr_i32_f32 -; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]] -; SI: s_endpgm define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(ptr addrspace(1) %out, float %x) #0 { +; +; SI-SDAG-LABEL: cvt_rpi_i32_f32_fabs_fneg: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_sub_f32_e64 v0, 0.5, |s6| +; SI-SDAG-NEXT: v_cvt_flr_i32_f32_e32 v0, v0 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: cvt_rpi_i32_f32_fabs_fneg: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, -|s3| +; SI-GISEL-NEXT: v_cvt_rpi_i32_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: cvt_rpi_i32_f32_fabs_fneg: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_sub_f32_e64 v0, 0.5, |s2| +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cvt_floor_i32_f32_e32 v0, 
v0 +; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: cvt_rpi_i32_f32_fabs_fneg: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f32_e64 v0, -|s2|, -|s2| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cvt_nearest_i32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm %x.fabs = call float @llvm.fabs.f32(float %x) #1 %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs %fadd = fadd float %x.fabs.fneg, 0.5 - %floor = call float @llvm.floor.f32(float %fadd) #1 + %floor = call nnan float @llvm.floor.f32(float %fadd) #1 %cvt = fptosi float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}no_cvt_rpi_i32_f32_0: -; SI-NOT: v_cvt_rpi_i32_f32 -; SI: v_add_f32 -; SI: v_floor_f32 -; SI: v_cvt_u32_f32 -; SI: s_endpgm define amdgpu_kernel void @no_cvt_rpi_i32_f32_0(ptr addrspace(1) %out, float %x) #0 { +; SI-SDAG-LABEL: no_cvt_rpi_i32_f32_0: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_add_f32_e64 v0, s6, 0.5 +; SI-SDAG-NEXT: v_floor_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v0 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: no_cvt_rpi_i32_f32_0: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_add_f32_e64 v0, s3, 0.5 +; SI-GISEL-NEXT: v_floor_f32_e32 v0, v0 +; 
SI-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: no_cvt_rpi_i32_f32_0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_f32_e64 v0, s2, 0.5 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_floor_f32_e32 v0, v0 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: no_cvt_rpi_i32_f32_0: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_add_f32_e64 v0, s2, 0.5 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_floor_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm %fadd = fadd float %x, 0.5 - %floor = call float @llvm.floor.f32(float %fadd) #1 + %floor = call nnan float @llvm.floor.f32(float %fadd) #1 %cvt = fptoui float %floor to i32 store i32 %cvt, ptr addrspace(1) %out ret void diff --git a/llvm/test/TableGen/GlobalISelEmitter/CustomPredicate.td b/llvm/test/TableGen/GlobalISelEmitter/CustomPredicate.td index 2dd9286e1cb1..41e4c6303629 100644 --- a/llvm/test/TableGen/GlobalISelEmitter/CustomPredicate.td +++ b/llvm/test/TableGen/GlobalISelEmitter/CustomPredicate.td @@ -5,6 +5,7 @@ // CHECK: // PatFrag predicates. 
// CHECK-NEXT: enum { // CHECK-NEXT: GICXXPred_MI_Predicate_and_or_pat = GICXXPred_Invalid + 1, +// CHECK-NEXT: GICXXPred_MI_Predicate_ffloor_nnan, // CHECK-NEXT: GICXXPred_MI_Predicate_mul_pat, // CHECK-NEXT: GICXXPred_MI_Predicate_or_disjoint, // CHECK-NEXT: GICXXPred_MI_Predicate_or_oneuse, @@ -17,6 +18,8 @@ // CHECK: bool MyTargetInstructionSelector::testMIPredicate_MI( // CHECK: case GICXXPred_MI_Predicate_and_or_pat: { // CHECK: return doesComplexCheck(MI); +// CHECK: case GICXXPred_MI_Predicate_ffloor_nnan: { +// CHECK: return MI.getFlag(MachineInstr::FmNoNans); // CHECK: case GICXXPred_MI_Predicate_mul_pat: { // CHECK: return doesComplexCheck(MI); // CHECK: case GICXXPred_MI_Predicate_or_oneuse: { diff --git a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td index 957f23e10584..9b30760c31aa 100644 --- a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td +++ b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td @@ -155,7 +155,8 @@ def HasC : Predicate<"Subtarget->hasC()"> { let RecomputePerFunction = 1; } // CHECK-LABEL: // PatFrag predicates. // CHECK-NEXT: enum { -// CHECK-NEXT: GICXXPred_MI_Predicate_frag = GICXXPred_Invalid + 1, +// CHECK-NEXT: GICXXPred_MI_Predicate_ffloor_nnan = GICXXPred_Invalid + 1, +// CHECK-NEXT: GICXXPred_MI_Predicate_frag, // CHECK-NEXT: GICXXPred_MI_Predicate_or_disjoint, // CHECK-NEXT: };