From bd2dca0f74e4666bc806abdfae9de06fe37a697c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 15 Jul 2023 12:02:37 -0400 Subject: [PATCH] AMDGPU: Use hex floats instead of ugly bitcasting --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 6 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 ++-- .../legalize-intrinsic-amdgcn-fdiv-fast.mir | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 1ba52f3d2d8b..ab531be9d9bf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4806,9 +4806,9 @@ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, auto Abs = B.buildFAbs(S32, RHS, Flags); const APFloat C0Val(1.0f); - auto C0 = B.buildConstant(S32, 0x6f800000); - auto C1 = B.buildConstant(S32, 0x2f800000); - auto C2 = B.buildConstant(S32, llvm::bit_cast(1.0f)); + auto C0 = B.buildFConstant(S32, 0x1p+96f); + auto C1 = B.buildFConstant(S32, 0x1p-32f); + auto C2 = B.buildFConstant(S32, 1.0f); auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags); auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index b429809f2e26..60772e7d2a27 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9277,10 +9277,10 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const { SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS); - const APFloat K0Val(llvm::bit_cast(0x6f800000)); + const APFloat K0Val(0x1p+96f); const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32); - const APFloat K1Val(llvm::bit_cast(0x2f800000)); + const APFloat K1Val(0x1p-32f); const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32); const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-amdgcn-fdiv-fast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-amdgcn-fdiv-fast.mir index ac12ff19915f..98336560ad5e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-amdgcn-fdiv-fast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-amdgcn-fdiv-fast.mir @@ -13,9 +13,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1870659584 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 796917760 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x45F0000000000000 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s32), [[C]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY1]], [[SELECT]] @@ -41,9 +41,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1870659584 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 796917760 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x45F0000000000000 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(ogt), [[FABS]](s32), [[C]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nsz G_FMUL [[COPY1]], [[SELECT]]