diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index e51d2c0d12c4..2cd1bf4a813c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -260,6 +260,7 @@ public:
   bool visitIntrinsicInst(IntrinsicInst &I);
   bool visitFMinLike(IntrinsicInst &I);
   bool visitSqrt(IntrinsicInst &I);
+  bool visitLog(FPMathOperator &Log, Intrinsic::ID IID);
   bool visitMbcntLo(IntrinsicInst &I) const;
   bool visitMbcntHi(IntrinsicInst &I) const;
   bool run();
@@ -1998,13 +1999,20 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
 }
 
 bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
-  switch (I.getIntrinsicID()) {
+  Intrinsic::ID IID = I.getIntrinsicID();
+  switch (IID) {
   case Intrinsic::minnum:
   case Intrinsic::minimumnum:
   case Intrinsic::minimum:
     return visitFMinLike(I);
   case Intrinsic::sqrt:
     return visitSqrt(I);
+  case Intrinsic::log:
+  case Intrinsic::log10:
+    return visitLog(cast<FPMathOperator>(I), IID);
+  case Intrinsic::log2:
+    // No reason to handle log2.
+    return false;
   case Intrinsic::amdgcn_mbcnt_lo:
     return visitMbcntLo(I);
   case Intrinsic::amdgcn_mbcnt_hi:
@@ -2145,6 +2153,53 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
   return true;
 }
 
+/// Replace log and log10 intrinsic calls based on fpmath metadata.
+///
+/// Only half-typed (scalar or vector) calls on subtargets with 16-bit
+/// instructions are rewritten, and only when the call is not afn and the
+/// accuracy reported by getFPAccuracy() is at least 1.80. The call is replaced
+/// with llvm.log2(x) * C, carrying over the original fast-math flags.
+///
+/// \param Log the llvm.log / llvm.log10 call being visited.
+/// \param IID the intrinsic ID of \p Log; selects the scale constant C.
+/// \returns true if the call was replaced (the old call is queued in DeadVals).
+bool AMDGPUCodeGenPrepareImpl::visitLog(FPMathOperator &Log,
+                                        Intrinsic::ID IID) {
+  Type *Ty = Log.getType();
+  if (!Ty->getScalarType()->isHalfTy() || !ST.has16BitInsts())
+    return false;
+
+  FastMathFlags FMF = Log.getFastMathFlags();
+
+  // Defer fast math cases to codegen.
+  if (FMF.approxFunc())
+    return false;
+
+  // Limit experimentally determined from OpenCL conformance test (1.79)
+  if (Log.getFPAccuracy() < 1.80f)
+    return false;
+
+  IRBuilder<> Builder(&cast<Instruction>(Log));
+
+  // Use the generic intrinsic for convenience in the vector case. Codegen will
+  // recognize the denormal handling is not necessary from the fpext.
+  // TODO: Move to generic code
+  Value *Log2 =
+      Builder.CreateUnaryIntrinsic(Intrinsic::log2, Log.getOperand(0), FMF);
+
+  // log_b(x) = log2(x) * log_b(2): ln(2) for log, ln(2) / ln(10) for log10.
+  double Log2BaseInverted =
+      IID == Intrinsic::log10 ? numbers::ln2 / numbers::ln10 : numbers::ln2;
+  Value *Mul =
+      Builder.CreateFMulFMF(Log2, ConstantFP::get(Ty, Log2BaseInverted), FMF);
+
+  Mul->takeName(&Log);
+
+  Log.replaceAllUsesWith(Mul);
+  DeadVals.push_back(&Log);
+  return true;
+}
+
 bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-log.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-log.ll
new file mode 100644
index 000000000000..e50f2b63646a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-log.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx700 -passes=amdgpu-codegenprepare %s | FileCheck -check-prefixes=CHECK,GFX7 %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx803 -passes=amdgpu-codegenprepare %s | FileCheck -check-prefixes=CHECK,GFX8 %s
+
+; Ignore correct case
+define half @log_f16(half %x) {
+; CHECK-LABEL: define half @log_f16(
+; CHECK-SAME: half [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[RESULT:%.*]] = call half @llvm.log.f16(half [[X]])
+; CHECK-NEXT:    ret half [[RESULT]]
+;
+  %result = call half @llvm.log.f16(half %x)
+  ret half %result
+}
+
+; afn case should be handled by codegen
+define half @log_afn_f16(half %x) {
+; CHECK-LABEL: define half @log_afn_f16(
+; CHECK-SAME: half [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RESULT:%.*]] = call afn half @llvm.log.f16(half [[X]])
+; CHECK-NEXT:    ret half [[RESULT]]
+;
+  %result = call afn half @llvm.log.f16(half %x)
+  ret half %result
+}
+
+; exact ulp threshold
+define half @log_f16_ulp180(half %x) {
+; GFX7-LABEL: define half @log_f16_ulp180(
+; GFX7-SAME: half
[[X:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[RESULT:%.*]] = call half @llvm.log.f16(half [[X]]), !fpmath [[META0:![0-9]+]] +; GFX7-NEXT: ret half [[RESULT]] +; +; GFX8-LABEL: define half @log_f16_ulp180( +; GFX8-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[TMP1:%.*]] = call half @llvm.log2.f16(half [[X]]) +; GFX8-NEXT: [[RESULT:%.*]] = fmul half [[TMP1]], 0xH398C +; GFX8-NEXT: ret half [[RESULT]] +; + %result = call half @llvm.log.f16(half %x), !fpmath !{float 0x3FFCCCCCC0000000} + ret half %result +} + +; off by one, ignore +define half @log_f16_ulp180_nextdown(half %x) { +; GFX7-LABEL: define half @log_f16_ulp180_nextdown( +; GFX7-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[RESULT:%.*]] = call half @llvm.log.f16(half [[X]]), !fpmath [[META1:![0-9]+]] +; GFX7-NEXT: ret half [[RESULT]] +; +; GFX8-LABEL: define half @log_f16_ulp180_nextdown( +; GFX8-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[RESULT:%.*]] = call half @llvm.log.f16(half [[X]]), !fpmath [[META0:![0-9]+]] +; GFX8-NEXT: ret half [[RESULT]] +; + %result = call half @llvm.log.f16(half %x), !fpmath !{float 0x3FFCCCCCA0000000} + ret half %result +} + +; OpenCL limit +define half @log_f16_ulp2(half %x) { +; GFX7-LABEL: define half @log_f16_ulp2( +; GFX7-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[RESULT:%.*]] = call half @llvm.log.f16(half [[X]]), !fpmath [[META2:![0-9]+]] +; GFX7-NEXT: ret half [[RESULT]] +; +; GFX8-LABEL: define half @log_f16_ulp2( +; GFX8-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[TMP1:%.*]] = call half @llvm.log2.f16(half [[X]]) +; GFX8-NEXT: [[RESULT:%.*]] = fmul half [[TMP1]], 0xH398C +; GFX8-NEXT: ret half [[RESULT]] +; + %result = call half @llvm.log.f16(half %x), !fpmath !0 + ret half %result +} + +; OpenCL limit +define half @log10_f16_ulp2(half %x) { +; GFX7-LABEL: define half @log10_f16_ulp2( +; GFX7-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[RESULT:%.*]] = call half @llvm.log10.f16(half [[X]]), !fpmath [[META2]] +; GFX7-NEXT: ret half 
[[RESULT]] +; +; GFX8-LABEL: define half @log10_f16_ulp2( +; GFX8-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[TMP1:%.*]] = call half @llvm.log2.f16(half [[X]]) +; GFX8-NEXT: [[RESULT:%.*]] = fmul half [[TMP1]], 0xH34D1 +; GFX8-NEXT: ret half [[RESULT]] +; + %result = call half @llvm.log10.f16(half %x), !fpmath !0 + ret half %result +} + +; Ignore log2 +define half @log2_f16_ulp2(half %x) { +; GFX7-LABEL: define half @log2_f16_ulp2( +; GFX7-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[RESULT:%.*]] = call half @llvm.log2.f16(half [[X]]), !fpmath [[META2]] +; GFX7-NEXT: ret half [[RESULT]] +; +; GFX8-LABEL: define half @log2_f16_ulp2( +; GFX8-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[RESULT:%.*]] = call half @llvm.log2.f16(half [[X]]), !fpmath [[META1:![0-9]+]] +; GFX8-NEXT: ret half [[RESULT]] +; + %result = call half @llvm.log2.f16(half %x), !fpmath !0 + ret half %result +} + +; afn case should be handled by codegen +define half @log_afn_f16_ulp2(half %x) { +; GFX7-LABEL: define half @log_afn_f16_ulp2( +; GFX7-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[RESULT:%.*]] = call afn half @llvm.log.f16(half [[X]]), !fpmath [[META2]] +; GFX7-NEXT: ret half [[RESULT]] +; +; GFX8-LABEL: define half @log_afn_f16_ulp2( +; GFX8-SAME: half [[X:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[RESULT:%.*]] = call afn half @llvm.log.f16(half [[X]]), !fpmath [[META1]] +; GFX8-NEXT: ret half [[RESULT]] +; + %result = call afn half @llvm.log.f16(half %x), !fpmath !0 + ret half %result +} + +; Handle vector with scalarization +define <2 x half> @log_v2f16_ulp2(<2 x half> %x) { +; GFX7-LABEL: define <2 x half> @log_v2f16_ulp2( +; GFX7-SAME: <2 x half> [[X:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[RESULT:%.*]] = call <2 x half> @llvm.log.v2f16(<2 x half> [[X]]), !fpmath [[META2]] +; GFX7-NEXT: ret <2 x half> [[RESULT]] +; +; GFX8-LABEL: define <2 x half> @log_v2f16_ulp2( +; GFX8-SAME: <2 x half> [[X:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[TMP1:%.*]] = call <2 x half> 
@llvm.log2.v2f16(<2 x half> [[X]]) +; GFX8-NEXT: [[RESULT:%.*]] = fmul <2 x half> [[TMP1]], splat (half 0xH398C) +; GFX8-NEXT: ret <2 x half> [[RESULT]] +; + %result = call <2 x half> @llvm.log.v2f16(<2 x half> %x), !fpmath !0 + ret <2 x half> %result +} + +; Handle vector with scalarization +define <3 x half> @log_v3f16_ulp2(<3 x half> %x) { +; GFX7-LABEL: define <3 x half> @log_v3f16_ulp2( +; GFX7-SAME: <3 x half> [[X:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[RESULT:%.*]] = call <3 x half> @llvm.log.v3f16(<3 x half> [[X]]), !fpmath [[META2]] +; GFX7-NEXT: ret <3 x half> [[RESULT]] +; +; GFX8-LABEL: define <3 x half> @log_v3f16_ulp2( +; GFX8-SAME: <3 x half> [[X:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[TMP1:%.*]] = call <3 x half> @llvm.log2.v3f16(<3 x half> [[X]]) +; GFX8-NEXT: [[RESULT:%.*]] = fmul <3 x half> [[TMP1]], splat (half 0xH398C) +; GFX8-NEXT: ret <3 x half> [[RESULT]] +; + %result = call <3 x half> @llvm.log.v3f16(<3 x half> %x), !fpmath !0 + ret <3 x half> %result +} + +; bfloat not handled +define bfloat @log_bf16_ulp2(bfloat %x) { +; GFX7-LABEL: define bfloat @log_bf16_ulp2( +; GFX7-SAME: bfloat [[X:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[RESULT:%.*]] = call bfloat @llvm.log.bf16(bfloat [[X]]), !fpmath [[META2]] +; GFX7-NEXT: ret bfloat [[RESULT]] +; +; GFX8-LABEL: define bfloat @log_bf16_ulp2( +; GFX8-SAME: bfloat [[X:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[RESULT:%.*]] = call bfloat @llvm.log.bf16(bfloat [[X]]), !fpmath [[META1]] +; GFX8-NEXT: ret bfloat [[RESULT]] +; + %result = call bfloat @llvm.log.bf16(bfloat %x), !fpmath !0 + ret bfloat %result +} + +!0 = !{float 2.0} + + +;. +; GFX7: [[META0]] = !{float 0x3FFCCCCCC0000000} +; GFX7: [[META1]] = !{float 0x3FFCCCCCA0000000} +; GFX7: [[META2]] = !{float 2.000000e+00} +;. +; GFX8: [[META0]] = !{float 0x3FFCCCCCA0000000} +; GFX8: [[META1]] = !{float 2.000000e+00} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index b8b3928394d5..f71506464f6c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -8196,6 +8196,517 @@ define <4 x half> @v_log_v4f16_fast(<4 x half> %in) { ret <4 x half> %result } +define half @v_log_f16_fpmath(half %x) { +; SI-LABEL: v_log_f16_fpmath: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: v_log_f32_e32 v0, v0 +; SI-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: v_log_f16_fpmath: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_log_f16_e32 v0, v0 +; VI-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: v_log_f16_fpmath: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_log_f16_e32 v0, v0 +; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-TRUE16-LABEL: v_log_f16_fpmath: +; GFX1100-SDAG-TRUE16: ; %bb.0: +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x398c, v0.l +; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-FAKE16-LABEL: v_log_f16_fpmath: +; GFX1100-SDAG-FAKE16: ; %bb.0: +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-TRUE16-LABEL: v_log_f16_fpmath: +; GFX1100-GISEL-TRUE16: ; %bb.0: +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x398c, v0.l +; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-FAKE16-LABEL: v_log_f16_fpmath: +; GFX1100-GISEL-FAKE16: ; %bb.0: +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_log_f16_fpmath: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_log_f16_fpmath: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call half @llvm.log.f16(half %x), !fpmath !{float 2.0} + ret half %result +} + +define <2 x half> @v_log_v2f16_fpmath(<2 x half> %x) { +; SI-SDAG-LABEL: v_log_v2f16_fpmath: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_v2f16_fpmath: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 
0x3f317218, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_v2f16_fpmath: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x398c +; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_v2f16_fpmath: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 +; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x398c +; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1 +; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_v2f16_fpmath: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x398c +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_v2f16_fpmath: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 +; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x398c398c +; GFX900-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-TRUE16-LABEL: v_log_v2f16_fpmath: +; GFX1100-SDAG-TRUE16: ; %bb.0: +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-TRUE16-NEXT: v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1] +; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-FAKE16-LABEL: v_log_v2f16_fpmath: +; GFX1100-SDAG-FAKE16: ; %bb.0: +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-FAKE16-NEXT: v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1] +; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-TRUE16-LABEL: v_log_v2f16_fpmath: +; GFX1100-GISEL-TRUE16: ; %bb.0: +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-TRUE16-NEXT: v_pk_mul_f16 v0, 0x398c398c, v0 +; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-FAKE16-LABEL: v_log_v2f16_fpmath: +; GFX1100-GISEL-FAKE16: ; %bb.0: +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; 
GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-FAKE16-NEXT: v_pk_mul_f16 v0, 0x398c398c, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_log_v2f16_fpmath: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_log_v2f16_fpmath: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call <2 x half> @llvm.log.v2f16(<2 x half> %x), !fpmath !{float 2.0} + ret <2 x half> %result +} + +define <3 x half> @v_log_v3f16_fpmath(<3 x half> %x) { +; SI-LABEL: v_log_v3f16_fpmath: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_log_f32_e32 v2, v2 +; SI-NEXT: v_log_f32_e32 v0, v0 +; SI-NEXT: v_log_f32_e32 v1, v1 +; SI-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; SI-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; SI-NEXT: v_or_b32_e32 v0, v0, v2 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: v_log_v3f16_fpmath: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_log_f16_e32 v2, v0 +; VI-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-NEXT: v_log_f16_e32 v1, v1 +; VI-NEXT: v_mov_b32_e32 v3, 0x398c +; VI-NEXT: v_mul_f16_e32 v2, 0x398c, v2 +; VI-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_mul_f16_e32 v1, 0x398c, v1 +; VI-NEXT: v_or_b32_e32 
v0, v2, v0 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_v3f16_fpmath: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 +; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x398c +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: v_pk_mul_f16 v1, v1, s4 +; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_v3f16_fpmath: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 +; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x398c398c +; GFX900-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x398c +; GFX900-GISEL-NEXT: v_pk_mul_f16 v1, v1, v2 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-TRUE16-LABEL: v_log_v3f16_fpmath: +; GFX1100-SDAG-TRUE16: ; %bb.0: +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l +; GFX1100-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x398c +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v2 op_sel_hi:[1,0] +; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x398c, v1.l +; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-FAKE16-LABEL: v_log_v3f16_fpmath: +; GFX1100-SDAG-FAKE16: ; %bb.0: +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-FAKE16-NEXT: 
v_lshrrev_b32_e32 v2, 16, v0 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v2, v2 +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-FAKE16-NEXT: v_pk_mul_f16 v1, 0x398c, v1 +; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX1100-SDAG-FAKE16-NEXT: v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1] +; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-TRUE16-LABEL: v_log_v3f16_fpmath: +; GFX1100-GISEL-TRUE16: ; %bb.0: +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-TRUE16-NEXT: v_pk_mul_f16 v0, 0x398c398c, v0 +; GFX1100-GISEL-TRUE16-NEXT: v_pk_mul_f16 v1, 0x398c, v1 +; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-FAKE16-LABEL: v_log_v3f16_fpmath: +; GFX1100-GISEL-FAKE16: ; %bb.0: +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v2, v2 +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-FAKE16-NEXT: v_pk_mul_f16 v1, 0x398c, v1 +; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX1100-GISEL-FAKE16-NEXT: v_pk_mul_f16 v0, 0x398c398c, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_log_v3f16_fpmath: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD 
+; +; CM-LABEL: v_log_v3f16_fpmath: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call <3 x half> @llvm.log.v3f16(<3 x half> %x), !fpmath !{float 2.0} + ret <3 x half> %result +} + +define <4 x half> @v_log_v4f16_fpmath(<4 x half> %x) { +; SI-SDAG-LABEL: v_log_v4f16_fpmath: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v3 +; SI-SDAG-NEXT: v_or_b32_e32 v1, v1, v2 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log_v4f16_fpmath: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v0 +; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v3, v3 +; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 
0x3f317218, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; SI-GISEL-NEXT: v_or_b32_e32 v1, v1, v2 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log_v4f16_fpmath: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f16_e32 v2, v1 +; VI-SDAG-NEXT: v_log_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x398c +; VI-SDAG-NEXT: v_mul_f16_e32 v2, 0x398c, v2 +; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; VI-SDAG-NEXT: v_mul_f16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v3 +; VI-SDAG-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log_v4f16_fpmath: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f16_e32 v2, v0 +; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_log_f16_e32 v3, v1 +; VI-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x398c +; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x398c, v2 +; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-GISEL-NEXT: v_mul_f16_e32 v3, 0x398c, v3 +; 
VI-GISEL-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; VI-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log_v4f16_fpmath: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: v_log_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 +; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x398c +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] +; GFX900-SDAG-NEXT: v_pack_b32_f16 v1, v1, v3 +; GFX900-SDAG-NEXT: v_pk_mul_f16 v1, v1, s4 op_sel_hi:[1,0] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log_v4f16_fpmath: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 +; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_log_f16_e32 v3, v1 +; GFX900-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x398c398c +; GFX900-GISEL-NEXT: v_pack_b32_f16 v1, v3, v1 +; GFX900-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX900-GISEL-NEXT: v_pk_mul_f16 v1, v1, v2 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-TRUE16-LABEL: v_log_v4f16_fpmath: +; GFX1100-SDAG-TRUE16: ; %bb.0: +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v1.h, v1.h +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v1.l, 
v1.l +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-TRUE16-NEXT: v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1] +; GFX1100-SDAG-TRUE16-NEXT: v_pk_mul_f16 v1, 0x398c, v1 op_sel_hi:[0,1] +; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-FAKE16-LABEL: v_log_v4f16_fpmath: +; GFX1100-SDAG-FAKE16: ; %bb.0: +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1100-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v2, v2 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v3, v3 +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, v3 +; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-FAKE16-NEXT: v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1] +; GFX1100-SDAG-FAKE16-NEXT: v_pk_mul_f16 v1, 0x398c, v1 op_sel_hi:[0,1] +; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-TRUE16-LABEL: v_log_v4f16_fpmath: +; GFX1100-GISEL-TRUE16: ; %bb.0: +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v1.h, v1.h +; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) +; GFX1100-GISEL-TRUE16-NEXT: v_pk_mul_f16 v0, 0x398c398c, v0 +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-TRUE16-NEXT: v_pk_mul_f16 v1, 0x398c398c, v1 +; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX1100-GISEL-FAKE16-LABEL: v_log_v4f16_fpmath: +; GFX1100-GISEL-FAKE16: ; %bb.0: +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v2, v2 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v3, v3 +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v1, v1, v3 +; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-FAKE16-NEXT: v_pk_mul_f16 v0, 0x398c398c, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_pk_mul_f16 v1, 0x398c398c, v1 +; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_log_v4f16_fpmath: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_log_v4f16_fpmath: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call <4 x half> @llvm.log.v4f16(<4 x half> %x), !fpmath !{float 2.0} + ret <4 x half> %result +} + declare float @llvm.fabs.f32(float) #2 declare float @llvm.log.f32(float) #2 declare <2 x float> @llvm.log.v2f32(<2 x float>) #2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index 18587466b70d..7c7036924a17 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -8196,6 +8196,517 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) { ret <4 x half> %result } +define half @v_log10_f16_fpmath(half %x) { +; SI-LABEL: v_log10_f16_fpmath: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: v_log_f32_e32 v0, v0 +; 
SI-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: v_log10_f16_fpmath: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_log_f16_e32 v0, v0 +; VI-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: v_log10_f16_fpmath: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_log_f16_e32 v0, v0 +; GFX900-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-TRUE16-LABEL: v_log10_f16_fpmath: +; GFX1100-SDAG-TRUE16: ; %bb.0: +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l +; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-FAKE16-LABEL: v_log10_f16_fpmath: +; GFX1100-SDAG-FAKE16: ; %bb.0: +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-TRUE16-LABEL: v_log10_f16_fpmath: +; GFX1100-GISEL-TRUE16: ; %bb.0: +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l +; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-FAKE16-LABEL: v_log10_f16_fpmath: +; GFX1100-GISEL-FAKE16: ; %bb.0: +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; 
GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_log10_f16_fpmath: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_log10_f16_fpmath: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call half @llvm.log10.f16(half %x), !fpmath !{float 2.0} + ret half %result +} + +define <2 x half> @v_log10_v2f16_fpmath(<2 x half> %x) { +; SI-SDAG-LABEL: v_log10_v2f16_fpmath: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_v2f16_fpmath: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_v2f16_fpmath: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 +; VI-SDAG-NEXT: 
v_mov_b32_e32 v2, 0x34d1 +; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_v2f16_fpmath: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 +; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x34d1 +; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 +; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_v2f16_fpmath: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x34d1 +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_v2f16_fpmath: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 +; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x34d134d1 +; GFX900-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-TRUE16-LABEL: v_log10_v2f16_fpmath: +; GFX1100-SDAG-TRUE16: ; %bb.0: +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; 
GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-TRUE16-NEXT: v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1] +; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-FAKE16-LABEL: v_log10_v2f16_fpmath: +; GFX1100-SDAG-FAKE16: ; %bb.0: +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-FAKE16-NEXT: v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1] +; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-TRUE16-LABEL: v_log10_v2f16_fpmath: +; GFX1100-GISEL-TRUE16: ; %bb.0: +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-TRUE16-NEXT: v_pk_mul_f16 v0, 0x34d134d1, v0 +; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-FAKE16-LABEL: v_log10_v2f16_fpmath: +; GFX1100-GISEL-FAKE16: ; %bb.0: +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-FAKE16-NEXT: v_pk_mul_f16 v0, 0x34d134d1, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_log10_v2f16_fpmath: +; R600: ; 
%bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_log10_v2f16_fpmath: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call <2 x half> @llvm.log10.v2f16(<2 x half> %x), !fpmath !{float 2.0} + ret <2 x half> %result +} + +define <3 x half> @v_log10_v3f16_fpmath(<3 x half> %x) { +; SI-LABEL: v_log10_v3f16_fpmath: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_log_f32_e32 v2, v2 +; SI-NEXT: v_log_f32_e32 v0, v0 +; SI-NEXT: v_log_f32_e32 v1, v1 +; SI-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; SI-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; SI-NEXT: v_or_b32_e32 v0, v0, v2 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: v_log10_v3f16_fpmath: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_log_f16_e32 v2, v0 +; VI-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-NEXT: v_log_f16_e32 v1, v1 +; VI-NEXT: v_mov_b32_e32 v3, 0x34d1 +; VI-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 +; VI-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 +; VI-NEXT: v_or_b32_e32 v0, v2, v0 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_v3f16_fpmath: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 +; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x34d1 +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: 
v_pk_mul_f16 v1, v1, s4 +; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_v3f16_fpmath: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 +; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x34d134d1 +; GFX900-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x34d1 +; GFX900-GISEL-NEXT: v_pk_mul_f16 v1, v1, v2 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-TRUE16-LABEL: v_log10_v3f16_fpmath: +; GFX1100-SDAG-TRUE16: ; %bb.0: +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l +; GFX1100-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x34d1 +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v2 op_sel_hi:[1,0] +; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x34d1, v1.l +; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-FAKE16-LABEL: v_log10_v3f16_fpmath: +; GFX1100-SDAG-FAKE16: ; %bb.0: +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v2, v2 +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-FAKE16-NEXT: v_pk_mul_f16 v1, 0x34d1, v1 +; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 +; 
GFX1100-SDAG-FAKE16-NEXT: v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1] +; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-TRUE16-LABEL: v_log10_v3f16_fpmath: +; GFX1100-GISEL-TRUE16: ; %bb.0: +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-TRUE16-NEXT: v_pk_mul_f16 v0, 0x34d134d1, v0 +; GFX1100-GISEL-TRUE16-NEXT: v_pk_mul_f16 v1, 0x34d1, v1 +; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-FAKE16-LABEL: v_log10_v3f16_fpmath: +; GFX1100-GISEL-FAKE16: ; %bb.0: +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v2, v2 +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-FAKE16-NEXT: v_pk_mul_f16 v1, 0x34d1, v1 +; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX1100-GISEL-FAKE16-NEXT: v_pk_mul_f16 v0, 0x34d134d1, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_log10_v3f16_fpmath: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_log10_v3f16_fpmath: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call <3 x half> @llvm.log10.v3f16(<3 x half> %x), !fpmath !{float 2.0} + ret <3 x half> %result +} + +define <4 x half> @v_log10_v4f16_fpmath(<4 x half> %x) { +; SI-SDAG-LABEL: v_log10_v4f16_fpmath: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; SI-SDAG-NEXT: 
v_lshrrev_b32_e32 v3, 16, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v3 +; SI-SDAG-NEXT: v_or_b32_e32 v1, v1, v2 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_log10_v4f16_fpmath: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v0 +; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v3, v3 +; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; 
SI-GISEL-NEXT: v_or_b32_e32 v1, v1, v2 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_log10_v4f16_fpmath: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_log_f16_e32 v2, v1 +; VI-SDAG-NEXT: v_log_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x34d1 +; VI-SDAG-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 +; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; VI-SDAG-NEXT: v_mul_f16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v3 +; VI-SDAG-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_log10_v4f16_fpmath: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_log_f16_e32 v2, v0 +; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_log_f16_e32 v3, v1 +; VI-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x34d1 +; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 +; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-GISEL-NEXT: v_mul_f16_e32 v3, 0x34d1, v3 +; VI-GISEL-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; VI-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log10_v4f16_fpmath: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: v_log_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 +; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x34d1 +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] +; GFX900-SDAG-NEXT: v_pack_b32_f16 v1, v1, v3 +; GFX900-SDAG-NEXT: v_pk_mul_f16 v1, v1, s4 op_sel_hi:[1,0] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_log10_v4f16_fpmath: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 +; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_log_f16_e32 v3, v1 +; GFX900-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x34d134d1 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v1, v3, v1 +; GFX900-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX900-GISEL-NEXT: v_pk_mul_f16 v1, v1, v2 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-TRUE16-LABEL: v_log10_v4f16_fpmath: +; GFX1100-SDAG-TRUE16: ; %bb.0: +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v1.h, v1.h +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-TRUE16-NEXT: v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1] +; GFX1100-SDAG-TRUE16-NEXT: v_pk_mul_f16 v1, 0x34d1, v1 op_sel_hi:[0,1] +; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-FAKE16-LABEL: v_log10_v4f16_fpmath: +; GFX1100-SDAG-FAKE16: ; %bb.0: +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX1100-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1100-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v2, v2 +; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v3, v3 +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, v3 +; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-FAKE16-NEXT: v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1] +; GFX1100-SDAG-FAKE16-NEXT: v_pk_mul_f16 v1, 0x34d1, v1 op_sel_hi:[0,1] +; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-TRUE16-LABEL: v_log10_v4f16_fpmath: +; GFX1100-GISEL-TRUE16: ; %bb.0: +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l +; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v1.h, v1.h +; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) +; GFX1100-GISEL-TRUE16-NEXT: v_pk_mul_f16 v0, 0x34d134d1, v0 +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-TRUE16-NEXT: v_pk_mul_f16 v1, 0x34d134d1, v1 +; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-FAKE16-LABEL: v_log10_v4f16_fpmath: +; GFX1100-GISEL-FAKE16: ; %bb.0: +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-FAKE16-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v2, v2 +; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v3, v3 +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 +; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v1, v1, v3 +; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-FAKE16-NEXT: v_pk_mul_f16 v0, 0x34d134d1, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_pk_mul_f16 v1, 0x34d134d1, v1 +; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_log10_v4f16_fpmath: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_log10_v4f16_fpmath: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call <4 x half> @llvm.log10.v4f16(<4 x half> %x), !fpmath !{float 2.0} + ret <4 x half> %result +} + declare float @llvm.fabs.f32(float) #2 declare float @llvm.log10.f32(float) #2 declare <2 x float> @llvm.log10.v2f32(<2 x float>) #2