AMDGPU: Handle sitofp and uitofp exponents in fast pow expansion
https://reviews.llvm.org/D158996
This commit is contained in:
parent
699685b718
commit
dac8f974b5
@ -567,7 +567,8 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool isKnownIntegral(const Value *V) {
|
||||
static bool isKnownIntegral(const Value *V, const DataLayout &DL,
|
||||
FastMathFlags FMF) {
|
||||
if (isa<UndefValue>(V))
|
||||
return true;
|
||||
|
||||
@ -587,6 +588,24 @@ static bool isKnownIntegral(const Value *V) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const Instruction *I = dyn_cast<Instruction>(V);
|
||||
if (!I)
|
||||
return false;
|
||||
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::SIToFP:
|
||||
case Instruction::UIToFP:
|
||||
// TODO: Could check nofpclass(inf) on incoming argument
|
||||
if (FMF.noInfs())
|
||||
return true;
|
||||
|
||||
// Need to check int size cannot produce infinity, which computeKnownFPClass
|
||||
// knows how to do already.
|
||||
return isKnownNeverInfinity(I, DL);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1013,7 +1032,7 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
|
||||
if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
|
||||
// We cannot handle corner cases for a general pow() function, give up
|
||||
// unless y is a constant integral value. Then proceed as if it were pown.
|
||||
if (!isKnownIntegral(opr1))
|
||||
if (!isKnownIntegral(opr1, M->getDataLayout(), FPOp->getFastMathFlags()))
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -57,12 +57,95 @@ define half @test_pow_fast_f16__integral_y(half %x, i32 %y.i) {
|
||||
; CHECK-LABEL: test_pow_fast_f16__integral_y:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z3powDhDh@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z3powDhDh@rel32@hi+12
|
||||
; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
|
||||
; CHECK-NEXT: s_setpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_mov_b32 s16, s33
|
||||
; CHECK-NEXT: s_mov_b32 s33, s32
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
|
||||
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: s_mov_b64 exec, s[18:19]
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s16, 14
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s34, 2
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s35, 3
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s36, 4
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s37, 5
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s38, 6
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s39, 7
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0x800
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s40, 8
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s41, 9
|
||||
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2Dh@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2Dh@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: v_mov_b32_e32 v42, v0
|
||||
; CHECK-NEXT: v_cvt_f32_i32_e32 v0, v1
|
||||
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
|
||||
; CHECK-NEXT: v_cvt_f16_f32_e32 v43, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v42
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
|
||||
; CHECK-NEXT: v_mov_b32_e32 v41, v31
|
||||
; CHECK-NEXT: s_mov_b32 s42, s15
|
||||
; CHECK-NEXT: s_mov_b32 s43, s14
|
||||
; CHECK-NEXT: s_mov_b32 s44, s13
|
||||
; CHECK-NEXT: s_mov_b32 s45, s12
|
||||
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
|
||||
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
|
||||
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2Dh@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2Dh@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
||||
; CHECK-NEXT: v_mul_f16_e32 v0, v0, v43
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s45
|
||||
; CHECK-NEXT: s_mov_b32 s13, s44
|
||||
; CHECK-NEXT: s_mov_b32 s14, s43
|
||||
; CHECK-NEXT: s_mov_b32 s15, s42
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v41
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_cvt_i16_f16_e32 v1, v43
|
||||
; CHECK-NEXT: v_lshlrev_b16_e32 v1, 15, v1
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, v1, v42
|
||||
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: v_or_b32_e32 v0, v1, v0
|
||||
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
|
||||
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
|
||||
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
|
||||
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
|
||||
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
|
||||
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
|
||||
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
|
||||
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
|
||||
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
|
||||
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
|
||||
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
|
||||
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
|
||||
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0xf800
|
||||
; CHECK-NEXT: s_mov_b32 s33, s4
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%y = sitofp i32 %y.i to half
|
||||
%pow = tail call fast half @_Z3powDhDh(half %x, half %y)
|
||||
ret half %pow
|
||||
@ -72,11 +155,93 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
|
||||
; CHECK-LABEL: test_pow_fast_f32__integral_y:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z3powff@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z3powff@rel32@hi+12
|
||||
; CHECK-NEXT: s_setpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_mov_b32 s16, s33
|
||||
; CHECK-NEXT: s_mov_b32 s33, s32
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
|
||||
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: s_mov_b64 exec, s[18:19]
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s16, 14
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s34, 2
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s35, 3
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s36, 4
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s37, 5
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s38, 6
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s39, 7
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0x800
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s40, 8
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s41, 9
|
||||
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2f@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2f@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
|
||||
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
|
||||
; CHECK-NEXT: v_mov_b32_e32 v42, v0
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffffff, v42
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
|
||||
; CHECK-NEXT: v_mov_b32_e32 v41, v31
|
||||
; CHECK-NEXT: s_mov_b32 s42, s15
|
||||
; CHECK-NEXT: s_mov_b32 s43, s14
|
||||
; CHECK-NEXT: s_mov_b32 s44, s13
|
||||
; CHECK-NEXT: s_mov_b32 s45, s12
|
||||
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
|
||||
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
|
||||
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
|
||||
; CHECK-NEXT: v_cvt_f32_i32_e32 v43, v1
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2f@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2f@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
||||
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v43
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s45
|
||||
; CHECK-NEXT: s_mov_b32 s13, s44
|
||||
; CHECK-NEXT: s_mov_b32 s14, s43
|
||||
; CHECK-NEXT: s_mov_b32 s15, s42
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v41
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v43
|
||||
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
|
||||
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
|
||||
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1
|
||||
; CHECK-NEXT: v_and_or_b32 v0, v1, v42, v0
|
||||
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
|
||||
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
|
||||
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
|
||||
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
|
||||
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
|
||||
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
|
||||
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
|
||||
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
|
||||
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
|
||||
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0xf800
|
||||
; CHECK-NEXT: s_mov_b32 s33, s4
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%y = sitofp i32 %y.i to float
|
||||
%pow = tail call fast float @_Z3powff(float %x, float %y)
|
||||
ret float %pow
|
||||
@ -86,11 +251,98 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
|
||||
; CHECK-LABEL: test_pow_fast_f64__integral_y:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v2
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z3powdd@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z3powdd@rel32@hi+12
|
||||
; CHECK-NEXT: s_setpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_mov_b32 s16, s33
|
||||
; CHECK-NEXT: s_mov_b32 s33, s32
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
|
||||
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: s_mov_b64 exec, s[18:19]
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s16, 14
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s34, 2
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s35, 3
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s36, 4
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s37, 5
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s38, 6
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s39, 7
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0x800
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s40, 8
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s41, 9
|
||||
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
|
||||
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
|
||||
; CHECK-NEXT: v_mov_b32_e32 v43, v1
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v42, v2
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v43
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
|
||||
; CHECK-NEXT: v_mov_b32_e32 v41, v31
|
||||
; CHECK-NEXT: s_mov_b32 s42, s15
|
||||
; CHECK-NEXT: s_mov_b32 s43, s14
|
||||
; CHECK-NEXT: s_mov_b32 s44, s13
|
||||
; CHECK-NEXT: s_mov_b32 s45, s12
|
||||
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
|
||||
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
|
||||
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
|
||||
; CHECK-NEXT: v_cvt_f64_i32_e32 v[44:45], v42
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[44:45]
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s45
|
||||
; CHECK-NEXT: s_mov_b32 s13, s44
|
||||
; CHECK-NEXT: s_mov_b32 s14, s43
|
||||
; CHECK-NEXT: s_mov_b32 s15, s42
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v41
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 31, v42
|
||||
; CHECK-NEXT: v_and_b32_e32 v2, v2, v43
|
||||
; CHECK-NEXT: buffer_load_dword v45, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: v_or_b32_e32 v1, v2, v1
|
||||
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
|
||||
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
|
||||
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
|
||||
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
|
||||
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
|
||||
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
|
||||
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
|
||||
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
|
||||
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
|
||||
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
|
||||
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
|
||||
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
|
||||
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
|
||||
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0xf800
|
||||
; CHECK-NEXT: s_mov_b32 s33, s4
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%y = sitofp i32 %y.i to double
|
||||
%pow = tail call fast double @_Z3powdd(double %x, double %y)
|
||||
ret double %pow
|
||||
|
@ -2208,8 +2208,18 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
|
||||
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp
|
||||
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
|
||||
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
|
||||
; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn float @_Z3powff(float [[X]], float [[Y_CAST]])
|
||||
; CHECK-NEXT: ret float [[POW]]
|
||||
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
|
||||
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])
|
||||
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
|
||||
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])
|
||||
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
|
||||
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32
|
||||
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
|
||||
; CHECK-NEXT: ret float [[TMP4]]
|
||||
;
|
||||
%y.cast = sitofp i32 %y to float
|
||||
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
|
||||
@ -2280,8 +2290,18 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
|
||||
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp
|
||||
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
|
||||
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
|
||||
; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn float @_Z3powff(float [[X]], float [[Y_CAST]])
|
||||
; CHECK-NEXT: ret float [[POW]]
|
||||
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
|
||||
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])
|
||||
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
|
||||
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])
|
||||
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
|
||||
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32
|
||||
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
|
||||
; CHECK-NEXT: ret float [[TMP4]]
|
||||
;
|
||||
%y.cast = uitofp i32 %y to float
|
||||
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
|
||||
@ -2318,8 +2338,18 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
|
||||
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256
|
||||
; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
|
||||
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float
|
||||
; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn float @_Z3powff(float [[X]], float [[Y_CAST]])
|
||||
; CHECK-NEXT: ret float [[POW]]
|
||||
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
|
||||
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])
|
||||
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
|
||||
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])
|
||||
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
|
||||
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32
|
||||
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
|
||||
; CHECK-NEXT: ret float [[TMP4]]
|
||||
;
|
||||
%y.cast = uitofp i256 %y to float
|
||||
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
|
||||
@ -2330,8 +2360,18 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
|
||||
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256
|
||||
; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
|
||||
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float
|
||||
; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn float @_Z3powff(float [[X]], float [[Y_CAST]])
|
||||
; CHECK-NEXT: ret float [[POW]]
|
||||
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
|
||||
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])
|
||||
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
|
||||
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])
|
||||
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
|
||||
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32
|
||||
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
|
||||
; CHECK-NEXT: ret float [[TMP4]]
|
||||
;
|
||||
%y.cast = sitofp i256 %y to float
|
||||
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
|
||||
@ -2342,8 +2382,18 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
|
||||
; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp
|
||||
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
|
||||
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
|
||||
; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y_CAST]])
|
||||
; CHECK-NEXT: ret <2 x float> [[POW]]
|
||||
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
|
||||
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[__FABS]])
|
||||
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]
|
||||
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])
|
||||
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
|
||||
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
|
||||
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>
|
||||
; CHECK-NEXT: ret <2 x float> [[TMP4]]
|
||||
;
|
||||
%y.cast = sitofp <2 x i32> %y to <2 x float>
|
||||
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
|
||||
@ -2378,8 +2428,18 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
|
||||
; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp
|
||||
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
|
||||
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
|
||||
; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y_CAST]])
|
||||
; CHECK-NEXT: ret <2 x float> [[POW]]
|
||||
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
|
||||
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[__FABS]])
|
||||
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]
|
||||
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])
|
||||
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
|
||||
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
|
||||
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>
|
||||
; CHECK-NEXT: ret <2 x float> [[TMP4]]
|
||||
;
|
||||
%y.cast = uitofp <2 x i32> %y to <2 x float>
|
||||
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
|
||||
|
Loading…
x
Reference in New Issue
Block a user