[SDAG] Limit sincos/frexp stack slot folding to stores chained to entry (#115906)

When the chain is not the entry node, there is a risk that the stores are
within a (CALLSEQ_START, CALLSEQ_END) pair, which, once the node is expanded,
will lead to nested call sequences.

It should be possible to check for this and allow more cases, but for
now, let's limit this to cases where it's definitely safe.

Fixes #115323
This commit is contained in:
Benjamin Maxwell 2024-11-12 20:48:41 +00:00 committed by GitHub
parent 5cd6e21bdd
commit 014455a587
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 208 additions and 107 deletions

View File

@ -2509,7 +2509,11 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
// Find users of the node that store the results (and share input chains). The
// destination pointers can be used instead of creating stack allocations.
SDValue StoresInChain{};
// FIXME: This should allow stores with the same chains (not just the entry
// chain), but there's a risk the store is within a (CALLSEQ_START,
// CALLSEQ_END) pair, which after this expansion will lead to nested call
// sequences.
SDValue InChain = getEntryNode();
SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
for (SDNode *User : Node->uses()) {
if (!ISD::isNormalStore(User))
@ -2522,11 +2526,9 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
ST->getAddressSpace() != 0 ||
ST->getAlign() <
getDataLayout().getABITypeAlign(StoreType->getScalarType()) ||
(StoresInChain && ST->getChain() != StoresInChain) ||
Node->isPredecessorOf(ST->getChain().getNode()))
ST->getChain() != InChain)
continue;
ResultStores[ResNo] = ST;
StoresInChain = ST->getChain();
}
TargetLowering::ArgListTy Args;
@ -2568,7 +2570,6 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
Type *RetType = CallRetResNo.has_value()
? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
: Type::getVoidTy(Ctx);
SDValue InChain = StoresInChain ? StoresInChain : getEntryNode();
SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName,
TLI->getPointerTy(getDataLayout()));
TargetLowering::CallLoweringInfo CLI(*this);

View File

@ -1365,33 +1365,45 @@ define dso_local fp128 @qpFREXP(ptr %a, ptr %b) {
; CHECK-LABEL: qpFREXP:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: std r0, 48(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: std r0, 80(r1)
; CHECK-NEXT: addi r5, r1, 44
; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: mr r5, r4
; CHECK-NEXT: bl frexpf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: lwz r3, 44(r1)
; CHECK-NEXT: stw r3, 0(r30)
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFREXP:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: std r0, 48(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: std r0, 80(r1)
; CHECK-P8-NEXT: addi r5, r1, 44
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r5, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl frexpf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: lwz r3, 44(r1)
; CHECK-P8-NEXT: stw r3, 0(r30)
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:

View File

@ -543,42 +543,50 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwind {
; RV32IFD-LABEL: test_frexp_v4f32_v4i32:
; RV32IFD: # %bb.0:
; RV32IFD-NEXT: addi sp, sp, -48
; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs1, 24(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs2, 16(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs3, 8(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: addi sp, sp, -64
; RV32IFD-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 48(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs1, 40(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs2, 32(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs3, 24(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fmv.s fs0, fa3
; RV32IFD-NEXT: fmv.s fs1, fa2
; RV32IFD-NEXT: fmv.s fs2, fa1
; RV32IFD-NEXT: mv s0, a0
; RV32IFD-NEXT: addi a0, a0, 16
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: fmv.s fs3, fa0
; RV32IFD-NEXT: addi a0, s0, 20
; RV32IFD-NEXT: addi a0, sp, 12
; RV32IFD-NEXT: fmv.s fa0, fs2
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: fmv.s fs2, fa0
; RV32IFD-NEXT: addi a0, s0, 24
; RV32IFD-NEXT: addi a0, sp, 16
; RV32IFD-NEXT: fmv.s fa0, fs1
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: fmv.s fs1, fa0
; RV32IFD-NEXT: addi a0, s0, 28
; RV32IFD-NEXT: addi a0, sp, 20
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw a1, 12(sp)
; RV32IFD-NEXT: lw a2, 16(sp)
; RV32IFD-NEXT: lw a3, 20(sp)
; RV32IFD-NEXT: sw a0, 16(s0)
; RV32IFD-NEXT: sw a1, 20(s0)
; RV32IFD-NEXT: sw a2, 24(s0)
; RV32IFD-NEXT: sw a3, 28(s0)
; RV32IFD-NEXT: fsw fs3, 0(s0)
; RV32IFD-NEXT: fsw fs2, 4(s0)
; RV32IFD-NEXT: fsw fs1, 8(s0)
; RV32IFD-NEXT: fsw fa0, 12(s0)
; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fld fs1, 24(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fld fs2, 16(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fld fs3, 8(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 48
; RV32IFD-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fld fs1, 40(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fld fs2, 32(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fld fs3, 24(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 64
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_frexp_v4f32_v4i32:
@ -631,44 +639,52 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
;
; RV32IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32:
; RV32IZFINXZDINX: # %bb.0:
; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: addi sp, sp, -48
; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: mv s0, a4
; RV32IZFINXZDINX-NEXT: mv s1, a3
; RV32IZFINXZDINX-NEXT: mv s2, a2
; RV32IZFINXZDINX-NEXT: mv a2, a1
; RV32IZFINXZDINX-NEXT: mv s3, a0
; RV32IZFINXZDINX-NEXT: addi a1, a0, 16
; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: mv s4, a0
; RV32IZFINXZDINX-NEXT: addi a1, s3, 20
; RV32IZFINXZDINX-NEXT: addi a1, sp, 12
; RV32IZFINXZDINX-NEXT: mv a0, s2
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: mv s2, a0
; RV32IZFINXZDINX-NEXT: addi a1, s3, 24
; RV32IZFINXZDINX-NEXT: addi a1, sp, 16
; RV32IZFINXZDINX-NEXT: mv a0, s1
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: mv s1, a0
; RV32IZFINXZDINX-NEXT: addi a1, s3, 28
; RV32IZFINXZDINX-NEXT: addi a1, sp, 20
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: lw a1, 8(sp)
; RV32IZFINXZDINX-NEXT: lw a2, 12(sp)
; RV32IZFINXZDINX-NEXT: lw a3, 16(sp)
; RV32IZFINXZDINX-NEXT: lw a4, 20(sp)
; RV32IZFINXZDINX-NEXT: sw a1, 16(s3)
; RV32IZFINXZDINX-NEXT: sw a2, 20(s3)
; RV32IZFINXZDINX-NEXT: sw a3, 24(s3)
; RV32IZFINXZDINX-NEXT: sw a4, 28(s3)
; RV32IZFINXZDINX-NEXT: sw s4, 0(s3)
; RV32IZFINXZDINX-NEXT: sw s2, 4(s3)
; RV32IZFINXZDINX-NEXT: sw s1, 8(s3)
; RV32IZFINXZDINX-NEXT: sw a0, 12(s3)
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 48
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32:
@ -1080,34 +1096,41 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IFD-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
; RV32IFD: # %bb.0:
; RV32IFD-NEXT: addi sp, sp, -32
; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fmv.s fs0, fa2
; RV32IFD-NEXT: fmv.s fs1, fa1
; RV32IFD-NEXT: fmv.s fs2, fa0
; RV32IFD-NEXT: addi sp, sp, -48
; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs1, 24(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs2, 16(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fmv.s fs0, fa3
; RV32IFD-NEXT: fmv.s fs1, fa2
; RV32IFD-NEXT: fmv.s fs2, fa1
; RV32IFD-NEXT: mv s0, a0
; RV32IFD-NEXT: addi a0, a0, 12
; RV32IFD-NEXT: fmv.s fa0, fa3
; RV32IFD-NEXT: mv a0, sp
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: addi a0, s0, 8
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: addi a0, sp, 4
; RV32IFD-NEXT: fmv.s fa0, fs2
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: addi a0, s0, 4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: fmv.s fa0, fs1
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: fmv.s fa0, fs2
; RV32IFD-NEXT: mv a0, s0
; RV32IFD-NEXT: addi a0, sp, 12
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: lw a0, 0(sp)
; RV32IFD-NEXT: lw a1, 4(sp)
; RV32IFD-NEXT: lw a2, 8(sp)
; RV32IFD-NEXT: lw a3, 12(sp)
; RV32IFD-NEXT: sw a0, 0(s0)
; RV32IFD-NEXT: sw a1, 4(s0)
; RV32IFD-NEXT: sw a2, 8(s0)
; RV32IFD-NEXT: sw a3, 12(s0)
; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fld fs1, 24(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: fld fs2, 16(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 48
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
@ -1151,34 +1174,43 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
;
; RV32IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
; RV32IZFINXZDINX: # %bb.0:
; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: mv s0, a3
; RV32IZFINXZDINX-NEXT: mv s1, a2
; RV32IZFINXZDINX-NEXT: mv s2, a1
; RV32IZFINXZDINX-NEXT: addi sp, sp, -48
; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: mv s0, a4
; RV32IZFINXZDINX-NEXT: mv s1, a3
; RV32IZFINXZDINX-NEXT: mv s2, a2
; RV32IZFINXZDINX-NEXT: mv a2, a1
; RV32IZFINXZDINX-NEXT: mv s3, a0
; RV32IZFINXZDINX-NEXT: addi a1, a0, 12
; RV32IZFINXZDINX-NEXT: mv a0, a4
; RV32IZFINXZDINX-NEXT: addi a1, sp, 12
; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: addi a1, s3, 8
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: addi a1, sp, 16
; RV32IZFINXZDINX-NEXT: mv a0, s2
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: addi a1, s3, 4
; RV32IZFINXZDINX-NEXT: addi a1, sp, 20
; RV32IZFINXZDINX-NEXT: mv a0, s1
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: mv a0, s2
; RV32IZFINXZDINX-NEXT: mv a1, s3
; RV32IZFINXZDINX-NEXT: addi a1, sp, 24
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: lw a0, 12(sp)
; RV32IZFINXZDINX-NEXT: lw a1, 16(sp)
; RV32IZFINXZDINX-NEXT: lw a2, 20(sp)
; RV32IZFINXZDINX-NEXT: lw a3, 24(sp)
; RV32IZFINXZDINX-NEXT: sw a0, 0(s3)
; RV32IZFINXZDINX-NEXT: sw a1, 4(s3)
; RV32IZFINXZDINX-NEXT: sw a2, 8(s3)
; RV32IZFINXZDINX-NEXT: sw a3, 12(s3)
; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 48
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32_only_use_exp:

View File

@ -325,27 +325,28 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) {
;
; WIN32-LABEL: test_frexp_v4f32_v4i32:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: subl $44, %esp
; WIN32-NEXT: subl $60, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: leal 24(%esi), %eax
; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
; WIN32-NEXT: leal 20(%esi), %eax
; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
; WIN32-NEXT: leal 16(%esi), %eax
; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: leal 28(%esi), %eax
; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
@ -360,13 +361,22 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) {
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl %edi, 28(%esi)
; WIN32-NEXT: movl %edx, 24(%esi)
; WIN32-NEXT: movl %ecx, 20(%esi)
; WIN32-NEXT: movl %eax, 16(%esi)
; WIN32-NEXT: fstps 12(%esi)
; WIN32-NEXT: fstps 8(%esi)
; WIN32-NEXT: fstps 4(%esi)
; WIN32-NEXT: fstps (%esi)
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: addl $44, %esp
; WIN32-NEXT: addl $60, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: retl
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
ret { <4 x float>, <4 x i32> } %result
@ -489,35 +499,46 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
;
; WIN32-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: subl $12, %esp
; WIN32-NEXT: subl $28, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: leal 8(%esi), %eax
; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
; WIN32-NEXT: leal 4(%esi), %eax
; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
; WIN32-NEXT: leal 12(%esi), %eax
; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
; WIN32-NEXT: movl %esi, {{[0-9]+}}(%esp)
; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl %edi, 12(%esi)
; WIN32-NEXT: movl %edx, 8(%esi)
; WIN32-NEXT: movl %ecx, 4(%esi)
; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: addl $12, %esp
; WIN32-NEXT: addl $28, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: retl
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
%result.1 = extractvalue { <4 x float>, <4 x i32> } %result, 1

View File

@ -0,0 +1,35 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 5
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s
; Test for issue https://github.com/llvm/llvm-project/issues/115323
declare double @g(double, double)
; Regression test for llvm-project issue #115323.
;
; @f computes sin(%a) and cos(%a) and passes both (cos with a constant added)
; straight on as the arguments of a call to @g. The expected assembly below
; shows a single `sincos` libcall whose two result pointers refer to stack
; temporaries (24(%esp) and 32(%esp)); the values are reloaded afterwards and
; only then written into the outgoing argument slots ((%esp) and 8(%esp)) for
; the call to @g.
; NOTE(review): presumably the argument stores for @g sit inside that call's
; CALLSEQ_START/CALLSEQ_END pair, which is why sincos must not write into them
; directly -- confirm against the linked issue.
define double @f(double %a) {
; CHECK-LABEL: f:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subl $44, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: fldl 48(%esp)
; CHECK-NEXT: leal 24(%esp), %eax
; CHECK-NEXT: movl %eax, 12(%esp)
; CHECK-NEXT: leal 32(%esp), %eax
; CHECK-NEXT: movl %eax, 8(%esp)
; CHECK-NEXT: fstpl (%esp)
; CHECK-NEXT: calll sincos
; CHECK-NEXT: fldl 32(%esp)
; CHECK-NEXT: fldl 24(%esp)
; CHECK-NEXT: faddl {{\.?LCPI[0-9]+_[0-9]+}}
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstpl 8(%esp)
; CHECK-NEXT: fstpl (%esp)
; CHECK-NEXT: calll g@PLT
; CHECK-NEXT: addl $44, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
entry:
; Both intrinsics take the same operand %a; the expected output above contains
; one sincos call rather than separate sin and cos calls.
%0 = tail call double @llvm.sin.f64(double %a)
%1 = tail call double @llvm.cos.f64(double %a)
%add = fadd double %1, 3.140000e+00
%call = tail call double @g(double %add, double %0)
ret double %call
}