[AMDGPU] Remove NoSignedZerosFPMath uses (#178343)
`NoSignedZerosFPMath` is one of the global flags in `resetTargetOptions`; users should use the per-instruction `nsz` flag instead. Note that `fneg_fadd_0_f64` from `AMDGPU/fneg-combines.new.ll` will regress when its `fadd` is annotated with `nsz`.
This commit is contained in:
parent
6912b91891
commit
62aa40a4dd
@ -143,8 +143,7 @@ static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
}
|
||||
|
||||
static bool mayIgnoreSignedZero(MachineInstr &MI) {
|
||||
const TargetOptions &Options = MI.getMF()->getTarget().Options;
|
||||
return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
|
||||
return MI.getFlag(MachineInstr::MIFlag::FmNsz);
|
||||
}
|
||||
|
||||
static bool isInv2Pi(const APFloat &APF) {
|
||||
|
||||
@ -631,9 +631,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
|
||||
}
|
||||
|
||||
bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
|
||||
if (getTargetMachine().Options.NoSignedZerosFPMath)
|
||||
return true;
|
||||
|
||||
const auto Flags = Op.getNode()->getFlags();
|
||||
if (Flags.hasNoSignedZeros())
|
||||
return true;
|
||||
@ -5007,7 +5004,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
|
||||
SDLoc SL(N);
|
||||
switch (Opc) {
|
||||
case ISD::FADD: {
|
||||
if (!mayIgnoreSignedZero(N0))
|
||||
if (!mayIgnoreSignedZero(N0) && !N->getFlags().hasNoSignedZeros())
|
||||
return SDValue();
|
||||
|
||||
// (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
|
||||
@ -5055,7 +5052,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
|
||||
case ISD::FMA:
|
||||
case ISD::FMAD: {
|
||||
// TODO: handle llvm.amdgcn.fma.legacy
|
||||
if (!mayIgnoreSignedZero(N0))
|
||||
if (!mayIgnoreSignedZero(N0) && !N->getFlags().hasNoSignedZeros())
|
||||
return SDValue();
|
||||
|
||||
// (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
|
||||
|
||||
@ -80,11 +80,6 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
|
||||
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
|
||||
ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
|
||||
|
||||
// FIXME: Shouldn't be target specific
|
||||
Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
|
||||
NoSignedZerosFPMath =
|
||||
NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
|
||||
|
||||
const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
|
||||
if (DynLdsGlobal || hasLDSKernelArgument(F))
|
||||
UsesDynamicLDS = true;
|
||||
|
||||
@ -61,8 +61,6 @@ protected:
|
||||
// Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC.
|
||||
bool IsChainFunction = false;
|
||||
|
||||
bool NoSignedZerosFPMath = false;
|
||||
|
||||
// Function may be memory bound.
|
||||
bool MemoryBound = false;
|
||||
|
||||
@ -107,10 +105,6 @@ public:
|
||||
return isEntryFunction() || isChainFunction();
|
||||
}
|
||||
|
||||
bool hasNoSignedZerosFPMath() const {
|
||||
return NoSignedZerosFPMath;
|
||||
}
|
||||
|
||||
bool isMemoryBound() const {
|
||||
return MemoryBound;
|
||||
}
|
||||
|
||||
@ -2773,7 +2773,6 @@ bool SIFoldOperandsImpl::run(MachineFunction &MF) {
|
||||
//
|
||||
// FIXME: Also need to check strictfp
|
||||
bool IsIEEEMode = MFI->getMode().IEEE;
|
||||
bool HasNSZ = MFI->hasNoSignedZerosFPMath();
|
||||
|
||||
bool Changed = false;
|
||||
for (MachineBasicBlock *MBB : depth_first(&MF)) {
|
||||
@ -2812,8 +2811,7 @@ bool SIFoldOperandsImpl::run(MachineFunction &MF) {
|
||||
|
||||
// TODO: Omod might be OK if there is NSZ only on the source
|
||||
// instruction, and not the omod multiply.
|
||||
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
|
||||
!tryFoldOMod(MI))
|
||||
if (IsIEEEMode || !MI.getFlag(MachineInstr::FmNsz) || !tryFoldOMod(MI))
|
||||
Changed |= tryFoldClamp(MI);
|
||||
}
|
||||
|
||||
|
||||
@ -744,9 +744,8 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
|
||||
: ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
|
||||
MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
|
||||
GDSSize(MFI.getGDSSize()), DynLDSAlign(MFI.getDynLDSAlign()),
|
||||
IsEntryFunction(MFI.isEntryFunction()),
|
||||
NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
|
||||
MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
|
||||
IsEntryFunction(MFI.isEntryFunction()), MemoryBound(MFI.isMemoryBound()),
|
||||
WaveLimiter(MFI.needsWaveLimiter()),
|
||||
HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
|
||||
HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
|
||||
NumWaveDispatchSGPRs(MFI.getNumWaveDispatchSGPRs()),
|
||||
@ -803,7 +802,6 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
|
||||
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
|
||||
Occupancy = YamlMFI.Occupancy;
|
||||
IsEntryFunction = YamlMFI.IsEntryFunction;
|
||||
NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
|
||||
MemoryBound = YamlMFI.MemoryBound;
|
||||
WaveLimiter = YamlMFI.WaveLimiter;
|
||||
HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
|
||||
|
||||
@ -267,7 +267,6 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
|
||||
Align DynLDSAlign;
|
||||
bool IsEntryFunction = false;
|
||||
bool IsChainFunction = false;
|
||||
bool NoSignedZerosFPMath = false;
|
||||
bool MemoryBound = false;
|
||||
bool WaveLimiter = false;
|
||||
bool HasSpilledSGPRs = false;
|
||||
@ -328,7 +327,6 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
|
||||
YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
|
||||
YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
|
||||
YamlIO.mapOptional("isChainFunction", MFI.IsChainFunction, false);
|
||||
YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
|
||||
YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
|
||||
YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
|
||||
YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -393,7 +393,7 @@ define amdgpu_ps void @v_omod_div2_f32(float %a) #0 {
|
||||
; GFX11PLUS-NEXT: global_store_b32 v[0:1], v0, off
|
||||
; GFX11PLUS-NEXT: s_endpgm
|
||||
%add = fadd float %a, 1.0
|
||||
%div2 = fmul float %add, 0.5
|
||||
%div2 = fmul nsz float %add, 0.5
|
||||
store float %div2, ptr addrspace(1) poison
|
||||
ret void
|
||||
}
|
||||
@ -451,7 +451,7 @@ define amdgpu_ps void @v_omod_mul2_f32(float %a) #0 {
|
||||
; GFX11PLUS-NEXT: global_store_b32 v[0:1], v0, off
|
||||
; GFX11PLUS-NEXT: s_endpgm
|
||||
%add = fadd float %a, 1.0
|
||||
%div2 = fmul float %add, 2.0
|
||||
%div2 = fmul nsz float %add, 2.0
|
||||
store float %div2, ptr addrspace(1) poison
|
||||
ret void
|
||||
}
|
||||
@ -483,7 +483,7 @@ define amdgpu_ps void @v_omod_mul2_med3(float %x, float %y, float %z) #0 {
|
||||
; GFX12-NEXT: global_store_b32 v[0:1], v0, off
|
||||
; GFX12-NEXT: s_endpgm
|
||||
%fmed3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
|
||||
%div2 = fmul float %fmed3, 2.0
|
||||
%div2 = fmul nsz float %fmed3, 2.0
|
||||
store float %div2, ptr addrspace(1) poison
|
||||
ret void
|
||||
}
|
||||
@ -541,7 +541,7 @@ define amdgpu_ps void @v_omod_mul4_f32(float %a) #0 {
|
||||
; GFX11PLUS-NEXT: global_store_b32 v[0:1], v0, off
|
||||
; GFX11PLUS-NEXT: s_endpgm
|
||||
%add = fadd float %a, 1.0
|
||||
%div2 = fmul float %add, 4.0
|
||||
%div2 = fmul nsz float %add, 4.0
|
||||
store float %div2, ptr addrspace(1) poison
|
||||
ret void
|
||||
}
|
||||
@ -649,7 +649,7 @@ define amdgpu_ps void @v_omod_mul4_dbg_use_f32(float %a) #0 {
|
||||
; GFX11PLUS-NEXT: s_endpgm
|
||||
%add = fadd float %a, 1.0
|
||||
call void @llvm.dbg.value(metadata float %add, i64 0, metadata !4, metadata !9), !dbg !10
|
||||
%div2 = fmul float %add, 4.0
|
||||
%div2 = fmul nsz float %add, 4.0
|
||||
store float %div2, ptr addrspace(1) poison
|
||||
ret void
|
||||
}
|
||||
@ -676,7 +676,7 @@ define amdgpu_ps void @v_clamp_omod_div2_f32(float %a) #0 {
|
||||
; GFX11PLUS-NEXT: global_store_b32 v[0:1], v0, off
|
||||
; GFX11PLUS-NEXT: s_endpgm
|
||||
%add = fadd float %a, 1.0
|
||||
%div2 = fmul float %add, 0.5
|
||||
%div2 = fmul nsz float %add, 0.5
|
||||
|
||||
%max = call float @llvm.maxnum.f32(float %div2, float 0.0)
|
||||
%clamp = call float @llvm.minnum.f32(float %max, float 1.0)
|
||||
@ -933,7 +933,7 @@ define amdgpu_ps void @v_omod_div2_omod_div2_f32(float %a) #0 {
|
||||
; GFX11PLUS-NEXT: global_store_b32 v[0:1], v0, off
|
||||
; GFX11PLUS-NEXT: s_endpgm
|
||||
%add = fadd float %a, 1.0
|
||||
%div2.0 = fmul float %add, 0.5
|
||||
%div2.0 = fmul nsz float %add, 0.5
|
||||
%div2.1 = fmul float %div2.0, 0.5
|
||||
store float %div2.1, ptr addrspace(1) poison
|
||||
ret void
|
||||
@ -1132,7 +1132,7 @@ define amdgpu_ps void @v_omod_div2_f16_denormals(half %a) #0 {
|
||||
; GFX12-FAKE16-NEXT: global_store_b16 v[0:1], v0, off
|
||||
; GFX12-FAKE16-NEXT: s_endpgm
|
||||
%add = fadd half %a, 1.0
|
||||
%div2 = fmul half %add, 0.5
|
||||
%div2 = fmul nsz half %add, 0.5
|
||||
store half %div2, ptr addrspace(1) poison
|
||||
ret void
|
||||
}
|
||||
@ -1190,7 +1190,7 @@ define amdgpu_ps void @v_omod_mul2_f16_denormals(half %a) #0 {
|
||||
; GFX12-FAKE16-NEXT: global_store_b16 v[0:1], v0, off
|
||||
; GFX12-FAKE16-NEXT: s_endpgm
|
||||
%add = fadd half %a, 1.0
|
||||
%mul2 = fadd half %add, %add
|
||||
%mul2 = fadd nsz half %add, %add
|
||||
store half %mul2, ptr addrspace(1) poison
|
||||
ret void
|
||||
}
|
||||
@ -1238,7 +1238,7 @@ define amdgpu_ps void @v_omod_div2_f16_no_denormals(half %a) #3 {
|
||||
; GFX12-FAKE16-NEXT: global_store_b16 v[0:1], v0, off
|
||||
; GFX12-FAKE16-NEXT: s_endpgm
|
||||
%add = fadd half %a, 1.0
|
||||
%div2 = fmul half %add, 0.5
|
||||
%div2 = fmul nsz half %add, 0.5
|
||||
store half %div2, ptr addrspace(1) poison
|
||||
ret void
|
||||
}
|
||||
@ -1270,7 +1270,7 @@ define amdgpu_ps void @v_omod_mac_to_mad(float %b, float %a) #0 {
|
||||
; GFX11PLUS-NEXT: s_endpgm
|
||||
%mul = fmul float %a, %a
|
||||
%add = fadd float %mul, %b
|
||||
%mad = fmul float %add, 2.0
|
||||
%mad = fmul nsz float %add, 2.0
|
||||
%res = fmul float %mad, %b
|
||||
store float %res, ptr addrspace(1) poison
|
||||
ret void
|
||||
@ -1297,7 +1297,7 @@ define amdgpu_ps void @v_clamp_omod_div2_f32_minimumnum_maximumnum(float %a) #0
|
||||
; GFX11PLUS-NEXT: global_store_b32 v[0:1], v0, off
|
||||
; GFX11PLUS-NEXT: s_endpgm
|
||||
%add = fadd float %a, 1.0
|
||||
%div2 = fmul float %add, 0.5
|
||||
%div2 = fmul nsz float %add, 0.5
|
||||
|
||||
%max = call float @llvm.maximumnum.f32(float %div2, float 0.0)
|
||||
%clamp = call float @llvm.minimumnum.f32(float %max, float 1.0)
|
||||
@ -1319,13 +1319,13 @@ declare half @llvm.minnum.f16(half, half) #1
|
||||
declare half @llvm.maxnum.f16(half, half) #1
|
||||
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
|
||||
|
||||
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" }
|
||||
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "no-signed-zeros-fp-math"="true" }
|
||||
attributes #3 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" }
|
||||
attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
|
||||
attributes #3 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
|
||||
attributes #4 = { nounwind "no-signed-zeros-fp-math"="false" }
|
||||
attributes #5 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
|
||||
attributes #6 = { nounwind "denormal-fp-math"="ieee,ieee" "no-signed-zeros-fp-math"="true" }
|
||||
attributes #6 = { nounwind "denormal-fp-math"="ieee,ieee" }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!2, !3}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -enable-no-signed-zeros-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -enable-no-signed-zeros-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
@ -726,7 +726,7 @@ define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
|
||||
%y = load volatile float, ptr addrspace(1) poison
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%add = fadd float %x, 4.0
|
||||
%fneg = fsub float -0.0, %add
|
||||
%fneg = fsub nsz float -0.0, %add
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, ptr addrspace(1) poison
|
||||
ret void
|
||||
@ -742,7 +742,7 @@ define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
|
||||
%x = load volatile float, ptr addrspace(1) poison
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%add = fsub float %x, 4.0
|
||||
%fneg = fsub float -0.0, %add
|
||||
%fneg = fsub nsz float -0.0, %add
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, ptr addrspace(1) poison
|
||||
ret void
|
||||
@ -758,7 +758,7 @@ define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
|
||||
%x = load volatile float, ptr addrspace(1) poison
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%mul = fmul float %x, 4.0
|
||||
%fneg = fsub float -0.0, %mul
|
||||
%fneg = fsub nsz float -0.0, %mul
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, ptr addrspace(1) poison
|
||||
ret void
|
||||
@ -776,7 +776,7 @@ define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
|
||||
%z = load volatile float, ptr addrspace(1) poison
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
|
||||
%fneg = fsub float -0.0, %fma
|
||||
%fneg = fsub nsz float -0.0, %fma
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, ptr addrspace(1) poison
|
||||
ret void
|
||||
@ -793,7 +793,7 @@ define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
|
||||
%z = load volatile float, ptr addrspace(1) poison
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
|
||||
%fneg = fsub float -0.0, %fmad
|
||||
%fneg = fsub nsz float -0.0, %fmad
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, ptr addrspace(1) poison
|
||||
ret void
|
||||
@ -811,7 +811,7 @@ define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
|
||||
%y = load volatile float, ptr addrspace(1) poison
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%rcp = call float @llvm.amdgcn.rcp.f32(float %x)
|
||||
%fneg = fsub float -0.0, %rcp
|
||||
%fneg = fsub nsz float -0.0, %rcp
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, ptr addrspace(1) poison
|
||||
ret void
|
||||
|
||||
@ -12,7 +12,6 @@
|
||||
; CHECK-NEXT: dynLDSAlign: 1
|
||||
; CHECK-NEXT: isEntryFunction: true
|
||||
; CHECK-NEXT: isChainFunction: false
|
||||
; CHECK-NEXT: noSignedZerosFPMath: false
|
||||
; CHECK-NEXT: memoryBound: false
|
||||
; CHECK-NEXT: waveLimiter: false
|
||||
; CHECK-NEXT: hasSpilledSGPRs: false
|
||||
@ -285,7 +284,6 @@
|
||||
; CHECK-NEXT: dynLDSAlign: 1
|
||||
; CHECK-NEXT: isEntryFunction: true
|
||||
; CHECK-NEXT: isChainFunction: false
|
||||
; CHECK-NEXT: noSignedZerosFPMath: false
|
||||
; CHECK-NEXT: memoryBound: false
|
||||
; CHECK-NEXT: waveLimiter: false
|
||||
; CHECK-NEXT: hasSpilledSGPRs: false
|
||||
|
||||
@ -11,7 +11,6 @@
|
||||
; AFTER-PEI-NEXT: dynLDSAlign: 1
|
||||
; AFTER-PEI-NEXT: isEntryFunction: true
|
||||
; AFTER-PEI-NEXT: isChainFunction: false
|
||||
; AFTER-PEI-NEXT: noSignedZerosFPMath: false
|
||||
; AFTER-PEI-NEXT: memoryBound: false
|
||||
; AFTER-PEI-NEXT: waveLimiter: false
|
||||
; AFTER-PEI-NEXT: hasSpilledSGPRs: true
|
||||
|
||||
@ -12,7 +12,6 @@
|
||||
; CHECK-NEXT: dynLDSAlign: 1
|
||||
; CHECK-NEXT: isEntryFunction: true
|
||||
; CHECK-NEXT: isChainFunction: false
|
||||
; CHECK-NEXT: noSignedZerosFPMath: false
|
||||
; CHECK-NEXT: memoryBound: false
|
||||
; CHECK-NEXT: waveLimiter: false
|
||||
; CHECK-NEXT: hasSpilledSGPRs: false
|
||||
|
||||
@ -12,7 +12,6 @@
|
||||
; CHECK-NEXT: dynLDSAlign: 1
|
||||
; CHECK-NEXT: isEntryFunction: true
|
||||
; CHECK-NEXT: isChainFunction: false
|
||||
; CHECK-NEXT: noSignedZerosFPMath: false
|
||||
; CHECK-NEXT: memoryBound: false
|
||||
; CHECK-NEXT: waveLimiter: false
|
||||
; CHECK-NEXT: hasSpilledSGPRs: false
|
||||
|
||||
@ -12,7 +12,6 @@
|
||||
# FULL-NEXT: dynLDSAlign: 1
|
||||
# FULL-NEXT: isEntryFunction: true
|
||||
# FULL-NEXT: isChainFunction: false
|
||||
# FULL-NEXT: noSignedZerosFPMath: false
|
||||
# FULL-NEXT: memoryBound: true
|
||||
# FULL-NEXT: waveLimiter: true
|
||||
# FULL-NEXT: hasSpilledSGPRs: false
|
||||
@ -96,7 +95,6 @@ machineFunctionInfo:
|
||||
ldsSize: 2048
|
||||
gdsSize: 256
|
||||
isEntryFunction: true
|
||||
noSignedZerosFPMath: false
|
||||
memoryBound: true
|
||||
waveLimiter: true
|
||||
scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
|
||||
@ -125,7 +123,6 @@ body: |
|
||||
# FULL-NEXT: dynLDSAlign: 1
|
||||
# FULL-NEXT: isEntryFunction: false
|
||||
# FULL-NEXT: isChainFunction: false
|
||||
# FULL-NEXT: noSignedZerosFPMath: false
|
||||
# FULL-NEXT: memoryBound: false
|
||||
# FULL-NEXT: waveLimiter: false
|
||||
# FULL-NEXT: hasSpilledSGPRs: false
|
||||
@ -207,7 +204,6 @@ body: |
|
||||
# FULL-NEXT: dynLDSAlign: 1
|
||||
# FULL-NEXT: isEntryFunction: false
|
||||
# FULL-NEXT: isChainFunction: false
|
||||
# FULL-NEXT: noSignedZerosFPMath: false
|
||||
# FULL-NEXT: memoryBound: false
|
||||
# FULL-NEXT: waveLimiter: false
|
||||
# FULL-NEXT: hasSpilledSGPRs: false
|
||||
@ -290,7 +286,6 @@ body: |
|
||||
# FULL-NEXT: dynLDSAlign: 1
|
||||
# FULL-NEXT: isEntryFunction: true
|
||||
# FULL-NEXT: isChainFunction: false
|
||||
# FULL-NEXT: noSignedZerosFPMath: false
|
||||
# FULL-NEXT: memoryBound: false
|
||||
# FULL-NEXT: waveLimiter: false
|
||||
# FULL-NEXT: hasSpilledSGPRs: false
|
||||
|
||||
@ -15,7 +15,6 @@
|
||||
; CHECK-NEXT: dynLDSAlign: 1
|
||||
; CHECK-NEXT: isEntryFunction: true
|
||||
; CHECK-NEXT: isChainFunction: false
|
||||
; CHECK-NEXT: noSignedZerosFPMath: false
|
||||
; CHECK-NEXT: memoryBound: false
|
||||
; CHECK-NEXT: waveLimiter: false
|
||||
; CHECK-NEXT: hasSpilledSGPRs: false
|
||||
@ -78,7 +77,6 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
|
||||
; CHECK-NEXT: dynLDSAlign: 1
|
||||
; CHECK-NEXT: isEntryFunction: true
|
||||
; CHECK-NEXT: isChainFunction: false
|
||||
; CHECK-NEXT: noSignedZerosFPMath: false
|
||||
; CHECK-NEXT: memoryBound: false
|
||||
; CHECK-NEXT: waveLimiter: false
|
||||
; CHECK-NEXT: hasSpilledSGPRs: false
|
||||
@ -145,7 +143,6 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
|
||||
; CHECK-NEXT: dynLDSAlign: 1
|
||||
; CHECK-NEXT: isEntryFunction: false
|
||||
; CHECK-NEXT: isChainFunction: false
|
||||
; CHECK-NEXT: noSignedZerosFPMath: false
|
||||
; CHECK-NEXT: memoryBound: false
|
||||
; CHECK-NEXT: waveLimiter: false
|
||||
; CHECK-NEXT: hasSpilledSGPRs: false
|
||||
@ -204,7 +201,6 @@ define void @function() {
|
||||
; CHECK-NEXT: dynLDSAlign: 1
|
||||
; CHECK-NEXT: isEntryFunction: false
|
||||
; CHECK-NEXT: isChainFunction: false
|
||||
; CHECK-NEXT: noSignedZerosFPMath: true
|
||||
; CHECK-NEXT: memoryBound: false
|
||||
; CHECK-NEXT: waveLimiter: false
|
||||
; CHECK-NEXT: hasSpilledSGPRs: false
|
||||
|
||||
@ -20,7 +20,6 @@
|
||||
# RESULT-NEXT: gdsSize: 128
|
||||
# RESULT-NEXT: dynLDSAlign: 16
|
||||
# RESULT-NEXT: isEntryFunction: true
|
||||
# RESULT-NEXT: noSignedZerosFPMath: true
|
||||
# RESULT-NEXT: memoryBound: true
|
||||
# RESULT-NEXT: waveLimiter: true
|
||||
# RESULT-NEXT: hasSpilledSGPRs: true
|
||||
@ -74,7 +73,6 @@ machineFunctionInfo:
|
||||
gdsSize: 128
|
||||
dynLDSAlign: 16
|
||||
isEntryFunction: true
|
||||
noSignedZerosFPMath: true
|
||||
memoryBound: true
|
||||
waveLimiter: true
|
||||
hasSpilledSGPRs: true
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user