AMDGPU/GlobalISel: Partially move constant selection to patterns (#100786)
This still relies on the manual code for splitting 64-bit constants and for handling pointers. We were missing some of the tablegen patterns for all immediate types, so as a side effect this also improves the DAG path. This also reduces the diff between the outputs of the two selectors.
This commit is contained in:
parent
b4444dca47
commit
b356aa3e2d
@ -398,8 +398,10 @@ def gi_as_i1timm : GICustomOperandRenderer<"renderTruncTImm">,
|
||||
def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
|
||||
GISDNodeXFormEquiv<NegateImm>;
|
||||
|
||||
def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">,
|
||||
def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastFPImm32">,
|
||||
GISDNodeXFormEquiv<bitcast_fpimm_to_i32>;
|
||||
def gi_bitcast_fpimm_to_i64 : GICustomOperandRenderer<"renderBitcastFPImm64">,
|
||||
GISDNodeXFormEquiv<bitcast_fpimm_to_i64>;
|
||||
|
||||
def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">,
|
||||
GISDNodeXFormEquiv<IMMPopCount>;
|
||||
|
||||
@ -2504,10 +2504,19 @@ bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
|
||||
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
|
||||
if (selectImpl(I, *CoverageInfo))
|
||||
return true;
|
||||
|
||||
// FIXME: Relying on manual selection for 64-bit case, and pointer typed
|
||||
// constants.
|
||||
MachineBasicBlock *BB = I.getParent();
|
||||
MachineOperand &ImmOp = I.getOperand(1);
|
||||
Register DstReg = I.getOperand(0).getReg();
|
||||
unsigned Size = MRI->getType(DstReg).getSizeInBits();
|
||||
LLT Ty = MRI->getType(DstReg);
|
||||
unsigned Size = Ty.getSizeInBits();
|
||||
assert((Size == 64 || Ty.isPointer()) &&
|
||||
"patterns should have selected this");
|
||||
|
||||
bool IsFP = false;
|
||||
|
||||
// The AMDGPU backend only supports Imm operands and not CImm or FPImm.
|
||||
@ -5606,18 +5615,12 @@ void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
|
||||
MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
|
||||
}
|
||||
|
||||
void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
|
||||
const MachineInstr &MI,
|
||||
int OpIdx) const {
|
||||
assert(OpIdx == -1);
|
||||
|
||||
void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,
|
||||
const MachineInstr &MI,
|
||||
int OpIdx) const {
|
||||
const MachineOperand &Op = MI.getOperand(1);
|
||||
if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
|
||||
MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
|
||||
else {
|
||||
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
|
||||
MIB.addImm(Op.getCImm()->getSExtValue());
|
||||
}
|
||||
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
|
||||
MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
|
||||
}
|
||||
|
||||
void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
|
||||
|
||||
@ -333,8 +333,17 @@ private:
|
||||
void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||
int OpIdx) const;
|
||||
|
||||
void renderBitcastImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||
int OpIdx) const;
|
||||
void renderBitcastFPImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||
int OpIdx) const;
|
||||
|
||||
// Custom operand renderer named by gi_bitcast_fpimm_to_i32. It adds no logic
// of its own: MIB, MI and OpIdx are forwarded unchanged to renderBitcastFPImm.
void renderBitcastFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||
int OpIdx) const {
|
||||
renderBitcastFPImm(MIB, MI, OpIdx);
|
||||
}
|
||||
// Custom operand renderer named by gi_bitcast_fpimm_to_i64. It adds no logic
// of its own: MIB, MI and OpIdx are forwarded unchanged to renderBitcastFPImm.
void renderBitcastFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||
int OpIdx) const {
|
||||
renderBitcastFPImm(MIB, MI, OpIdx);
|
||||
}
|
||||
|
||||
void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||
int OpIdx) const;
|
||||
|
||||
@ -828,7 +828,9 @@ def InlineImmFP64 : FPImmLeaf<f64, [{
|
||||
|
||||
class VGPRImm <dag frag> : PatLeaf<frag, [{
|
||||
return isVGPRImm(N);
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{return true;}];
|
||||
}
|
||||
|
||||
def NegateImm : SDNodeXForm<imm, [{
|
||||
return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
|
||||
|
||||
@ -2163,18 +2163,44 @@ def : GCNPat <
|
||||
(S_MOV_B32 $ga)
|
||||
>;
|
||||
|
||||
// FIXME: Workaround for ordering issue with peephole optimizer where
|
||||
// a register class copy interferes with immediate folding. Should
|
||||
// use s_mov_b32, which can be shrunk to s_movk_i32
|
||||
def : GCNPat <
|
||||
(VGPRImm<(f16 fpimm)>:$imm),
|
||||
(V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
|
||||
>;
|
||||
foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
|
||||
let True16Predicate = pred in {
|
||||
def : GCNPat <
|
||||
(VGPRImm<(i16 imm)>:$imm),
|
||||
(V_MOV_B32_e32 imm:$imm)
|
||||
>;
|
||||
}
|
||||
|
||||
def : GCNPat <
|
||||
(VGPRImm<(bf16 fpimm)>:$imm),
|
||||
(V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
|
||||
>;
|
||||
// FIXME: Workaround for ordering issue with peephole optimizer where
|
||||
// a register class copy interferes with immediate folding. Should
|
||||
// use s_mov_b32, which can be shrunk to s_movk_i32
|
||||
def : GCNPat <
|
||||
(VGPRImm<(f16 fpimm)>:$imm),
|
||||
(V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(VGPRImm<(bf16 fpimm)>:$imm),
|
||||
(V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
|
||||
>;
|
||||
}
|
||||
|
||||
let True16Predicate = UseRealTrue16Insts in {
|
||||
def : GCNPat <
|
||||
(VGPRImm<(i16 imm)>:$imm),
|
||||
(V_MOV_B16_t16_e64 0, imm:$imm, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(VGPRImm<(f16 fpimm)>:$imm),
|
||||
(V_MOV_B16_t16_e64 0, $imm, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(VGPRImm<(bf16 fpimm)>:$imm),
|
||||
(V_MOV_B16_t16_e64 0, $imm, 0)
|
||||
>;
|
||||
}
|
||||
|
||||
// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
|
||||
// immediate and will be expanded as needed, but we will only use these patterns
|
||||
@ -2229,20 +2255,15 @@ def : GCNPat <
|
||||
(S_MOV_B64 InlineImm64:$imm)
|
||||
>;
|
||||
|
||||
// XXX - Should this use a s_cmp to set SCC?
|
||||
|
||||
// Set to sign-extended 64-bit value (true = -1, false = 0)
|
||||
def : GCNPat <
|
||||
(i1 imm:$imm),
|
||||
(S_MOV_B64 (i64 (as_i64imm $imm)))
|
||||
> {
|
||||
// Set to sign-extended 64-bit value (true = -1, false = 0)
|
||||
def : GCNPat <(i1 imm:$imm),
|
||||
(S_MOV_B64 imm:$imm)> {
|
||||
let WaveSizePredicate = isWave64;
|
||||
}
|
||||
|
||||
def : GCNPat <
|
||||
(i1 imm:$imm),
|
||||
(S_MOV_B32 (i32 (as_i32imm $imm)))
|
||||
> {
|
||||
def : GCNPat <(i1 imm:$imm),
|
||||
(S_MOV_B32 imm:$imm)> {
|
||||
let WaveSizePredicate = isWave32;
|
||||
}
|
||||
|
||||
|
||||
@ -501,8 +501,8 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], 0
|
||||
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX7-NEXT: .LBB4_1: ; %atomicrmw.start
|
||||
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -710,8 +710,8 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], 0
|
||||
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v2
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX7-NEXT: .LBB5_1: ; %atomicrmw.start
|
||||
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -936,7 +936,7 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
|
||||
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], 0
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX7-NEXT: .LBB6_1: ; %atomicrmw.start
|
||||
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -1150,7 +1150,7 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX7-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
|
||||
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], 0
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX7-NEXT: .LBB7_1: ; %atomicrmw.start
|
||||
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
|
||||
@ -501,8 +501,8 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], 0
|
||||
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX7-NEXT: .LBB4_1: ; %atomicrmw.start
|
||||
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -710,8 +710,8 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], 0
|
||||
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v2
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX7-NEXT: .LBB5_1: ; %atomicrmw.start
|
||||
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -936,7 +936,7 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
|
||||
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], 0
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX7-NEXT: .LBB6_1: ; %atomicrmw.start
|
||||
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -1150,7 +1150,7 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX7-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
|
||||
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], 0
|
||||
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX7-NEXT: .LBB7_1: ; %atomicrmw.start
|
||||
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
|
||||
@ -226,15 +226,16 @@ exit:
|
||||
define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3 x i32> inreg %.WorkgroupId, <3 x i32> %.LocalInvocationId) #0 {
|
||||
; GFX10-LABEL: single_lane_execution_attribute:
|
||||
; GFX10: ; %bb.0: ; %.entry
|
||||
; GFX10-NEXT: s_mov_b32 s6, 0
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_mov_b32 s12, 0
|
||||
; GFX10-NEXT: s_mov_b32 s13, -1
|
||||
; GFX10-NEXT: s_mov_b32 s2, s0
|
||||
; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], s[12:13]
|
||||
; GFX10-NEXT: s_mov_b32 s3, s12
|
||||
; GFX10-NEXT: s_mov_b32 s7, -1
|
||||
; GFX10-NEXT: s_mov_b32 s2, s1
|
||||
; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
|
||||
; GFX10-NEXT: s_mov_b32 s1, 0
|
||||
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v1, -1, 0
|
||||
; GFX10-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
|
||||
; GFX10-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
|
||||
; GFX10-NEXT: s_or_b64 s[12:13], s[4:5], s[0:1]
|
||||
; GFX10-NEXT: s_mov_b32 s3, -1
|
||||
; GFX10-NEXT: s_load_dwordx8 s[4:11], s[12:13], 0x0
|
||||
; GFX10-NEXT: v_mbcnt_hi_u32_b32 v1, -1, v1
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, 1, v1
|
||||
@ -248,8 +249,8 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3
|
||||
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2
|
||||
; GFX10-NEXT: s_cbranch_vccnz .LBB4_4
|
||||
; GFX10-NEXT: ; %bb.1: ; %.preheader.preheader
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, s12
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, s12
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, s1
|
||||
; GFX10-NEXT: .LBB4_2: ; %.preheader
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: buffer_load_dword v5, v3, s[4:7], 0 offen
|
||||
@ -261,17 +262,17 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3
|
||||
; GFX10-NEXT: s_cbranch_vccnz .LBB4_2
|
||||
; GFX10-NEXT: ; %bb.3: ; %.preheader._crit_edge
|
||||
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v2
|
||||
; GFX10-NEXT: s_mov_b32 s13, 0
|
||||
; GFX10-NEXT: s_or_b32 s2, s0, vcc_lo
|
||||
; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2
|
||||
; GFX10-NEXT: s_mov_b32 s3, 0
|
||||
; GFX10-NEXT: s_or_b32 s1, s0, vcc_lo
|
||||
; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s1
|
||||
; GFX10-NEXT: .LBB4_4: ; %Flow
|
||||
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s13
|
||||
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s3
|
||||
; GFX10-NEXT: s_cbranch_vccz .LBB4_6
|
||||
; GFX10-NEXT: ; %bb.5: ; %.19
|
||||
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
|
||||
; GFX10-NEXT: v_or_b32_e32 v3, 2, v1
|
||||
; GFX10-NEXT: .LBB4_6: ; %.22
|
||||
; GFX10-NEXT: v_add_lshl_u32 v0, v0, s1, 2
|
||||
; GFX10-NEXT: v_add_lshl_u32 v0, v0, s2, 2
|
||||
; GFX10-NEXT: buffer_store_dword v3, v0, s[8:11], 0 offen
|
||||
; GFX10-NEXT: s_endpgm
|
||||
.entry:
|
||||
|
||||
@ -193,12 +193,12 @@ bb12:
|
||||
define amdgpu_kernel void @break_loop(i32 %arg) {
|
||||
; CHECK-LABEL: break_loop:
|
||||
; CHECK: ; %bb.0: ; %bb
|
||||
; CHECK-NEXT: s_load_dword s2, s[6:7], 0x0
|
||||
; CHECK-NEXT: s_mov_b64 s[0:1], 0
|
||||
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr1
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_subrev_u32_e32 v0, s2, v0
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
|
||||
; CHECK-NEXT: v_subrev_u32_e32 v0, s0, v0
|
||||
; CHECK-NEXT: s_mov_b64 s[0:1], 0
|
||||
; CHECK-NEXT: s_branch .LBB5_3
|
||||
; CHECK-NEXT: .LBB5_1: ; %bb4
|
||||
; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
|
||||
|
||||
@ -8,9 +8,9 @@ define double @v_floor_f64_ieee(double %x) {
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
|
||||
; GFX6-NEXT: s_mov_b32 s4, -1
|
||||
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
|
||||
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
|
||||
@ -31,9 +31,9 @@ define double @v_floor_f64_ieee_nnan(double %x) {
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
|
||||
; GFX6-NEXT: s_mov_b32 s4, -1
|
||||
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
|
||||
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
@ -51,9 +51,9 @@ define double @v_floor_f64_ieee_fneg(double %x) {
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
|
||||
; GFX6-NEXT: s_mov_b32 s4, -1
|
||||
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
|
||||
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
|
||||
@ -75,9 +75,9 @@ define double @v_floor_f64_nonieee(double %x) #1 {
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
|
||||
; GFX6-NEXT: s_mov_b32 s4, -1
|
||||
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
|
||||
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
|
||||
@ -98,9 +98,9 @@ define double @v_floor_f64_nonieee_nnan(double %x) #1 {
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
|
||||
; GFX6-NEXT: s_mov_b32 s4, -1
|
||||
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
|
||||
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
@ -118,9 +118,9 @@ define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
|
||||
; GFX6-NEXT: s_mov_b32 s4, -1
|
||||
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
|
||||
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
|
||||
@ -142,9 +142,9 @@ define double @v_floor_f64_fabs(double %x) {
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_fract_f64_e64 v[2:3], |v[0:1]|
|
||||
; GFX6-NEXT: s_mov_b32 s4, -1
|
||||
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
|
||||
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
|
||||
@ -166,9 +166,9 @@ define double @v_floor_f64_fneg_fabs(double %x) {
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -|v[0:1]|
|
||||
; GFX6-NEXT: s_mov_b32 s4, -1
|
||||
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
|
||||
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
|
||||
@ -190,9 +190,9 @@ define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
|
||||
; GFX6-LABEL: s_floor_f64:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3]
|
||||
; GFX6-NEXT: s_mov_b32 s0, -1
|
||||
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
|
||||
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, s3
|
||||
@ -214,9 +214,9 @@ define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
|
||||
; GFX6-LABEL: s_floor_f64_fneg:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3]
|
||||
; GFX6-NEXT: s_mov_b32 s0, -1
|
||||
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
|
||||
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, s3
|
||||
@ -239,9 +239,9 @@ define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
|
||||
; GFX6-LABEL: s_floor_f64_fabs:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]|
|
||||
; GFX6-NEXT: s_mov_b32 s0, -1
|
||||
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
|
||||
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, s3
|
||||
@ -264,9 +264,9 @@ define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
|
||||
; GFX6-LABEL: s_floor_f64_fneg_fabs:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]|
|
||||
; GFX6-NEXT: s_mov_b32 s0, -1
|
||||
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
|
||||
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
|
||||
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, s3
|
||||
|
||||
@ -434,8 +434,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
|
||||
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX90A_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
|
||||
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
|
||||
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
|
||||
; GFX90A_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
|
||||
@ -446,8 +446,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
|
||||
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
|
||||
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
|
||||
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
|
||||
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
|
||||
; GFX90A_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
|
||||
; GFX90A_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
|
||||
; GFX90A_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
|
||||
@ -615,8 +615,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
|
||||
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX940_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
|
||||
; GFX940_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
|
||||
; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
|
||||
; GFX940_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
|
||||
@ -627,8 +627,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
|
||||
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
|
||||
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
|
||||
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GFX940_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
|
||||
; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
|
||||
; GFX940_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
|
||||
; GFX940_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
|
||||
; GFX940_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
|
||||
@ -837,8 +837,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
|
||||
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
|
||||
; GFX90A_ITERATIVE-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
|
||||
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
|
||||
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
|
||||
; GFX90A_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
|
||||
@ -849,8 +849,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
|
||||
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
|
||||
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
|
||||
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
|
||||
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX90A_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
|
||||
; GFX90A_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
|
||||
; GFX90A_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
|
||||
; GFX90A_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
|
||||
@ -1089,8 +1089,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
|
||||
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
|
||||
; GFX940_ITERATIVE-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
|
||||
; GFX940_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
|
||||
; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
|
||||
; GFX940_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
|
||||
@ -1101,8 +1101,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
|
||||
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
|
||||
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
|
||||
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GFX940_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
|
||||
; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX940_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
|
||||
; GFX940_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
|
||||
; GFX940_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
|
||||
; GFX940_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
|
||||
|
||||
@ -142,13 +142,9 @@ body: |
|
||||
; WAVE64-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
|
||||
; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
|
||||
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
|
||||
; WAVE64-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967296, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B7:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -68719453481, implicit $exec
|
||||
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[V_MOV_B6]], implicit [[V_MOV_B7]]
|
||||
;
|
||||
; WAVE32-LABEL: name: constant_v_s64
|
||||
; WAVE32: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
|
||||
@ -157,13 +153,9 @@ body: |
|
||||
; WAVE32-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
|
||||
; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
|
||||
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
|
||||
; WAVE32-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967296, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B7:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -68719453481, implicit $exec
|
||||
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[V_MOV_B6]], implicit [[V_MOV_B7]]
|
||||
%0:vgpr(s64) = G_CONSTANT i64 0
|
||||
%1:vgpr(s64) = G_CONSTANT i64 1
|
||||
%2:vgpr(s64) = G_CONSTANT i64 -1
|
||||
@ -184,34 +176,26 @@ tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; WAVE64-LABEL: name: constant_s_s64
|
||||
; WAVE64: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
|
||||
; WAVE64-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
|
||||
; WAVE64-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
|
||||
; WAVE64-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
|
||||
; WAVE64-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
|
||||
; WAVE64-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
|
||||
; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
|
||||
; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
|
||||
; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
|
||||
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
|
||||
; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; WAVE64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
|
||||
; WAVE64-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
|
||||
; WAVE64-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
|
||||
; WAVE64-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
|
||||
; WAVE64-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
|
||||
; WAVE64-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
|
||||
; WAVE64-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -68719453481
|
||||
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_3]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]]
|
||||
;
|
||||
; WAVE32-LABEL: name: constant_s_s64
|
||||
; WAVE32: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
|
||||
; WAVE32-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
|
||||
; WAVE32-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
|
||||
; WAVE32-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
|
||||
; WAVE32-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
|
||||
; WAVE32-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
|
||||
; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
|
||||
; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
|
||||
; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
|
||||
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
|
||||
; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; WAVE32-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
|
||||
; WAVE32-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
|
||||
; WAVE32-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
|
||||
; WAVE32-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
|
||||
; WAVE32-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
|
||||
; WAVE32-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
|
||||
; WAVE32-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -68719453481
|
||||
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_3]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]]
|
||||
%0:sgpr(s64) = G_CONSTANT i64 0
|
||||
%1:sgpr(s64) = G_CONSTANT i64 1
|
||||
%2:sgpr(s64) = G_CONSTANT i64 -1
|
||||
@ -310,6 +294,195 @@ body: |
|
||||
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: constant_s_p2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; WAVE64-LABEL: name: constant_s_p2
|
||||
; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
|
||||
; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
|
||||
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
|
||||
;
|
||||
; WAVE32-LABEL: name: constant_s_p2
|
||||
; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
|
||||
; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
|
||||
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
|
||||
%0:sgpr(p2) = G_CONSTANT i32 0
|
||||
%1:sgpr(p2) = G_CONSTANT i32 1
|
||||
%2:sgpr(p2) = G_CONSTANT i32 -1
|
||||
%3:sgpr(p2) = G_CONSTANT i32 -54
|
||||
%4:sgpr(p2) = G_CONSTANT i32 27
|
||||
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: constant_v_p2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; WAVE64-LABEL: name: constant_v_p2
|
||||
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
|
||||
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
|
||||
;
|
||||
; WAVE32-LABEL: name: constant_v_p2
|
||||
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
|
||||
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
|
||||
%0:vgpr(p2) = G_CONSTANT i32 0
|
||||
%1:vgpr(p2) = G_CONSTANT i32 1
|
||||
%2:vgpr(p2) = G_CONSTANT i32 -1
|
||||
%3:vgpr(p2) = G_CONSTANT i32 -54
|
||||
%4:vgpr(p2) = G_CONSTANT i32 27
|
||||
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: constant_s_p5
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; WAVE64-LABEL: name: constant_s_p5
|
||||
; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
|
||||
; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
|
||||
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
|
||||
;
|
||||
; WAVE32-LABEL: name: constant_s_p5
|
||||
; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
|
||||
; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
|
||||
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
|
||||
%0:sgpr(p5) = G_CONSTANT i32 0
|
||||
%1:sgpr(p5) = G_CONSTANT i32 1
|
||||
%2:sgpr(p5) = G_CONSTANT i32 -1
|
||||
%3:sgpr(p5) = G_CONSTANT i32 -54
|
||||
%4:sgpr(p5) = G_CONSTANT i32 27
|
||||
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: constant_v_p5
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; WAVE64-LABEL: name: constant_v_p5
|
||||
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
|
||||
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
|
||||
;
|
||||
; WAVE32-LABEL: name: constant_v_p5
|
||||
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
|
||||
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
|
||||
%0:vgpr(p5) = G_CONSTANT i32 0
|
||||
%1:vgpr(p5) = G_CONSTANT i32 1
|
||||
%2:vgpr(p5) = G_CONSTANT i32 -1
|
||||
%3:vgpr(p5) = G_CONSTANT i32 -54
|
||||
%4:vgpr(p5) = G_CONSTANT i32 27
|
||||
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: constant_s_p6
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; WAVE64-LABEL: name: constant_s_p6
|
||||
; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
|
||||
; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
|
||||
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
|
||||
;
|
||||
; WAVE32-LABEL: name: constant_s_p6
|
||||
; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
|
||||
; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
|
||||
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
|
||||
%0:sgpr(p6) = G_CONSTANT i32 0
|
||||
%1:sgpr(p6) = G_CONSTANT i32 1
|
||||
%2:sgpr(p6) = G_CONSTANT i32 -1
|
||||
%3:sgpr(p6) = G_CONSTANT i32 -54
|
||||
%4:sgpr(p6) = G_CONSTANT i32 27
|
||||
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: constant_v_p6
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; WAVE64-LABEL: name: constant_v_p6
|
||||
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
|
||||
; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
|
||||
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
|
||||
;
|
||||
; WAVE32-LABEL: name: constant_v_p6
|
||||
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
|
||||
; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
|
||||
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
|
||||
%0:vgpr(p6) = G_CONSTANT i32 0
|
||||
%1:vgpr(p6) = G_CONSTANT i32 1
|
||||
%2:vgpr(p6) = G_CONSTANT i32 -1
|
||||
%3:vgpr(p6) = G_CONSTANT i32 -54
|
||||
%4:vgpr(p6) = G_CONSTANT i32 27
|
||||
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: constant_s_p1
|
||||
legalized: true
|
||||
|
||||
@ -87,13 +87,13 @@ tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: fconstant_s_s64
|
||||
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4607182418800017408
|
||||
; GCN-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4620693217682128896
|
||||
; GCN-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4611686018427387904
|
||||
; GCN-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4601552919265804288
|
||||
; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B]]
|
||||
; GCN-NEXT: $sgpr2_sgpr3 = COPY [[S_MOV_B1]]
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]]
|
||||
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4607182418800017408
|
||||
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4620693217682128896
|
||||
; GCN-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -4611686018427387904
|
||||
; GCN-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4601552919265804288
|
||||
; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B64_]]
|
||||
; GCN-NEXT: $sgpr2_sgpr3 = COPY [[S_MOV_B]]
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B1]]
|
||||
%0:sgpr(s64) = G_FCONSTANT double 1.0
|
||||
%1:sgpr(s64) = G_FCONSTANT double 8.0
|
||||
%2:sgpr(s64) = G_FCONSTANT double -2.0
|
||||
|
||||
@ -1374,11 +1374,11 @@ body: |
|
||||
; GFX6: liveins: $sgpr0_sgpr1
|
||||
; GFX6-NEXT: {{ $}}
|
||||
; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX6-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
|
||||
; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
|
||||
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
|
||||
; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
|
||||
; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
@ -1389,11 +1389,11 @@ body: |
|
||||
; GFX7: liveins: $sgpr0_sgpr1
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX7-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
|
||||
; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
|
||||
; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
|
||||
; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
|
||||
; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
@ -1404,11 +1404,11 @@ body: |
|
||||
; GFX8: liveins: $sgpr0_sgpr1
|
||||
; GFX8-NEXT: {{ $}}
|
||||
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX8-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
|
||||
; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
|
||||
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
|
||||
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
|
||||
; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
@ -1419,11 +1419,11 @@ body: |
|
||||
; GFX10: liveins: $sgpr0_sgpr1
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
|
||||
; GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
|
||||
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
@ -1434,11 +1434,11 @@ body: |
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
|
||||
; GFX11-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
|
||||
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
|
||||
@ -741,17 +741,15 @@ body: |
|
||||
; GFX9: liveins: $sgpr0_sgpr1
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -759,17 +757,15 @@ body: |
|
||||
; GFX10: liveins: $sgpr0_sgpr1
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -777,17 +773,15 @@ body: |
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -795,17 +789,15 @@ body: |
|
||||
; GFX12: liveins: $sgpr0_sgpr1
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
@ -831,35 +823,31 @@ body: |
|
||||
; GFX9: liveins: $sgpr0_sgpr1
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
|
||||
; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
;
|
||||
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390
|
||||
; GFX10: liveins: $sgpr0_sgpr1
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
|
||||
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -867,17 +855,15 @@ body: |
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
|
||||
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -885,17 +871,15 @@ body: |
|
||||
; GFX12: liveins: $sgpr0_sgpr1
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
|
||||
; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
@ -921,17 +905,15 @@ body: |
|
||||
; GFX9: liveins: $sgpr0_sgpr1
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -939,17 +921,15 @@ body: |
|
||||
; GFX10: liveins: $sgpr0_sgpr1
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -957,17 +937,15 @@ body: |
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -975,17 +953,15 @@ body: |
|
||||
; GFX12: liveins: $sgpr0_sgpr1
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
@ -1010,17 +986,15 @@ body: |
|
||||
; GFX9: liveins: $sgpr0_sgpr1
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
|
||||
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -1028,17 +1002,15 @@ body: |
|
||||
; GFX10: liveins: $sgpr0_sgpr1
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -1046,17 +1018,15 @@ body: |
|
||||
; GFX11: liveins: $sgpr0_sgpr1
|
||||
; GFX11-NEXT: {{ $}}
|
||||
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
|
||||
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
;
|
||||
@ -1064,17 +1034,15 @@ body: |
|
||||
; GFX12: liveins: $sgpr0_sgpr1
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
|
||||
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
|
||||
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
|
||||
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
|
||||
; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
||||
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
|
||||
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
|
||||
@ -45,9 +45,7 @@ regBankSelected: true
|
||||
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
|
||||
|
||||
# Max immediate for CI
|
||||
# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
|
||||
# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3
|
||||
# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
|
||||
# SIVI: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 17179869180
|
||||
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
|
||||
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
|
||||
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
|
||||
@ -59,9 +57,7 @@ regBankSelected: true
|
||||
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
|
||||
|
||||
# Immediate overflow for CI
|
||||
# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4
|
||||
# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
|
||||
# GCN: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 17179869184
|
||||
# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
|
||||
# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
|
||||
# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
|
||||
@ -77,9 +73,7 @@ regBankSelected: true
|
||||
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
|
||||
|
||||
# Overflow 32-bit byte offset
|
||||
# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
|
||||
# SIVI: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
|
||||
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
|
||||
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
|
||||
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
|
||||
|
||||
@ -314,8 +314,8 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
|
||||
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B]], implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def dead $scc
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
|
||||
%0:sgpr(p0) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 0
|
||||
@ -337,10 +337,8 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160
|
||||
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136
|
||||
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1085102592571150096
|
||||
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B]], implicit-def dead $scc
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
|
||||
%0:sgpr(p0) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096
|
||||
@ -362,9 +360,7 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
|
||||
; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
|
||||
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def dead $scc
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
|
||||
%0:sgpr(p0) = COPY $sgpr0_sgpr1
|
||||
@ -387,9 +383,7 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
|
||||
@ -416,9 +410,7 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
||||
; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
|
||||
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def dead $scc
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
|
||||
%0:sgpr(p0) = COPY $sgpr0_sgpr1
|
||||
@ -441,7 +433,7 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -2
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -2
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
|
||||
@ -468,7 +460,7 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -4
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -4
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
|
||||
@ -495,7 +487,7 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -8
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -8
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
|
||||
@ -522,7 +514,7 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -16
|
||||
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -16
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
|
||||
@ -743,17 +735,15 @@ body: |
|
||||
; CHECK: liveins: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4042322160, implicit $exec
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec
|
||||
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
|
||||
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -1085102592571150096, implicit $exec
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
|
||||
; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
|
||||
; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec
|
||||
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
|
||||
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
|
||||
%0:vgpr(p0) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -1085102592571150096
|
||||
%2:vgpr(p0) = G_PTRMASK %0, %1
|
||||
|
||||
@ -906,28 +906,26 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x24
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX11-NEXT: s_mov_b32 s16, 0xb36211c7
|
||||
; GFX11-NEXT: s_mov_b32 s6, 2.0
|
||||
; GFX11-NEXT: s_movk_i32 s17, 0x102
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v9, s16 :: v_dual_lshlrev_b32 v2, 2, v0
|
||||
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x34
|
||||
; GFX11-NEXT: s_mov_b32 s8, 0x40400000
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v9, 0xb36211c7 :: v_dual_lshlrev_b32 v2, 2, v0
|
||||
; GFX11-NEXT: s_mov_b32 s12, 0x40c00000
|
||||
; GFX11-NEXT: s_mov_b32 s10, 0x40a00000
|
||||
; GFX11-NEXT: s_mov_b32 s9, 4.0
|
||||
; GFX11-NEXT: s_mov_b32 s14, 0x41000000
|
||||
; GFX11-NEXT: s_mov_b32 s13, 0x40e00000
|
||||
; GFX11-NEXT: v_dual_mov_b32 v10, s17 :: v_dual_mov_b32 v3, s8
|
||||
; GFX11-NEXT: v_mov_b32_e32 v6, s12
|
||||
; GFX11-NEXT: v_mov_b32_e32 v4, s9
|
||||
; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v5, s10
|
||||
; GFX11-NEXT: v_bfrev_b32_e32 v10, 4.0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v3, s8
|
||||
; GFX11-NEXT: v_dual_mov_b32 v4, s9 :: v_dual_mov_b32 v7, s13
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
|
||||
; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v0, s4
|
||||
; GFX11-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX11-NEXT: s_mov_b32 s4, 0
|
||||
; GFX11-NEXT: s_mov_b32 s5, 1.0
|
||||
; GFX11-NEXT: v_mov_b32_e32 v7, s13
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX11-NEXT: flat_load_b32 v11, v[0:1]
|
||||
@ -1012,17 +1010,16 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x24
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX11-NEXT: s_mov_b32 s12, 0xb36211c6
|
||||
; GFX11-NEXT: s_mov_b32 s6, 2.0
|
||||
; GFX11-NEXT: s_movk_i32 s13, 0x102
|
||||
; GFX11-NEXT: s_mov_b32 s8, 0x42004600
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
|
||||
; GFX11-NEXT: s_mov_b32 s9, 0x44004700
|
||||
; GFX11-NEXT: s_mov_b32 s10, 0x45004800
|
||||
; GFX11-NEXT: v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v3, s8
|
||||
; GFX11-NEXT: v_mov_b32_e32 v7, s13
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
|
||||
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x34
|
||||
; GFX11-NEXT: v_dual_mov_b32 v4, s9 :: v_dual_mov_b32 v5, s10
|
||||
; GFX11-NEXT: v_mov_b32_e32 v6, 0xb36211c6
|
||||
; GFX11-NEXT: v_bfrev_b32_e32 v7, 4.0
|
||||
; GFX11-NEXT: v_mov_b32_e32 v3, s8
|
||||
; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s9
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
|
||||
; GFX11-NEXT: s_mov_b32 s4, 0
|
||||
|
||||
@ -326,7 +326,6 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
|
||||
; GCN-NEXT: v_pk_mov_b32 v[0:1], s[10:11], s[10:11] op_sel:[0,1]
|
||||
; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, s0
|
||||
; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
|
||||
@ -334,6 +333,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, s7
|
||||
; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
|
||||
@ -77,11 +77,12 @@ define double @v_rsq_clamp_f64(double %src) #0 {
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
|
||||
; VI-NEXT: s_mov_b32 s4, -1
|
||||
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
|
||||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
|
||||
; VI-NEXT: s_mov_b32 s5, 0xffefffff
|
||||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
|
||||
; VI-NEXT: v_mov_b32_e32 v2, -1
|
||||
; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
|
||||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
|
||||
; VI-NEXT: v_mov_b32_e32 v2, -1
|
||||
; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
|
||||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
||||
; VI-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-LABEL: v_rsq_clamp_f64:
|
||||
@ -92,13 +93,14 @@ define double @v_rsq_clamp_f64(double %src) #0 {
|
||||
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
|
||||
; GFX12-NEXT: s_mov_b32 s0, -1
|
||||
; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
|
||||
; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
|
||||
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
||||
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
|
||||
ret double %rsq_clamp
|
||||
@ -115,11 +117,12 @@ define double @v_rsq_clamp_fabs_f64(double %src) #0 {
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; VI-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
|
||||
; VI-NEXT: s_mov_b32 s4, -1
|
||||
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
|
||||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
|
||||
; VI-NEXT: s_mov_b32 s5, 0xffefffff
|
||||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
|
||||
; VI-NEXT: v_mov_b32_e32 v2, -1
|
||||
; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
|
||||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
|
||||
; VI-NEXT: v_mov_b32_e32 v2, -1
|
||||
; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
|
||||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
||||
; VI-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-LABEL: v_rsq_clamp_fabs_f64:
|
||||
@ -130,13 +133,14 @@ define double @v_rsq_clamp_fabs_f64(double %src) #0 {
|
||||
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
|
||||
; GFX12-NEXT: s_mov_b32 s0, -1
|
||||
; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
|
||||
; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
|
||||
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
||||
%fabs.src = call double @llvm.fabs.f64(double %src)
|
||||
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
|
||||
@ -185,11 +189,12 @@ define double @v_rsq_clamp_undef_f64() #0 {
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; VI-NEXT: v_rsq_f64_e32 v[0:1], s[4:5]
|
||||
; VI-NEXT: s_mov_b32 s4, -1
|
||||
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
|
||||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
|
||||
; VI-NEXT: s_mov_b32 s5, 0xffefffff
|
||||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
|
||||
; VI-NEXT: v_mov_b32_e32 v2, -1
|
||||
; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
|
||||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
|
||||
; VI-NEXT: v_mov_b32_e32 v2, -1
|
||||
; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
|
||||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
||||
; VI-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-LABEL: v_rsq_clamp_undef_f64:
|
||||
@ -200,13 +205,14 @@ define double @v_rsq_clamp_undef_f64() #0 {
|
||||
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
|
||||
; GFX12-NEXT: s_mov_b32 s0, -1
|
||||
; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
|
||||
; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
|
||||
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
||||
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
|
||||
ret double %rsq_clamp
|
||||
@ -254,11 +260,12 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
|
||||
; VI-NEXT: s_mov_b32 s4, -1
|
||||
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
|
||||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
|
||||
; VI-NEXT: s_mov_b32 s5, 0xffefffff
|
||||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
|
||||
; VI-NEXT: v_mov_b32_e32 v2, -1
|
||||
; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
|
||||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
|
||||
; VI-NEXT: v_mov_b32_e32 v2, -1
|
||||
; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
|
||||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
|
||||
; VI-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-LABEL: v_rsq_clamp_f64_non_ieee:
|
||||
@ -269,13 +276,14 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
|
||||
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
|
||||
; GFX12-NEXT: s_mov_b32 s0, -1
|
||||
; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
|
||||
; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, -1
|
||||
; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
|
||||
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
||||
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
|
||||
ret double %rsq_clamp
|
||||
|
||||
@ -117,10 +117,8 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
|
||||
; GCN-LABEL: set_inactive_f64:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
|
||||
; GCN-NEXT: s_mov_b32 s4, 0xcccccccd
|
||||
; GCN-NEXT: s_mov_b32 s5, 0x4010cccc
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0xcccccccd
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 0x4010cccc
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s3
|
||||
|
||||
@ -7,12 +7,11 @@ declare void @llvm.memset.p1.i32(ptr addrspace(1), i8, i32, i1)
|
||||
define amdgpu_cs void @memset_p1i8(ptr addrspace(1) %dst, i8 %val) {
|
||||
; LOOP-LABEL: memset_p1i8:
|
||||
; LOOP: ; %bb.0: ; %loadstoreloop.preheader
|
||||
; LOOP-NEXT: s_mov_b64 s[4:5], 0
|
||||
; LOOP-NEXT: s_mov_b64 s[0:1], 0
|
||||
; LOOP-NEXT: s_mov_b32 s2, 0
|
||||
; LOOP-NEXT: s_mov_b32 s3, 0xf000
|
||||
; LOOP-NEXT: s_mov_b64 s[0:1], 0
|
||||
; LOOP-NEXT: v_mov_b32_e32 v3, s4
|
||||
; LOOP-NEXT: v_mov_b32_e32 v4, s5
|
||||
; LOOP-NEXT: v_mov_b32_e32 v4, s1
|
||||
; LOOP-NEXT: v_mov_b32_e32 v3, s0
|
||||
; LOOP-NEXT: .LBB0_1: ; %loadstoreloop
|
||||
; LOOP-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; LOOP-NEXT: v_add_i32_e32 v5, vcc, v0, v3
|
||||
|
||||
@ -76,29 +76,25 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(ptr addrspace(1) inreg %p
|
||||
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
|
||||
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s4, 0
|
||||
; GFX6-NEXT: s_mov_b32 s5, 4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, 4
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0
|
||||
; GFX7-NEXT: s_mov_b32 s5, 4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, 4
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -120,29 +116,25 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(ptr addrspace(1) in
|
||||
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) {
|
||||
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s4, 4
|
||||
; GFX6-NEXT: s_mov_b32 s5, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, 4
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, 4
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s4, 4
|
||||
; GFX7-NEXT: s_mov_b32 s5, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, 4
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, 4
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -234,9 +226,9 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0
|
||||
; GFX6-NEXT: s_mov_b32 s1, 4
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s0
|
||||
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -244,21 +236,16 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s0, 0
|
||||
; GFX7-NEXT: s_mov_b32 s1, 4
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s0
|
||||
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_mov_b32 s0, 0
|
||||
; GFX12-NEXT: s_mov_b32 s1, 4
|
||||
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
|
||||
; GFX12-NEXT: s_nop 0
|
||||
@ -273,7 +260,7 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
|
||||
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s0, 4
|
||||
; GFX6-NEXT: s_mov_b32 s1, s0
|
||||
; GFX6-NEXT: s_mov_b32 s1, 4
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
@ -283,7 +270,7 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
|
||||
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s0, 4
|
||||
; GFX7-NEXT: s_mov_b32 s1, s0
|
||||
; GFX7-NEXT: s_mov_b32 s1, 4
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
@ -292,13 +279,8 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
|
||||
;
|
||||
; GFX12-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_mov_b32 s0, 4
|
||||
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: s_mov_b32 s1, s0
|
||||
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 4
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
|
||||
; GFX12-NEXT: s_nop 0
|
||||
@ -715,28 +697,24 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(ptr addrspace(1) inreg %p
|
||||
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
|
||||
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s4, 0
|
||||
; GFX6-NEXT: s_mov_b32 s5, 4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, 4
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0
|
||||
; GFX7-NEXT: s_mov_b32 s5, 4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, 4
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
@ -758,28 +736,24 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(ptr addrspace(1) in
|
||||
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) {
|
||||
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s4, 4
|
||||
; GFX6-NEXT: s_mov_b32 s5, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, 4
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, 4
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s4, 4
|
||||
; GFX7-NEXT: s_mov_b32 s5, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, 4
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, 4
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
@ -868,8 +842,8 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0
|
||||
; GFX6-NEXT: s_mov_b32 s1, 4
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s0
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
@ -878,21 +852,16 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s0, 0
|
||||
; GFX7-NEXT: s_mov_b32 s1, 4
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s0
|
||||
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX12-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_mov_b32 s0, 0
|
||||
; GFX12-NEXT: s_mov_b32 s1, 4
|
||||
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
|
||||
; GFX12-NEXT: global_load_b32 v0, v[0:1], off scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
@ -905,7 +874,7 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
|
||||
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s0, 4
|
||||
; GFX6-NEXT: s_mov_b32 s1, s0
|
||||
; GFX6-NEXT: s_mov_b32 s1, 4
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
|
||||
@ -915,7 +884,7 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
|
||||
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s0, 4
|
||||
; GFX7-NEXT: s_mov_b32 s1, s0
|
||||
; GFX7-NEXT: s_mov_b32 s1, 4
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
|
||||
@ -924,13 +893,8 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
|
||||
;
|
||||
; GFX12-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_mov_b32 s0, 4
|
||||
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: s_mov_b32 s1, s0
|
||||
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 4
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
|
||||
; GFX12-NEXT: global_load_b32 v0, v[0:1], off scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
@ -1307,15 +1271,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(ptr addrspace(1) inr
|
||||
define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
|
||||
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s4, 0
|
||||
; GFX6-NEXT: s_mov_b32 s5, 4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, 2
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 4
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
||||
; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
@ -1324,15 +1286,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(
|
||||
;
|
||||
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0
|
||||
; GFX7-NEXT: s_mov_b32 s5, 4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, 2
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 4
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
||||
; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
@ -1404,8 +1364,8 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0
|
||||
; GFX6-NEXT: s_mov_b32 s1, 4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 2
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s0
|
||||
; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
@ -1418,8 +1378,8 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
|
||||
; GFX7-NEXT: s_mov_b32 s0, 0
|
||||
; GFX7-NEXT: s_mov_b32 s1, 4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 2
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s0
|
||||
; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
@ -1428,13 +1388,8 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
|
||||
;
|
||||
; GFX12-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_mov_b32 s0, 0
|
||||
; GFX12-NEXT: s_mov_b32 s1, 4
|
||||
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, 2
|
||||
; GFX12-NEXT: global_wb scope:SCOPE_DEV
|
||||
; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
|
||||
@ -1549,15 +1504,13 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(ptr addrspace(1) inreg
|
||||
define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr, i32 %old, i32 %in) {
|
||||
; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s4, 0
|
||||
; GFX6-NEXT: s_mov_b32 s5, 4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, 4
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
@ -1567,15 +1520,13 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1)
|
||||
;
|
||||
; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0
|
||||
; GFX7-NEXT: s_mov_b32 s5, 4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v4, 4
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v4, s5
|
||||
; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
@ -1649,8 +1600,8 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, v2
|
||||
; GFX6-NEXT: s_mov_b32 s1, 4
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s0
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
@ -1663,8 +1614,8 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
|
||||
; GFX7-NEXT: s_mov_b32 s0, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v4, v2
|
||||
; GFX7-NEXT: s_mov_b32 s1, 4
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s0
|
||||
; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
@ -1673,13 +1624,9 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
|
||||
;
|
||||
; GFX12-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_mov_b32 s0, 0
|
||||
; GFX12-NEXT: s_mov_b32 s1, 4
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
|
||||
; GFX12-NEXT: v_mov_b32_e32 v4, v2
|
||||
; GFX12-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v5, s0
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v5
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v6, vcc_lo
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
|
||||
; GFX12-NEXT: global_wb scope:SCOPE_DEV
|
||||
; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v[0:1], v[3:4], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
|
||||
@ -218,6 +218,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
|
||||
; CHECK-NEXT: s_subb_u32 s5, 0, s11
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], s[8:9]
|
||||
; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
|
||||
; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
|
||||
; CHECK-NEXT: v_trunc_f32_e32 v2, v1
|
||||
@ -326,10 +327,9 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9]
|
||||
; CHECK-NEXT: v_xor_b32_e32 v0, s0, v0
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
|
||||
; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||
; CHECK-NEXT: s_mov_b32 s0, 0
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
|
||||
; CHECK-NEXT: s_branch .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
|
||||
@ -193,7 +193,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: s_mov_b32 s7, -1
|
||||
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[0:1], 0
|
||||
; CHECK-NEXT: s_mov_b32 s0, 1
|
||||
; CHECK-NEXT: s_mov_b32 s7, 1
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: s_ashr_i32 s6, s3, 31
|
||||
@ -212,6 +212,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
|
||||
; CHECK-NEXT: s_subb_u32 s5, 0, s9
|
||||
; CHECK-NEXT: s_mov_b32 s7, 0
|
||||
; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
|
||||
; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
|
||||
; CHECK-NEXT: v_trunc_f32_e32 v2, v1
|
||||
@ -272,43 +273,43 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
|
||||
; CHECK-NEXT: v_mul_lo_u32 v2, s11, v0
|
||||
; CHECK-NEXT: v_mul_lo_u32 v3, s10, v1
|
||||
; CHECK-NEXT: v_mul_hi_u32 v4, s10, v0
|
||||
; CHECK-NEXT: v_mul_hi_u32 v5, s10, v0
|
||||
; CHECK-NEXT: v_mul_hi_u32 v0, s11, v0
|
||||
; CHECK-NEXT: v_mul_hi_u32 v5, s11, v1
|
||||
; CHECK-NEXT: v_mul_hi_u32 v6, s11, v1
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||||
; CHECK-NEXT: v_mul_lo_u32 v4, s11, v1
|
||||
; CHECK-NEXT: v_mul_lo_u32 v5, s11, v1
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
|
||||
; CHECK-NEXT: v_mul_hi_u32 v3, s10, v1
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v0, v2
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0
|
||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v0, v2
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v2, v[1:2]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v5, s11
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s11
|
||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s10, v0
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s9
|
||||
; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, s9
|
||||
; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v3, v1, vcc
|
||||
; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s11, v1
|
||||
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1]
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1]
|
||||
; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0
|
||||
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1]
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2
|
||||
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[0:1]
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0
|
||||
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s9, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v5, s[0:1]
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v3
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
|
||||
@ -321,12 +322,11 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
|
||||
; CHECK-NEXT: s_mov_b32 s0, 0
|
||||
; CHECK-NEXT: s_branch .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: .LBB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_xor_b32 s0, s0, 1
|
||||
; CHECK-NEXT: s_xor_b32 s0, s7, 1
|
||||
; CHECK-NEXT: s_and_b32 s0, s0, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
|
||||
|
||||
@ -191,7 +191,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: s_mov_b32 s7, -1
|
||||
; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0
|
||||
; CHECK-NEXT: s_mov_b32 s4, 1
|
||||
; CHECK-NEXT: s_mov_b32 s6, 1
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
@ -199,6 +199,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3
|
||||
; CHECK-NEXT: s_sub_u32 s4, 0, s2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s1
|
||||
; CHECK-NEXT: s_mov_b32 s6, 0
|
||||
; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2
|
||||
; CHECK-NEXT: s_subb_u32 s5, 0, s3
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
|
||||
@ -317,12 +318,11 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0
|
||||
; CHECK-NEXT: s_branch .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: .LBB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_xor_b32 s1, s4, 1
|
||||
; CHECK-NEXT: s_xor_b32 s1, s6, 1
|
||||
; CHECK-NEXT: s_and_b32 s1, s1, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
|
||||
|
||||
@ -188,13 +188,14 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: s_mov_b32 s7, -1
|
||||
; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0
|
||||
; CHECK-NEXT: s_mov_b32 s4, 1
|
||||
; CHECK-NEXT: s_mov_b32 s6, 1
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s3
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3
|
||||
; CHECK-NEXT: s_sub_u32 s4, 0, s2
|
||||
; CHECK-NEXT: s_mov_b32 s6, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s1
|
||||
; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2
|
||||
; CHECK-NEXT: s_subb_u32 s5, 0, s3
|
||||
@ -313,12 +314,11 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0
|
||||
; CHECK-NEXT: s_branch .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: .LBB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_xor_b32 s1, s4, 1
|
||||
; CHECK-NEXT: s_xor_b32 s1, s6, 1
|
||||
; CHECK-NEXT: s_and_b32 s1, s1, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -310,8 +310,8 @@ ret:
|
||||
|
||||
; GFX11-LABEL: tied_operand_test:
|
||||
; GFX11: ; %bb.0: ; %entry
|
||||
; GFX11-DAG: scratch_load_u16 [[LDRESULT:v[0-9]+]], off, off
|
||||
; GFX11-DAG: v_mov_b32_e32 [[C:v[0-9]+]], 0x7b
|
||||
; GFX11: scratch_load_u16 [[LDRESULT:v[0-9]+]], off, off
|
||||
; GFX11: v_dual_mov_b32 [[C:v[0-9]+]], 0x7b :: v_dual_mov_b32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX11-DAG: ds_store_b16 v{{[0-9]+}}, [[LDRESULT]] offset:10
|
||||
; GFX11-DAG: ds_store_b16 v{{[0-9]+}}, [[C]] offset:8
|
||||
; GFX11-NEXT: s_endpgm
|
||||
|
||||
@ -974,19 +974,19 @@ define amdgpu_kernel void @bit4_inselt(ptr addrspace(1) %out, <4 x i1> %vec, i32
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_and_b32 s3, s3, 3
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GCN-NEXT: v_lshrrev_b16_e64 v2, 1, s2
|
||||
; GCN-NEXT: v_lshrrev_b16_e64 v3, 2, s2
|
||||
; GCN-NEXT: v_lshrrev_b16_e64 v4, 3, s2
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, s2
|
||||
; GCN-NEXT: v_lshrrev_b16_e64 v4, 2, s2
|
||||
; GCN-NEXT: v_lshrrev_b16_e64 v5, 3, s2
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 1
|
||||
; GCN-NEXT: v_or_b32_e32 v0, s3, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
|
||||
; GCN-NEXT: v_and_b32_e32 v3, 3, v3
|
||||
; GCN-NEXT: v_and_b32_e32 v4, 1, v4
|
||||
; GCN-NEXT: buffer_store_byte v1, off, s[12:15], 0
|
||||
; GCN-NEXT: buffer_store_byte v4, off, s[12:15], 0 offset:3
|
||||
; GCN-NEXT: buffer_store_byte v3, off, s[12:15], 0 offset:2
|
||||
; GCN-NEXT: v_and_b32_e32 v4, 3, v4
|
||||
; GCN-NEXT: v_and_b32_e32 v5, 1, v5
|
||||
; GCN-NEXT: buffer_store_byte v3, off, s[12:15], 0
|
||||
; GCN-NEXT: buffer_store_byte v5, off, s[12:15], 0 offset:3
|
||||
; GCN-NEXT: buffer_store_byte v4, off, s[12:15], 0 offset:2
|
||||
; GCN-NEXT: buffer_store_byte v2, off, s[12:15], 0 offset:1
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 1
|
||||
; GCN-NEXT: buffer_store_byte v1, v0, s[12:15], 0 offen
|
||||
; GCN-NEXT: buffer_load_ubyte v0, off, s[12:15], 0
|
||||
; GCN-NEXT: buffer_load_ubyte v1, off, s[12:15], 0 offset:1
|
||||
|
||||
@ -94,14 +94,14 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) {
|
||||
; CHECK-LABEL: module_0_kernel_normal_extern_normal:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; CHECK-NEXT: s_add_i32 s0, s0, 4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, s0
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b32 v2, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b32 v2, v1
|
||||
; CHECK-NEXT: s_endpgm
|
||||
store i16 2, ptr addrspace(3) @kernel_normal
|
||||
|
||||
@ -134,14 +134,14 @@ define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) {
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_add_i32 s4, s4, 4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s4
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
|
||||
; CHECK-NEXT: ds_write_b32 v3, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
|
||||
; CHECK-NEXT: ds_write_b32 v3, v1
|
||||
; CHECK-NEXT: s_endpgm
|
||||
call void @use_module()
|
||||
store i16 1, ptr addrspace(3) @module_variable
|
||||
@ -157,14 +157,14 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) {
|
||||
; CHECK-LABEL: module_0_kernel_overalign_extern_normal:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; CHECK-NEXT: s_add_i32 s0, s0, 4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, s0
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b32 v2, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b32 v2, v1
|
||||
; CHECK-NEXT: s_endpgm
|
||||
store i16 2, ptr addrspace(3) @kernel_overalign
|
||||
|
||||
@ -197,14 +197,14 @@ define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) {
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_add_i32 s4, s4, 8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s4
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
|
||||
; CHECK-NEXT: ds_write_b32 v3, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
|
||||
; CHECK-NEXT: ds_write_b32 v3, v1
|
||||
; CHECK-NEXT: s_endpgm
|
||||
call void @use_module()
|
||||
store i16 1, ptr addrspace(3) @module_variable
|
||||
@ -220,14 +220,14 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) {
|
||||
; CHECK-LABEL: module_0_kernel_normal_extern_overalign:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; CHECK-NEXT: s_add_i32 s0, s0, 8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, s0
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b32 v2, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b32 v2, v1
|
||||
; CHECK-NEXT: s_endpgm
|
||||
store i16 2, ptr addrspace(3) @kernel_normal
|
||||
|
||||
@ -260,14 +260,14 @@ define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) {
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_add_i32 s4, s4, 8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s4
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
|
||||
; CHECK-NEXT: ds_write_b32 v3, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
|
||||
; CHECK-NEXT: ds_write_b32 v3, v1
|
||||
; CHECK-NEXT: s_endpgm
|
||||
call void @use_module()
|
||||
store i16 1, ptr addrspace(3) @module_variable
|
||||
@ -283,14 +283,14 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx)
|
||||
; CHECK-LABEL: module_0_kernel_overalign_extern_overalign:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; CHECK-NEXT: s_add_i32 s0, s0, 8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, s0
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b32 v2, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b32 v2, v1
|
||||
; CHECK-NEXT: s_endpgm
|
||||
store i16 2, ptr addrspace(3) @kernel_overalign
|
||||
|
||||
@ -323,14 +323,14 @@ define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx)
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_add_i32 s4, s4, 8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s4
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
|
||||
; CHECK-NEXT: ds_write_b32 v3, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
|
||||
; CHECK-NEXT: ds_write_b32 v3, v1
|
||||
; CHECK-NEXT: s_endpgm
|
||||
call void @use_module()
|
||||
store i16 1, ptr addrspace(3) @module_variable
|
||||
@ -368,11 +368,11 @@ define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_normal(i32 %id
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CHECK-NEXT: s_mov_b32 s15, 0
|
||||
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; CHECK-NEXT: ds_write_b16 v3, v4
|
||||
; CHECK-NEXT: ds_write_b16 v4, v3
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
@ -408,12 +408,12 @@ define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_normal(i32 %id
|
||||
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||||
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
@ -445,11 +445,11 @@ define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_normal(i32
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CHECK-NEXT: s_mov_b32 s15, 2
|
||||
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; CHECK-NEXT: ds_write_b16 v3, v4
|
||||
; CHECK-NEXT: ds_write_b16 v4, v3
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
@ -485,12 +485,12 @@ define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_normal(i32
|
||||
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||||
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
@ -522,11 +522,11 @@ define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_overalign(i32
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CHECK-NEXT: s_mov_b32 s15, 1
|
||||
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; CHECK-NEXT: ds_write_b16 v3, v4
|
||||
; CHECK-NEXT: ds_write_b16 v4, v3
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
@ -562,12 +562,12 @@ define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_overalign(i32
|
||||
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||||
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
@ -599,11 +599,11 @@ define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_overalign(i
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, 2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CHECK-NEXT: s_mov_b32 s15, 3
|
||||
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; CHECK-NEXT: ds_write_b16 v3, v4
|
||||
; CHECK-NEXT: ds_write_b16 v4, v3
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
@ -639,12 +639,12 @@ define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_overalign(i
|
||||
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||||
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||||
; CHECK-NEXT: ds_write_b16 v0, v1
|
||||
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
|
||||
; CHECK-NEXT: ds_write_b16 v1, v0
|
||||
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
|
||||
@ -19,18 +19,18 @@ $_f2 = comdat any
|
||||
define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce) local_unnamed_addr #0 {
|
||||
; GCN-LABEL: test:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 2
|
||||
; GCN-NEXT: ds_write_b8 v0, v1
|
||||
; GCN-NEXT: ds_read_u8 v2, v0 offset:2
|
||||
; GCN-NEXT: ds_read_u16 v3, v0
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 2
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: ds_write_b8 v1, v0
|
||||
; GCN-NEXT: ds_read_u8 v2, v1 offset:2
|
||||
; GCN-NEXT: ds_read_u16 v3, v1
|
||||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b8 v0, v2 offset:6
|
||||
; GCN-NEXT: ds_write_b16 v0, v3 offset:4
|
||||
; GCN-NEXT: v_cmp_eq_u16_sdwa s[2:3], v3, v1 src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GCN-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
|
||||
; GCN-NEXT: global_store_byte v0, v1, s[0:1]
|
||||
; GCN-NEXT: ds_write_b8 v1, v2 offset:6
|
||||
; GCN-NEXT: ds_write_b16 v1, v3 offset:4
|
||||
; GCN-NEXT: v_cmp_eq_u16_sdwa s[2:3], v3, v0 src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
|
||||
; GCN-NEXT: global_store_byte v1, v0, s[0:1]
|
||||
; GCN-NEXT: s_endpgm
|
||||
; CHECK-LABEL: define protected amdgpu_kernel void @test(
|
||||
; CHECK-SAME: ptr addrspace(1) nocapture [[PTR_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
||||
@ -47,7 +47,6 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce
|
||||
; CHECK-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP2]] to i8
|
||||
; CHECK-NEXT: store i8 [[FROMBOOL8]], ptr addrspace(1) [[PTR_COERCE]], align 1
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
store i8 3, ptr addrspace(3) @_f1, align 1
|
||||
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false)
|
||||
|
||||
@ -17,10 +17,9 @@ define void @fence_loads(ptr %ptr) {
|
||||
; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !1
|
||||
; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4)
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1, mmra !2
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, mmra !2
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], mmra !2
|
||||
; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[COPY4]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4)
|
||||
; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
fence release, !mmra !0
|
||||
%ld = load atomic i8, ptr %ptr acquire, align 4, !mmra !2
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -924,12 +924,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -937,12 +933,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
|
||||
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -950,13 +942,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
|
||||
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -968,13 +955,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x7ff
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1028,39 +1010,26 @@ define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
|
||||
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
|
||||
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1072,13 +1041,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x800
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1092,15 +1056,6 @@ define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
|
||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
|
||||
; GFX10-SDAG: ; %bb.0:
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
|
||||
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
|
||||
; GFX11-SDAG: ; %bb.0:
|
||||
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -1132,12 +1087,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1145,12 +1096,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
|
||||
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1158,13 +1105,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
|
||||
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1176,13 +1118,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0xfff
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1233,45 +1170,32 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
|
||||
|
||||
; Fill 12-bit low-bits (1ull << 33) | 4096
|
||||
define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) {
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
|
||||
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
|
||||
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
|
||||
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
@ -1280,44 +1204,12 @@ define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1000
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX9-SDAG: ; %bb.0:
|
||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
|
||||
; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
|
||||
; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX10-SDAG: ; %bb.0:
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
|
||||
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX11-SDAG: ; %bb.0:
|
||||
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
|
||||
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
@ -1340,12 +1232,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1353,12 +1241,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
|
||||
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1366,13 +1250,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
|
||||
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1384,13 +1263,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1fff
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1441,45 +1315,32 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
|
||||
|
||||
; Fill 13-bit low-bits (1ull << 33) | 8192
|
||||
define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) {
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
|
||||
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
|
||||
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
|
||||
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
@ -1488,44 +1349,12 @@ define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x2000
|
||||
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX9-SDAG: ; %bb.0:
|
||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
|
||||
; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
|
||||
; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX10-SDAG: ; %bb.0:
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
|
||||
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX11-SDAG: ; %bb.0:
|
||||
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
|
||||
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
|
||||
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
@ -1548,12 +1377,9 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
|
||||
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1561,12 +1387,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
|
||||
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1574,13 +1396,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
|
||||
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1592,13 +1409,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x7ff
|
||||
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1653,39 +1465,27 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1)
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
|
||||
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
|
||||
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
|
||||
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1697,13 +1497,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1)
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x800
|
||||
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1718,15 +1513,6 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1)
|
||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
|
||||
; GFX10-SDAG: ; %bb.0:
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
|
||||
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
|
||||
; GFX11-SDAG: ; %bb.0:
|
||||
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -1758,12 +1544,9 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
|
||||
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1771,12 +1554,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
|
||||
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1784,13 +1563,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
|
||||
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1802,13 +1576,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0xfff
|
||||
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1863,42 +1632,30 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1)
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
|
||||
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
|
||||
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
|
||||
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
@ -1907,13 +1664,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1)
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1000
|
||||
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1928,24 +1680,6 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1)
|
||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
|
||||
; GFX10-SDAG: ; %bb.0:
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
|
||||
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
|
||||
; GFX11-SDAG: ; %bb.0:
|
||||
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
|
||||
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
@ -1968,12 +1702,9 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
|
||||
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1981,12 +1712,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
|
||||
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1994,13 +1721,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
|
||||
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -2012,13 +1734,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1fff
|
||||
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -2073,42 +1790,30 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1)
|
||||
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
|
||||
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
|
||||
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
|
||||
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
|
||||
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
|
||||
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
|
||||
; GFX11-GISEL: ; %bb.0:
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
|
||||
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
@ -2117,13 +1822,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1)
|
||||
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x2000
|
||||
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
|
||||
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -2138,24 +1838,6 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1)
|
||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
|
||||
; GFX10-SDAG: ; %bb.0:
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
|
||||
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
|
||||
; GFX11-SDAG: ; %bb.0:
|
||||
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
|
||||
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
|
||||
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
|
||||
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
|
||||
@ -1008,10 +1008,10 @@ define double @v_roundeven_f64(double %x) {
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX6-NEXT: v_or_b32_e32 v3, 0x43300000, v3
|
||||
; GFX6-NEXT: v_add_f64 v[4:5], v[0:1], v[2:3]
|
||||
; GFX6-NEXT: s_mov_b32 s4, -1
|
||||
; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
|
||||
; GFX6-NEXT: v_mov_b32_e32 v6, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v7, 0x432fffff
|
||||
; GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
|
||||
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
|
||||
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[6:7]
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1087,17 +1087,17 @@ define double @v_roundeven_f64_fneg(double %x) {
|
||||
; GFX6-LABEL: v_roundeven_f64_fneg:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
|
||||
; GFX6-NEXT: v_and_b32_e32 v3, 0x80000000, v6
|
||||
; GFX6-NEXT: v_xor_b32_e32 v8, 0x80000000, v1
|
||||
; GFX6-NEXT: v_and_b32_e32 v3, 0x80000000, v8
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX6-NEXT: v_or_b32_e32 v3, 0x43300000, v3
|
||||
; GFX6-NEXT: v_add_f64 v[4:5], -v[0:1], v[2:3]
|
||||
; GFX6-NEXT: s_mov_b32 s4, -1
|
||||
; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
|
||||
; GFX6-NEXT: v_mov_b32_e32 v6, -1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v7, 0x432fffff
|
||||
; GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
|
||||
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
|
||||
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[6:7]
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX7-LABEL: v_roundeven_f64_fneg:
|
||||
|
||||
@ -1,10 +1,8 @@
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-OPT %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-NOOPT %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}scalar_to_vector_i16:
|
||||
; GCN-NOOPT: s_mov_b32 [[S:s[0-9]+]], 42
|
||||
; GCN-NOOPT: v_mov_b32_e32 [[V:v[0-9]+]], [[S]]
|
||||
; GCN-OPT: v_mov_b32_e32 [[V:v[0-9]+]], 42
|
||||
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 42
|
||||
; GCN: buffer_store_short [[V]],
|
||||
define void @scalar_to_vector_i16() {
|
||||
%tmp = load <2 x i16>, ptr addrspace(5) undef
|
||||
@ -14,9 +12,7 @@ define void @scalar_to_vector_i16() {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}scalar_to_vector_f16:
|
||||
; GCN-NOOPT: s_mov_b32 [[S:s[0-9]+]], 0x3c00
|
||||
; GCN-NOOPT: v_mov_b32_e32 [[V:v[0-9]+]], [[S]]
|
||||
; GCN-OPT: v_mov_b32_e32 [[V:v[0-9]+]], 0x3c00
|
||||
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x3c00
|
||||
; GCN: buffer_store_short [[V]],
|
||||
define void @scalar_to_vector_f16() {
|
||||
%tmp = load <2 x half>, ptr addrspace(5) undef
|
||||
|
||||
@ -25,8 +25,7 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() {
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr4
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, s4
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, s4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: ds_write_b8 v1, v2
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], exec
|
||||
; CHECK-NEXT: v_writelane_b32 v0, s4, 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user