AMDGPU/GlobalISel: Partially move constant selection to patterns (#100786)

This is still relying on the manual code for splitting 64-bit
constants, and handling pointers.

We were missing some of the tablegen patterns for all immediate types,
so this has some side effect DAG path improvements. This also reduces
the diff in the 2 selector outputs.
This commit is contained in:
Matt Arsenault 2024-07-30 18:18:16 +04:00 committed by GitHub
parent b4444dca47
commit b356aa3e2d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
38 changed files with 2246 additions and 1789 deletions

View File

@ -398,8 +398,10 @@ def gi_as_i1timm : GICustomOperandRenderer<"renderTruncTImm">,
def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
GISDNodeXFormEquiv<NegateImm>;
def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">,
def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastFPImm32">,
GISDNodeXFormEquiv<bitcast_fpimm_to_i32>;
def gi_bitcast_fpimm_to_i64 : GICustomOperandRenderer<"renderBitcastFPImm64">,
GISDNodeXFormEquiv<bitcast_fpimm_to_i64>;
def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">,
GISDNodeXFormEquiv<IMMPopCount>;

View File

@ -2504,10 +2504,19 @@ bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
}
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
if (selectImpl(I, *CoverageInfo))
return true;
// FIXME: Relying on manual selection for 64-bit case, and pointer typed
// constants.
MachineBasicBlock *BB = I.getParent();
MachineOperand &ImmOp = I.getOperand(1);
Register DstReg = I.getOperand(0).getReg();
unsigned Size = MRI->getType(DstReg).getSizeInBits();
LLT Ty = MRI->getType(DstReg);
unsigned Size = Ty.getSizeInBits();
assert((Size == 64 || Ty.isPointer()) &&
"patterns should have selected this");
bool IsFP = false;
// The AMDGPU backend only supports Imm operands and not CImm or FPImm.
@ -5606,18 +5615,12 @@ void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
}
void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx == -1);
void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
const MachineOperand &Op = MI.getOperand(1);
if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
else {
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
MIB.addImm(Op.getCImm()->getSExtValue());
}
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
}
void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,

View File

@ -333,8 +333,17 @@ private:
void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderBitcastImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderBitcastFPImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderBitcastFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const {
renderBitcastFPImm(MIB, MI, OpIdx);
}
void renderBitcastFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const {
renderBitcastFPImm(MIB, MI, OpIdx);
}
void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;

View File

@ -828,7 +828,9 @@ def InlineImmFP64 : FPImmLeaf<f64, [{
class VGPRImm <dag frag> : PatLeaf<frag, [{
return isVGPRImm(N);
}]>;
}]> {
let GISelPredicateCode = [{return true;}];
}
def NegateImm : SDNodeXForm<imm, [{
return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);

View File

@ -2163,18 +2163,44 @@ def : GCNPat <
(S_MOV_B32 $ga)
>;
// FIXME: Workaround for ordering issue with peephole optimizer where
// a register class copy interferes with immediate folding. Should
// use s_mov_b32, which can be shrunk to s_movk_i32
def : GCNPat <
(VGPRImm<(f16 fpimm)>:$imm),
(V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
>;
foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
let True16Predicate = pred in {
def : GCNPat <
(VGPRImm<(i16 imm)>:$imm),
(V_MOV_B32_e32 imm:$imm)
>;
}
def : GCNPat <
(VGPRImm<(bf16 fpimm)>:$imm),
(V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
>;
// FIXME: Workaround for ordering issue with peephole optimizer where
// a register class copy interferes with immediate folding. Should
// use s_mov_b32, which can be shrunk to s_movk_i32
def : GCNPat <
(VGPRImm<(f16 fpimm)>:$imm),
(V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
>;
def : GCNPat <
(VGPRImm<(bf16 fpimm)>:$imm),
(V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
>;
}
let True16Predicate = UseRealTrue16Insts in {
def : GCNPat <
(VGPRImm<(i16 imm)>:$imm),
(V_MOV_B16_t16_e64 0, imm:$imm, 0)
>;
def : GCNPat <
(VGPRImm<(f16 fpimm)>:$imm),
(V_MOV_B16_t16_e64 0, $imm, 0)
>;
def : GCNPat <
(VGPRImm<(bf16 fpimm)>:$imm),
(V_MOV_B16_t16_e64 0, $imm, 0)
>;
}
// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
// immediate and wil be expanded as needed, but we will only use these patterns
@ -2229,20 +2255,15 @@ def : GCNPat <
(S_MOV_B64 InlineImm64:$imm)
>;
// XXX - Should this use a s_cmp to set SCC?
// Set to sign-extended 64-bit value (true = -1, false = 0)
def : GCNPat <
(i1 imm:$imm),
(S_MOV_B64 (i64 (as_i64imm $imm)))
> {
// Set to sign-extended 64-bit value (true = -1, false = 0)
def : GCNPat <(i1 imm:$imm),
(S_MOV_B64 imm:$imm)> {
let WaveSizePredicate = isWave64;
}
def : GCNPat <
(i1 imm:$imm),
(S_MOV_B32 (i32 (as_i32imm $imm)))
> {
def : GCNPat <(i1 imm:$imm),
(S_MOV_B32 imm:$imm)> {
let WaveSizePredicate = isWave32;
}

View File

@ -501,8 +501,8 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB4_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@ -710,8 +710,8 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v2
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB5_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@ -936,7 +936,7 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB6_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@ -1150,7 +1150,7 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB7_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)

View File

@ -501,8 +501,8 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB4_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@ -710,8 +710,8 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v2
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB5_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@ -936,7 +936,7 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB6_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@ -1150,7 +1150,7 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB7_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)

View File

@ -226,15 +226,16 @@ exit:
define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3 x i32> inreg %.WorkgroupId, <3 x i32> %.LocalInvocationId) #0 {
; GFX10-LABEL: single_lane_execution_attribute:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_mov_b32 s6, 0
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_mov_b32 s12, 0
; GFX10-NEXT: s_mov_b32 s13, -1
; GFX10-NEXT: s_mov_b32 s2, s0
; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], s[12:13]
; GFX10-NEXT: s_mov_b32 s3, s12
; GFX10-NEXT: s_mov_b32 s7, -1
; GFX10-NEXT: s_mov_b32 s2, s1
; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GFX10-NEXT: s_mov_b32 s1, 0
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v1, -1, 0
; GFX10-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
; GFX10-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
; GFX10-NEXT: s_or_b64 s[12:13], s[4:5], s[0:1]
; GFX10-NEXT: s_mov_b32 s3, -1
; GFX10-NEXT: s_load_dwordx8 s[4:11], s[12:13], 0x0
; GFX10-NEXT: v_mbcnt_hi_u32_b32 v1, -1, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1
; GFX10-NEXT: v_and_b32_e32 v3, 1, v1
@ -248,8 +249,8 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2
; GFX10-NEXT: s_cbranch_vccnz .LBB4_4
; GFX10-NEXT: ; %bb.1: ; %.preheader.preheader
; GFX10-NEXT: v_mov_b32_e32 v3, s12
; GFX10-NEXT: v_mov_b32_e32 v4, s12
; GFX10-NEXT: v_mov_b32_e32 v3, s1
; GFX10-NEXT: v_mov_b32_e32 v4, s1
; GFX10-NEXT: .LBB4_2: ; %.preheader
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: buffer_load_dword v5, v3, s[4:7], 0 offen
@ -261,17 +262,17 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3
; GFX10-NEXT: s_cbranch_vccnz .LBB4_2
; GFX10-NEXT: ; %bb.3: ; %.preheader._crit_edge
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v2
; GFX10-NEXT: s_mov_b32 s13, 0
; GFX10-NEXT: s_or_b32 s2, s0, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2
; GFX10-NEXT: s_mov_b32 s3, 0
; GFX10-NEXT: s_or_b32 s1, s0, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s1
; GFX10-NEXT: .LBB4_4: ; %Flow
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s13
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s3
; GFX10-NEXT: s_cbranch_vccz .LBB4_6
; GFX10-NEXT: ; %bb.5: ; %.19
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
; GFX10-NEXT: v_or_b32_e32 v3, 2, v1
; GFX10-NEXT: .LBB4_6: ; %.22
; GFX10-NEXT: v_add_lshl_u32 v0, v0, s1, 2
; GFX10-NEXT: v_add_lshl_u32 v0, v0, s2, 2
; GFX10-NEXT: buffer_store_dword v3, v0, s[8:11], 0 offen
; GFX10-NEXT: s_endpgm
.entry:

View File

@ -193,12 +193,12 @@ bb12:
define amdgpu_kernel void @break_loop(i32 %arg) {
; CHECK-LABEL: break_loop:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_load_dword s2, s[6:7], 0x0
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
; CHECK-NEXT: ; implicit-def: $vgpr1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_subrev_u32_e32 v0, s2, v0
; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
; CHECK-NEXT: v_subrev_u32_e32 v0, s0, v0
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_branch .LBB5_3
; CHECK-NEXT: .LBB5_1: ; %bb4
; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1

View File

@ -8,9 +8,9 @@ define double @v_floor_f64_ieee(double %x) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@ -31,9 +31,9 @@ define double @v_floor_f64_ieee_nnan(double %x) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
@ -51,9 +51,9 @@ define double @v_floor_f64_ieee_fneg(double %x) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@ -75,9 +75,9 @@ define double @v_floor_f64_nonieee(double %x) #1 {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@ -98,9 +98,9 @@ define double @v_floor_f64_nonieee_nnan(double %x) #1 {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
@ -118,9 +118,9 @@ define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@ -142,9 +142,9 @@ define double @v_floor_f64_fabs(double %x) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], |v[0:1]|
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@ -166,9 +166,9 @@ define double @v_floor_f64_fneg_fabs(double %x) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -|v[0:1]|
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@ -190,9 +190,9 @@ define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
; GFX6-LABEL: s_floor_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3]
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
@ -214,9 +214,9 @@ define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3]
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
@ -239,9 +239,9 @@ define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]|
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
@ -264,9 +264,9 @@ define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]|
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3

View File

@ -434,8 +434,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
; GFX90A_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
; GFX90A_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
; GFX90A_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
; GFX90A_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
@ -446,8 +446,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
; GFX90A_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX90A_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
; GFX90A_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
; GFX90A_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
; GFX90A_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
@ -615,8 +615,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
; GFX940_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
; GFX940_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
; GFX940_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
; GFX940_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
; GFX940_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
@ -627,8 +627,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
; GFX940_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX940_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
; GFX940_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
; GFX940_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
; GFX940_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
@ -837,8 +837,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
; GFX90A_ITERATIVE-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
; GFX90A_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
; GFX90A_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
; GFX90A_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
@ -849,8 +849,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
; GFX90A_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX90A_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
; GFX90A_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
; GFX90A_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
; GFX90A_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
@ -1089,8 +1089,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
; GFX940_ITERATIVE-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
; GFX940_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
; GFX940_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
; GFX940_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
; GFX940_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
@ -1101,8 +1101,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
; GFX940_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX940_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
; GFX940_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
; GFX940_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
; GFX940_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc

View File

@ -142,13 +142,9 @@ body: |
; WAVE64-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
; WAVE64-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
; WAVE64-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
; WAVE64-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967296, implicit $exec
; WAVE64-NEXT: [[V_MOV_B7:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -68719453481, implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[V_MOV_B6]], implicit [[V_MOV_B7]]
;
; WAVE32-LABEL: name: constant_v_s64
; WAVE32: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
@ -157,13 +153,9 @@ body: |
; WAVE32-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
; WAVE32-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
; WAVE32-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
; WAVE32-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967296, implicit $exec
; WAVE32-NEXT: [[V_MOV_B7:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -68719453481, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[V_MOV_B6]], implicit [[V_MOV_B7]]
%0:vgpr(s64) = G_CONSTANT i64 0
%1:vgpr(s64) = G_CONSTANT i64 1
%2:vgpr(s64) = G_CONSTANT i64 -1
@ -184,34 +176,26 @@ tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: constant_s_s64
; WAVE64: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
; WAVE64-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
; WAVE64-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
; WAVE64-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
; WAVE64-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
; WAVE64-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; WAVE64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
; WAVE64-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; WAVE64-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
; WAVE64-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
; WAVE64-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
; WAVE64-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; WAVE64-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -68719453481
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_3]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]]
;
; WAVE32-LABEL: name: constant_s_s64
; WAVE32: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
; WAVE32-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
; WAVE32-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
; WAVE32-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
; WAVE32-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
; WAVE32-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; WAVE32-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
; WAVE32-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; WAVE32-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
; WAVE32-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
; WAVE32-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
; WAVE32-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; WAVE32-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -68719453481
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_3]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]]
%0:sgpr(s64) = G_CONSTANT i64 0
%1:sgpr(s64) = G_CONSTANT i64 1
%2:sgpr(s64) = G_CONSTANT i64 -1
@ -310,6 +294,195 @@ body: |
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
...
---
name: constant_s_p2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: constant_s_p2
; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
;
; WAVE32-LABEL: name: constant_s_p2
; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
%0:sgpr(p2) = G_CONSTANT i32 0
%1:sgpr(p2) = G_CONSTANT i32 1
%2:sgpr(p2) = G_CONSTANT i32 -1
%3:sgpr(p2) = G_CONSTANT i32 -54
%4:sgpr(p2) = G_CONSTANT i32 27
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
...
---
name: constant_v_p2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: constant_v_p2
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
;
; WAVE32-LABEL: name: constant_v_p2
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
%0:vgpr(p2) = G_CONSTANT i32 0
%1:vgpr(p2) = G_CONSTANT i32 1
%2:vgpr(p2) = G_CONSTANT i32 -1
%3:vgpr(p2) = G_CONSTANT i32 -54
%4:vgpr(p2) = G_CONSTANT i32 27
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
...
---
name: constant_s_p5
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: constant_s_p5
; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
;
; WAVE32-LABEL: name: constant_s_p5
; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
%0:sgpr(p5) = G_CONSTANT i32 0
%1:sgpr(p5) = G_CONSTANT i32 1
%2:sgpr(p5) = G_CONSTANT i32 -1
%3:sgpr(p5) = G_CONSTANT i32 -54
%4:sgpr(p5) = G_CONSTANT i32 27
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
...
---
name: constant_v_p5
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: constant_v_p5
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
;
; WAVE32-LABEL: name: constant_v_p5
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
%0:vgpr(p5) = G_CONSTANT i32 0
%1:vgpr(p5) = G_CONSTANT i32 1
%2:vgpr(p5) = G_CONSTANT i32 -1
%3:vgpr(p5) = G_CONSTANT i32 -54
%4:vgpr(p5) = G_CONSTANT i32 27
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
...
---
name: constant_s_p6
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: constant_s_p6
; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
;
; WAVE32-LABEL: name: constant_s_p6
; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
%0:sgpr(p6) = G_CONSTANT i32 0
%1:sgpr(p6) = G_CONSTANT i32 1
%2:sgpr(p6) = G_CONSTANT i32 -1
%3:sgpr(p6) = G_CONSTANT i32 -54
%4:sgpr(p6) = G_CONSTANT i32 27
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
...
---
name: constant_v_p6
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: constant_v_p6
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
;
; WAVE32-LABEL: name: constant_v_p6
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
%0:vgpr(p6) = G_CONSTANT i32 0
%1:vgpr(p6) = G_CONSTANT i32 1
%2:vgpr(p6) = G_CONSTANT i32 -1
%3:vgpr(p6) = G_CONSTANT i32 -54
%4:vgpr(p6) = G_CONSTANT i32 27
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
...
---
name: constant_s_p1
legalized: true

View File

@ -87,13 +87,13 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: fconstant_s_s64
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4607182418800017408
; GCN-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4620693217682128896
; GCN-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4611686018427387904
; GCN-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4601552919265804288
; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B]]
; GCN-NEXT: $sgpr2_sgpr3 = COPY [[S_MOV_B1]]
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]]
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4607182418800017408
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4620693217682128896
; GCN-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -4611686018427387904
; GCN-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4601552919265804288
; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B64_]]
; GCN-NEXT: $sgpr2_sgpr3 = COPY [[S_MOV_B]]
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B1]]
%0:sgpr(s64) = G_FCONSTANT double 1.0
%1:sgpr(s64) = G_FCONSTANT double 8.0
%2:sgpr(s64) = G_FCONSTANT double -2.0

View File

@ -1374,11 +1374,11 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
@ -1389,11 +1389,11 @@ body: |
; GFX7: liveins: $sgpr0_sgpr1
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
@ -1404,11 +1404,11 @@ body: |
; GFX8: liveins: $sgpr0_sgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
@ -1419,11 +1419,11 @@ body: |
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
; GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
@ -1434,11 +1434,11 @@ body: |
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
; GFX11-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1

View File

@ -741,17 +741,15 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -759,17 +757,15 @@ body: |
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -777,17 +773,15 @@ body: |
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -795,17 +789,15 @@ body: |
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
@ -831,35 +823,31 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
;
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -867,17 +855,15 @@ body: |
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -885,17 +871,15 @@ body: |
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
@ -921,17 +905,15 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -939,17 +921,15 @@ body: |
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -957,17 +937,15 @@ body: |
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -975,17 +953,15 @@ body: |
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
@ -1010,17 +986,15 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -1028,17 +1002,15 @@ body: |
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -1046,17 +1018,15 @@ body: |
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@ -1064,17 +1034,15 @@ body: |
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1

View File

@ -45,9 +45,7 @@ regBankSelected: true
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
# Max immediate for CI
# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3
# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# SIVI: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 17179869180
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
@ -59,9 +57,7 @@ regBankSelected: true
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
# Immediate overflow for CI
# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4
# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# GCN: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 17179869184
# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
@ -77,9 +73,7 @@ regBankSelected: true
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
# Overflow 32-bit byte offset
# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1
# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# SIVI: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]

View File

@ -314,8 +314,8 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B]], implicit-def dead $scc
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def dead $scc
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 0
@ -337,10 +337,8 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def dead $scc
; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1085102592571150096
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B]], implicit-def dead $scc
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096
@ -362,9 +360,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def dead $scc
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
@ -387,9 +383,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
@ -416,9 +410,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def dead $scc
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
@ -441,7 +433,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -2
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
@ -468,7 +460,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -4
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
@ -495,7 +487,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -8
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
@ -522,7 +514,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -16
; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -16
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
@ -743,17 +735,15 @@ body: |
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4042322160, implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -1085102592571150096, implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -1085102592571150096
%2:vgpr(p0) = G_PTRMASK %0, %1

View File

@ -906,28 +906,26 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_mov_b32 s16, 0xb36211c7
; GFX11-NEXT: s_mov_b32 s6, 2.0
; GFX11-NEXT: s_movk_i32 s17, 0x102
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v9, s16 :: v_dual_lshlrev_b32 v2, 2, v0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x34
; GFX11-NEXT: s_mov_b32 s8, 0x40400000
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v9, 0xb36211c7 :: v_dual_lshlrev_b32 v2, 2, v0
; GFX11-NEXT: s_mov_b32 s12, 0x40c00000
; GFX11-NEXT: s_mov_b32 s10, 0x40a00000
; GFX11-NEXT: s_mov_b32 s9, 4.0
; GFX11-NEXT: s_mov_b32 s14, 0x41000000
; GFX11-NEXT: s_mov_b32 s13, 0x40e00000
; GFX11-NEXT: v_dual_mov_b32 v10, s17 :: v_dual_mov_b32 v3, s8
; GFX11-NEXT: v_mov_b32_e32 v6, s12
; GFX11-NEXT: v_mov_b32_e32 v4, s9
; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v5, s10
; GFX11-NEXT: v_bfrev_b32_e32 v10, 4.0
; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v3, s8
; GFX11-NEXT: v_dual_mov_b32 v4, s9 :: v_dual_mov_b32 v7, s13
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT: v_mov_b32_e32 v1, s5
; GFX11-NEXT: s_mov_b32 s4, 0
; GFX11-NEXT: s_mov_b32 s5, 1.0
; GFX11-NEXT: v_mov_b32_e32 v7, s13
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT: flat_load_b32 v11, v[0:1]
@ -1012,17 +1010,16 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_mov_b32 s12, 0xb36211c6
; GFX11-NEXT: s_mov_b32 s6, 2.0
; GFX11-NEXT: s_movk_i32 s13, 0x102
; GFX11-NEXT: s_mov_b32 s8, 0x42004600
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX11-NEXT: s_mov_b32 s9, 0x44004700
; GFX11-NEXT: s_mov_b32 s10, 0x45004800
; GFX11-NEXT: v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v3, s8
; GFX11-NEXT: v_mov_b32_e32 v7, s13
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x34
; GFX11-NEXT: v_dual_mov_b32 v4, s9 :: v_dual_mov_b32 v5, s10
; GFX11-NEXT: v_mov_b32_e32 v6, 0xb36211c6
; GFX11-NEXT: v_bfrev_b32_e32 v7, 4.0
; GFX11-NEXT: v_mov_b32_e32 v3, s8
; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s9
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-NEXT: s_mov_b32 s4, 0

View File

@ -326,7 +326,6 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
; GCN-NEXT: v_pk_mov_b32 v[0:1], s[10:11], s[10:11] op_sel:[0,1]
; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
; GCN-NEXT: v_accvgpr_write_b32 a0, s0
; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
; GCN-NEXT: v_accvgpr_write_b32 a1, s1
; GCN-NEXT: v_accvgpr_write_b32 a2, s2
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
@ -334,6 +333,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
; GCN-NEXT: v_accvgpr_write_b32 a5, s5
; GCN-NEXT: v_accvgpr_write_b32 a6, s6
; GCN-NEXT: v_accvgpr_write_b32 a7, s7
; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
; GCN-NEXT: s_nop 1
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
; GCN-NEXT: v_mov_b32_e32 v0, 0

View File

@ -77,11 +77,12 @@ define double @v_rsq_clamp_f64(double %src) #0 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT: s_mov_b32 s4, -1
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_mov_b32 s5, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: v_mov_b32_e32 v2, -1
; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: v_mov_b32_e32 v2, -1
; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_f64:
@ -92,13 +93,14 @@ define double @v_rsq_clamp_f64(double %src) #0 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
; GFX12-NEXT: s_mov_b32 s0, -1
; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
; GFX12-NEXT: v_mov_b32_e32 v2, -1
; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: v_mov_b32_e32 v2, -1
; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
ret double %rsq_clamp
@ -115,11 +117,12 @@ define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
; VI-NEXT: s_mov_b32 s4, -1
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_mov_b32 s5, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: v_mov_b32_e32 v2, -1
; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: v_mov_b32_e32 v2, -1
; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_fabs_f64:
@ -130,13 +133,14 @@ define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
; GFX12-NEXT: s_mov_b32 s0, -1
; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
; GFX12-NEXT: v_mov_b32_e32 v2, -1
; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: v_mov_b32_e32 v2, -1
; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fabs.src = call double @llvm.fabs.f64(double %src)
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
@ -185,11 +189,12 @@ define double @v_rsq_clamp_undef_f64() #0 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e32 v[0:1], s[4:5]
; VI-NEXT: s_mov_b32 s4, -1
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_mov_b32 s5, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: v_mov_b32_e32 v2, -1
; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: v_mov_b32_e32 v2, -1
; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_undef_f64:
@ -200,13 +205,14 @@ define double @v_rsq_clamp_undef_f64() #0 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
; GFX12-NEXT: s_mov_b32 s0, -1
; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
; GFX12-NEXT: v_mov_b32_e32 v2, -1
; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: v_mov_b32_e32 v2, -1
; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
ret double %rsq_clamp
@ -254,11 +260,12 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT: s_mov_b32 s4, -1
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_mov_b32 s5, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: v_mov_b32_e32 v2, -1
; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: v_mov_b32_e32 v2, -1
; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_f64_non_ieee:
@ -269,13 +276,14 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
; GFX12-NEXT: s_mov_b32 s0, -1
; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
; GFX12-NEXT: v_mov_b32_e32 v2, -1
; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: v_mov_b32_e32 v2, -1
; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
ret double %rsq_clamp

View File

@ -117,10 +117,8 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
; GCN-LABEL: set_inactive_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s4, 0xcccccccd
; GCN-NEXT: s_mov_b32 s5, 0x4010cccc
; GCN-NEXT: v_mov_b32_e32 v2, s4
; GCN-NEXT: v_mov_b32_e32 v3, s5
; GCN-NEXT: v_mov_b32_e32 v2, 0xcccccccd
; GCN-NEXT: v_mov_b32_e32 v3, 0x4010cccc
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3

View File

@ -7,12 +7,11 @@ declare void @llvm.memset.p1.i32(ptr addrspace(1), i8, i32, i1)
define amdgpu_cs void @memset_p1i8(ptr addrspace(1) %dst, i8 %val) {
; LOOP-LABEL: memset_p1i8:
; LOOP: ; %bb.0: ; %loadstoreloop.preheader
; LOOP-NEXT: s_mov_b64 s[4:5], 0
; LOOP-NEXT: s_mov_b64 s[0:1], 0
; LOOP-NEXT: s_mov_b32 s2, 0
; LOOP-NEXT: s_mov_b32 s3, 0xf000
; LOOP-NEXT: s_mov_b64 s[0:1], 0
; LOOP-NEXT: v_mov_b32_e32 v3, s4
; LOOP-NEXT: v_mov_b32_e32 v4, s5
; LOOP-NEXT: v_mov_b32_e32 v4, s1
; LOOP-NEXT: v_mov_b32_e32 v3, s0
; LOOP-NEXT: .LBB0_1: ; %loadstoreloop
; LOOP-NEXT: ; =>This Inner Loop Header: Depth=1
; LOOP-NEXT: v_add_i32_e32 v5, vcc, v0, v3

View File

@ -76,29 +76,25 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(ptr addrspace(1) inreg %p
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0
; GFX6-NEXT: s_mov_b32 s5, 4
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: v_mov_b32_e32 v1, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 0
; GFX7-NEXT: s_mov_b32 s5, 4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: v_mov_b32_e32 v1, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
;
@ -120,29 +116,25 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(ptr addrspace(1) in
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 4
; GFX6-NEXT: s_mov_b32 s5, s4
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v0, 4
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: v_mov_b32_e32 v1, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 4
; GFX7-NEXT: s_mov_b32 s5, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v0, 4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: v_mov_b32_e32 v1, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
;
@ -234,9 +226,9 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s0
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
@ -244,21 +236,16 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s0
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
;
; GFX12-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_mov_b32 s0, 0
; GFX12-NEXT: s_mov_b32 s1, 4
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_nop 0
@ -273,7 +260,7 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 4
; GFX6-NEXT: s_mov_b32 s1, s0
; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
@ -283,7 +270,7 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 4
; GFX7-NEXT: s_mov_b32 s1, s0
; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
@ -292,13 +279,8 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
;
; GFX12-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_mov_b32 s0, 4
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_mov_b32 s1, s0
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 4
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_nop 0
@ -715,28 +697,24 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(ptr addrspace(1) inreg %p
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0
; GFX6-NEXT: s_mov_b32 s5, 4
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v1, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 0
; GFX7-NEXT: s_mov_b32 s5, 4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v1, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
@ -758,28 +736,24 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(ptr addrspace(1) in
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 4
; GFX6-NEXT: s_mov_b32 s5, s4
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v0, 4
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v1, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 4
; GFX7-NEXT: s_mov_b32 s5, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v0, 4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v1, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
@ -868,8 +842,8 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s0
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
@ -878,21 +852,16 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s0
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_mov_b32 s0, 0
; GFX12-NEXT: s_mov_b32 s1, 4
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: global_load_b32 v0, v[0:1], off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
@ -905,7 +874,7 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 4
; GFX6-NEXT: s_mov_b32 s1, s0
; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
@ -915,7 +884,7 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 4
; GFX7-NEXT: s_mov_b32 s1, s0
; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
@ -924,13 +893,8 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
;
; GFX12-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_mov_b32 s0, 4
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_mov_b32 s1, s0
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 4
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: global_load_b32 v0, v[0:1], off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
@ -1307,15 +1271,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(ptr addrspace(1) inr
define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0
; GFX6-NEXT: s_mov_b32 s5, 4
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: v_mov_b32_e32 v0, 2
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
@ -1324,15 +1286,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(
;
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 0
; GFX7-NEXT: s_mov_b32 s5, 4
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_mov_b32_e32 v0, 2
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s4
; GFX7-NEXT: v_mov_b32_e32 v2, s5
; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1
@ -1404,8 +1364,8 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: v_mov_b32_e32 v2, 2
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s0
; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
@ -1418,8 +1378,8 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: v_mov_b32_e32 v2, 2
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s0
; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1
@ -1428,13 +1388,8 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
;
; GFX12-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_mov_b32 s0, 0
; GFX12-NEXT: s_mov_b32 s1, 4
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: v_mov_b32_e32 v2, 2
; GFX12-NEXT: global_wb scope:SCOPE_DEV
; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
@ -1549,15 +1504,13 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(ptr addrspace(1) inreg
define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr, i32 %old, i32 %in) {
; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0
; GFX6-NEXT: s_mov_b32 s5, 4
; GFX6-NEXT: v_mov_b32_e32 v3, s4
; GFX6-NEXT: v_mov_b32_e32 v3, 0
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: v_mov_b32_e32 v2, v0
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v4, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: v_mov_b32_e32 v4, s5
; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
@ -1567,15 +1520,13 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1)
;
; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 0
; GFX7-NEXT: s_mov_b32 s5, 4
; GFX7-NEXT: v_mov_b32_e32 v3, s4
; GFX7-NEXT: v_mov_b32_e32 v3, 0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_mov_b32_e32 v2, v0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v4, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s4
; GFX7-NEXT: v_mov_b32_e32 v4, s5
; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1
@ -1649,8 +1600,8 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: v_mov_b32_e32 v4, v2
; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s0
; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
@ -1663,8 +1614,8 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: v_mov_b32_e32 v4, v2
; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s0
; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1
@ -1673,13 +1624,9 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
;
; GFX12-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_mov_b32 s0, 0
; GFX12-NEXT: s_mov_b32 s1, 4
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX12-NEXT: v_mov_b32_e32 v4, v2
; GFX12-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v5, s0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v5
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v6, vcc_lo
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: global_wb scope:SCOPE_DEV
; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v[0:1], v[3:4], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0

View File

@ -218,6 +218,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT: s_subb_u32 s5, 0, s11
; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], s[8:9]
; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; CHECK-NEXT: v_trunc_f32_e32 v2, v1
@ -326,10 +327,9 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; CHECK-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9]
; CHECK-NEXT: v_xor_b32_e32 v0, s0, v0
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1

View File

@ -193,7 +193,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: s_mov_b32 s7, -1
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[0:1], 0
; CHECK-NEXT: s_mov_b32 s0, 1
; CHECK-NEXT: s_mov_b32 s7, 1
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_ashr_i32 s6, s3, 31
@ -212,6 +212,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT: s_subb_u32 s5, 0, s9
; CHECK-NEXT: s_mov_b32 s7, 0
; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; CHECK-NEXT: v_trunc_f32_e32 v2, v1
@ -272,43 +273,43 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
; CHECK-NEXT: v_mul_lo_u32 v2, s11, v0
; CHECK-NEXT: v_mul_lo_u32 v3, s10, v1
; CHECK-NEXT: v_mul_hi_u32 v4, s10, v0
; CHECK-NEXT: v_mul_hi_u32 v5, s10, v0
; CHECK-NEXT: v_mul_hi_u32 v0, s11, v0
; CHECK-NEXT: v_mul_hi_u32 v5, s11, v1
; CHECK-NEXT: v_mul_hi_u32 v6, s11, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_mul_lo_u32 v4, s11, v1
; CHECK-NEXT: v_mul_lo_u32 v5, s11, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_mul_hi_u32 v3, s10, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v0, v2
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v0, v2
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v2, v[1:2]
; CHECK-NEXT: v_mov_b32_e32 v5, s11
; CHECK-NEXT: v_mov_b32_e32 v3, s11
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s10, v0
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2]
; CHECK-NEXT: v_mov_b32_e32 v3, s9
; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2]
; CHECK-NEXT: v_mov_b32_e32 v4, s9
; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v3, v1, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s11, v1
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2
; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1]
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1]
; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[0:1]
; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s9, v1
; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v5, s[0:1]
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v3
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
@ -321,12 +322,11 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: s_xor_b32 s0, s0, 1
; CHECK-NEXT: s_xor_b32 s0, s7, 1
; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5

View File

@ -191,7 +191,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: s_mov_b32 s7, -1
; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0
; CHECK-NEXT: s_mov_b32 s4, 1
; CHECK-NEXT: s_mov_b32 s6, 1
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
@ -199,6 +199,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3
; CHECK-NEXT: s_sub_u32 s4, 0, s2
; CHECK-NEXT: v_mov_b32_e32 v3, s1
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2
; CHECK-NEXT: s_subb_u32 s5, 0, s3
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
@ -317,12 +318,11 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: s_xor_b32 s1, s4, 1
; CHECK-NEXT: s_xor_b32 s1, s6, 1
; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5

View File

@ -188,13 +188,14 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: s_mov_b32 s7, -1
; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0
; CHECK-NEXT: s_mov_b32 s4, 1
; CHECK-NEXT: s_mov_b32 s6, 1
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: v_mov_b32_e32 v0, s3
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3
; CHECK-NEXT: s_sub_u32 s4, 0, s2
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: v_mov_b32_e32 v3, s1
; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2
; CHECK-NEXT: s_subb_u32 s5, 0, s3
@ -313,12 +314,11 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: s_xor_b32 s1, s4, 1
; CHECK-NEXT: s_xor_b32 s1, s6, 1
; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5

File diff suppressed because it is too large Load Diff

View File

@ -310,8 +310,8 @@ ret:
; GFX11-LABEL: tied_operand_test:
; GFX11: ; %bb.0: ; %entry
; GFX11-DAG: scratch_load_u16 [[LDRESULT:v[0-9]+]], off, off
; GFX11-DAG: v_mov_b32_e32 [[C:v[0-9]+]], 0x7b
; GFX11: scratch_load_u16 [[LDRESULT:v[0-9]+]], off, off
; GFX11: v_dual_mov_b32 [[C:v[0-9]+]], 0x7b :: v_dual_mov_b32 v{{[0-9]+}}, s{{[0-9]+}}
; GFX11-DAG: ds_store_b16 v{{[0-9]+}}, [[LDRESULT]] offset:10
; GFX11-DAG: ds_store_b16 v{{[0-9]+}}, [[C]] offset:8
; GFX11-NEXT: s_endpgm

View File

@ -974,19 +974,19 @@ define amdgpu_kernel void @bit4_inselt(ptr addrspace(1) %out, <4 x i1> %vec, i32
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_and_b32 s3, s3, 3
; GCN-NEXT: v_mov_b32_e32 v1, s2
; GCN-NEXT: v_lshrrev_b16_e64 v2, 1, s2
; GCN-NEXT: v_lshrrev_b16_e64 v3, 2, s2
; GCN-NEXT: v_lshrrev_b16_e64 v4, 3, s2
; GCN-NEXT: v_mov_b32_e32 v3, s2
; GCN-NEXT: v_lshrrev_b16_e64 v4, 2, s2
; GCN-NEXT: v_lshrrev_b16_e64 v5, 3, s2
; GCN-NEXT: v_mov_b32_e32 v1, 1
; GCN-NEXT: v_or_b32_e32 v0, s3, v0
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
; GCN-NEXT: v_and_b32_e32 v3, 3, v3
; GCN-NEXT: v_and_b32_e32 v4, 1, v4
; GCN-NEXT: buffer_store_byte v1, off, s[12:15], 0
; GCN-NEXT: buffer_store_byte v4, off, s[12:15], 0 offset:3
; GCN-NEXT: buffer_store_byte v3, off, s[12:15], 0 offset:2
; GCN-NEXT: v_and_b32_e32 v4, 3, v4
; GCN-NEXT: v_and_b32_e32 v5, 1, v5
; GCN-NEXT: buffer_store_byte v3, off, s[12:15], 0
; GCN-NEXT: buffer_store_byte v5, off, s[12:15], 0 offset:3
; GCN-NEXT: buffer_store_byte v4, off, s[12:15], 0 offset:2
; GCN-NEXT: buffer_store_byte v2, off, s[12:15], 0 offset:1
; GCN-NEXT: v_mov_b32_e32 v1, 1
; GCN-NEXT: buffer_store_byte v1, v0, s[12:15], 0 offen
; GCN-NEXT: buffer_load_ubyte v0, off, s[12:15], 0
; GCN-NEXT: buffer_load_ubyte v1, off, s[12:15], 0 offset:1

View File

@ -94,14 +94,14 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 2
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 4
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b32 v2, v0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
store i16 2, ptr addrspace(3) @kernel_normal
@ -134,14 +134,14 @@ define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) {
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 4
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
; CHECK-NEXT: ds_write_b32 v3, v0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
call void @use_module()
store i16 1, ptr addrspace(3) @module_variable
@ -157,14 +157,14 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_overalign_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 2
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 4
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b32 v2, v0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
store i16 2, ptr addrspace(3) @kernel_overalign
@ -197,14 +197,14 @@ define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) {
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 8
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
; CHECK-NEXT: ds_write_b32 v3, v0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
call void @use_module()
store i16 1, ptr addrspace(3) @module_variable
@ -220,14 +220,14 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 2
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 8
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b32 v2, v0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
store i16 2, ptr addrspace(3) @kernel_normal
@ -260,14 +260,14 @@ define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) {
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 8
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
; CHECK-NEXT: ds_write_b32 v3, v0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
call void @use_module()
store i16 1, ptr addrspace(3) @module_variable
@ -283,14 +283,14 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx)
; CHECK-LABEL: module_0_kernel_overalign_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 2
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 8
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b32 v2, v0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
store i16 2, ptr addrspace(3) @kernel_overalign
@ -323,14 +323,14 @@ define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx)
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 8
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
; CHECK-NEXT: ds_write_b32 v3, v0
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
call void @use_module()
store i16 1, ptr addrspace(3) @module_variable
@ -368,11 +368,11 @@ define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_normal(i32 %id
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_mov_b32_e32 v4, 2
; CHECK-NEXT: v_mov_b32_e32 v3, 2
; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 0
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: ds_write_b16 v3, v4
; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@ -408,12 +408,12 @@ define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_normal(i32 %id
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal@gotpcrel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@ -445,11 +445,11 @@ define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_normal(i32
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_mov_b32_e32 v4, 2
; CHECK-NEXT: v_mov_b32_e32 v3, 2
; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 2
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: ds_write_b16 v3, v4
; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@ -485,12 +485,12 @@ define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_normal(i32
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal@gotpcrel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@ -522,11 +522,11 @@ define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_overalign(i32
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_mov_b32_e32 v4, 2
; CHECK-NEXT: v_mov_b32_e32 v3, 2
; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 1
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: ds_write_b16 v3, v4
; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@ -562,12 +562,12 @@ define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_overalign(i32
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign@gotpcrel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@ -599,11 +599,11 @@ define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_overalign(i
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_mov_b32_e32 v4, 2
; CHECK-NEXT: v_mov_b32_e32 v3, 2
; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 3
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT: ds_write_b16 v3, v4
; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@ -639,12 +639,12 @@ define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_overalign(i
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign@gotpcrel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_mov_b32_e32 v1, 1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: ds_write_b16 v0, v1
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
; CHECK-NEXT: ds_write_b16 v1, v0
; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm

View File

@ -19,18 +19,18 @@ $_f2 = comdat any
define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce) local_unnamed_addr #0 {
; GCN-LABEL: test:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2
; GCN-NEXT: ds_write_b8 v0, v1
; GCN-NEXT: ds_read_u8 v2, v0 offset:2
; GCN-NEXT: ds_read_u16 v3, v0
; GCN-NEXT: v_mov_b32_e32 v0, 2
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: ds_write_b8 v1, v0
; GCN-NEXT: ds_read_u8 v2, v1 offset:2
; GCN-NEXT: ds_read_u16 v3, v1
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ds_write_b8 v0, v2 offset:6
; GCN-NEXT: ds_write_b16 v0, v3 offset:4
; GCN-NEXT: v_cmp_eq_u16_sdwa s[2:3], v3, v1 src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
; GCN-NEXT: global_store_byte v0, v1, s[0:1]
; GCN-NEXT: ds_write_b8 v1, v2 offset:6
; GCN-NEXT: ds_write_b16 v1, v3 offset:4
; GCN-NEXT: v_cmp_eq_u16_sdwa s[2:3], v3, v0 src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
; GCN-NEXT: global_store_byte v1, v0, s[0:1]
; GCN-NEXT: s_endpgm
; CHECK-LABEL: define protected amdgpu_kernel void @test(
; CHECK-SAME: ptr addrspace(1) nocapture [[PTR_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
@ -47,7 +47,6 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce
; CHECK-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP2]] to i8
; CHECK-NEXT: store i8 [[FROMBOOL8]], ptr addrspace(1) [[PTR_COERCE]], align 1
; CHECK-NEXT: ret void
;
entry:
store i8 3, ptr addrspace(3) @_f1, align 1
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false)

View File

@ -17,10 +17,9 @@ define void @fence_loads(ptr %ptr) {
; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !1
; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4)
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1, mmra !2
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, mmra !2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], mmra !2
; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[COPY4]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4)
; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4)
; CHECK-NEXT: SI_RETURN
fence release, !mmra !0
%ld = load atomic i8, ptr %ptr acquire, align 4, !mmra !2

File diff suppressed because it is too large Load Diff

View File

@ -924,12 +924,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -937,12 +933,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -950,13 +942,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -968,13 +955,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x7ff
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1028,39 +1010,26 @@ define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1072,13 +1041,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x800
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1092,15 +1056,6 @@ define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -1132,12 +1087,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1145,12 +1096,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1158,13 +1105,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1176,13 +1118,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0xfff
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1233,45 +1170,32 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
; Fill 12-bit low-bits (1ull << 33) | 4096
define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX12-GISEL: ; %bb.0:
@ -1280,44 +1204,12 @@ define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1000
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1340,12 +1232,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1353,12 +1241,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1366,13 +1250,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1384,13 +1263,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1fff
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1441,45 +1315,32 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
; Fill 13-bit low-bits (1ull << 33) | 8192
define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX12-GISEL: ; %bb.0:
@ -1488,44 +1349,12 @@ define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x2000
; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1548,12 +1377,9 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1561,12 +1387,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1574,13 +1396,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1592,13 +1409,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x7ff
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1653,39 +1465,27 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1697,13 +1497,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x800
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1718,15 +1513,6 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1)
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@ -1758,12 +1544,9 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1771,12 +1554,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1784,13 +1563,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1802,13 +1576,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0xfff
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1863,42 +1632,30 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX12-GISEL: ; %bb.0:
@ -1907,13 +1664,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1000
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1928,24 +1680,6 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1)
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@ -1968,12 +1702,9 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1981,12 +1712,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -1994,13 +1721,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -2012,13 +1734,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1fff
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -2073,42 +1790,30 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX12-GISEL: ; %bb.0:
@ -2117,13 +1822,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x2000
; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@ -2138,24 +1838,6 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1)
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0

View File

@ -1008,10 +1008,10 @@ define double @v_roundeven_f64(double %x) {
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT: v_add_f64 v[4:5], v[0:1], v[2:3]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
; GFX6-NEXT: v_mov_b32_e32 v6, -1
; GFX6-NEXT: v_mov_b32_e32 v7, 0x432fffff
; GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[6:7]
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
@ -1087,17 +1087,17 @@ define double @v_roundeven_f64_fneg(double %x) {
; GFX6-LABEL: v_roundeven_f64_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
; GFX6-NEXT: v_and_b32_e32 v3, 0x80000000, v6
; GFX6-NEXT: v_xor_b32_e32 v8, 0x80000000, v1
; GFX6-NEXT: v_and_b32_e32 v3, 0x80000000, v8
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT: v_add_f64 v[4:5], -v[0:1], v[2:3]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
; GFX6-NEXT: v_mov_b32_e32 v6, -1
; GFX6-NEXT: v_mov_b32_e32 v7, 0x432fffff
; GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[6:7]
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64_fneg:

View File

@ -1,10 +1,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-OPT %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-NOOPT %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; GCN-LABEL: {{^}}scalar_to_vector_i16:
; GCN-NOOPT: s_mov_b32 [[S:s[0-9]+]], 42
; GCN-NOOPT: v_mov_b32_e32 [[V:v[0-9]+]], [[S]]
; GCN-OPT: v_mov_b32_e32 [[V:v[0-9]+]], 42
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 42
; GCN: buffer_store_short [[V]],
define void @scalar_to_vector_i16() {
%tmp = load <2 x i16>, ptr addrspace(5) undef
@ -14,9 +12,7 @@ define void @scalar_to_vector_i16() {
}
; GCN-LABEL: {{^}}scalar_to_vector_f16:
; GCN-NOOPT: s_mov_b32 [[S:s[0-9]+]], 0x3c00
; GCN-NOOPT: v_mov_b32_e32 [[V:v[0-9]+]], [[S]]
; GCN-OPT: v_mov_b32_e32 [[V:v[0-9]+]], 0x3c00
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x3c00
; GCN: buffer_store_short [[V]],
define void @scalar_to_vector_f16() {
%tmp = load <2 x half>, ptr addrspace(5) undef

View File

@ -25,8 +25,7 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() {
; CHECK-NEXT: ; implicit-def: $sgpr4
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, s4
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: v_mov_b32_e32 v2, s4
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: ds_write_b8 v1, v2
; CHECK-NEXT: s_mov_b64 s[4:5], exec
; CHECK-NEXT: v_writelane_b32 v0, s4, 0