From eb065bf028af8a1eb64dcf8f9ecb72138d221706 Mon Sep 17 00:00:00 2001 From: vangthao95 Date: Mon, 6 Apr 2026 10:22:11 -0700 Subject: [PATCH] AMDGPU/GlobalISel: RegBankLegalize rules for G_EXTRACT_VECTOR_ELT (#189144) --- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 93 ++ .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 2 + .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 19 + .../AMDGPU/AMDGPURegBankLegalizeRules.h | 5 +- .../AMDGPU/GlobalISel/extractelement.ll | 1080 +++++++---------- .../regbankselect-extract-vector-elt.mir | 732 ++++++----- 6 files changed, 954 insertions(+), 977 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 46ff5342a7dd..008236535b1f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -890,6 +890,95 @@ bool RegBankLegalizeHelper::lowerSplitBitCount64To32(MachineInstr &MI) { return true; } +bool RegBankLegalizeHelper::lowerExtrVecEltToSel(MachineInstr &MI) { + // Lower extract vector element to a compare-select chain: + // result = elt[0] + // for i in 1..N-1: + // result = (idx == i) ? elt[i] : result + // + // When the index is divergent, each lane may want a different element, so + // we must check every element per lane. + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Idx = MI.getOperand(2).getReg(); + + LLT VecTy = MRI.getType(Src); + LLT ScalarTy = VecTy.getScalarType(); + unsigned NumElts = VecTy.getNumElements(); + MachineRegisterInfo::VRegAttrs VgprRB_EltTy = {VgprRB, ScalarTy}; + + auto Unmerge = B.buildUnmerge(VgprRB_EltTy, Src); + + if (ScalarTy.getSizeInBits() == 32) { + Register PrevSelect = Unmerge.getReg(0); + for (unsigned I = 1; I < NumElts; ++I) { + auto IdxConst = B.buildConstant({SgprRB, MRI.getType(Idx)}, I); + auto Cmp = B.buildICmp(CmpInst::ICMP_EQ, VccRB_S1, Idx, IdxConst); + PrevSelect = + B.buildSelect(VgprRB_EltTy, Cmp, Unmerge.getReg(I), PrevSelect) + .getReg(0); + } + B.buildCopy(Dst, PrevSelect); + } else if (ScalarTy.getSizeInBits() == 64) { + auto InitUnmerge = B.buildUnmerge(VgprRB_S32, Unmerge.getReg(0)); + Register PrevLo = InitUnmerge.getReg(0); + Register PrevHi = InitUnmerge.getReg(1); + for (unsigned I = 1; I < NumElts; ++I) { + auto IdxConst = B.buildConstant({SgprRB, MRI.getType(Idx)}, I); + auto Cmp = B.buildICmp(CmpInst::ICMP_EQ, VccRB_S1, Idx, IdxConst); + auto EltUnmerge = B.buildUnmerge(VgprRB_S32, Unmerge.getReg(I)); + PrevLo = B.buildSelect(VgprRB_S32, Cmp, EltUnmerge.getReg(0), PrevLo) + .getReg(0); + PrevHi = B.buildSelect(VgprRB_S32, Cmp, EltUnmerge.getReg(1), PrevHi) + .getReg(0); + } + B.buildMergeLikeInstr(Dst, {PrevLo, PrevHi}); + } else { + reportGISelFailure( + MF, MORE, "amdgpu-regbanklegalize", + "AMDGPU RegBankLegalize: ExtrVecEltToSel unsupported element type", MI); + return false; + } + + MI.eraseFromParent(); + return true; +} + +bool RegBankLegalizeHelper::lowerExtrVecEltTo32(MachineInstr &MI) { + // Reduce a 64-bit element extract to two 32-bit extracts: + // vec32 = bitcast to <2N x s32> + // lo = vec32[idx * 2] + // hi = vec32[idx * 2 + 1] + // result = merge(lo, hi) + // + // When the index is uniform, all lanes extract the same element, so we can + // just split the s64 extract into two s32 extracts which lower to MOVREL. + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Idx = MI.getOperand(2).getReg(); + + LLT SrcTy = MRI.getType(Src); + LLT Vec32Ty = LLT::fixed_vector(2 * SrcTy.getNumElements(), 32); + + assert(MRI.getRegBank(Src) == VgprRB && MRI.getRegBank(Idx) == SgprRB && + "expected VGPR src and SGPR idx"); + + auto CastSrc = B.buildBitcast({VgprRB, Vec32Ty}, Src); + + // Calculate new Lo and Hi indices + auto One = B.buildConstant(SgprRB_S32, 1); + auto IdxLo = B.buildShl(SgprRB_S32, Idx, One); + auto IdxHi = B.buildAdd(SgprRB_S32, IdxLo, One); + + auto ExtLo = B.buildExtractVectorElement(VgprRB_S32, CastSrc, IdxLo); + auto ExtHi = B.buildExtractVectorElement(VgprRB_S32, CastSrc, IdxHi); + + B.buildMergeLikeInstr(Dst, {ExtLo.getReg(0), ExtHi.getReg(0)}); + + MI.eraseFromParent(); + return true; +} + bool RegBankLegalizeHelper::lower(MachineInstr &MI, const RegBankLLTMapping &Mapping, WaterfallInfo &WFI) { @@ -1161,6 +1250,10 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI, return applyRegisterBanksINTRIN_IMAGE(MI); case SplitBitCount64To32: return lowerSplitBitCount64To32(MI); + case ExtrVecEltToSel: + return lowerExtrVecEltToSel(MI); + case ExtrVecEltTo32: + return lowerExtrVecEltTo32(MI); } if (!WFI.SgprWaterfallOperandRegs.empty()) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h index 577c26e4bf02..8c39ad3238b1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -140,6 +140,8 @@ private: bool lowerSplitBitCount64To32(MachineInstr &MI); bool lowerUnpackMinMax(MachineInstr &MI); bool lowerUnpackAExt(MachineInstr &MI); + bool lowerExtrVecEltToSel(MachineInstr &MI); + bool lowerExtrVecEltTo32(MachineInstr &MI); bool applyRegisterBanksINTRIN_IMAGE(MachineInstr &MI); }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 4d8079b4663b..9ce1e1afb064 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -215,6 +215,14 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, static_cast(MRI.getTargetRegisterInfo()); return TRI->getSGPRClassForBitWidth(MRI.getType(Reg).getSizeInBits()); } + case BRC: { + // Check if there is SGPR and VGPR register class of same size as the LLT. + const SIRegisterInfo *TRI = + static_cast(MRI.getTargetRegisterInfo()); + unsigned LLTSize = MRI.getType(Reg).getSizeInBits(); + return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize) && + TRI->getVGPRClassForBitWidth(LLTSize); + } case _: return true; default: @@ -670,6 +678,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{UniBRC}, {{}, {}, VerifyAllSgprGPHI}}) .Any({{DivBRC}, {{}, {}, VerifyAllSgprOrVgprGPHI}}); + addRulesForGOpcs({G_EXTRACT_VECTOR_ELT}) + .Any({{UniB32, UniBRC, UniS32}, {{SgprB32}, {SgprBRC, Sgpr32}}}) + .Any({{DivB32, DivBRC, UniS32}, {{VgprB32}, {VgprBRC, Sgpr32}}}) + .Any({{DivB32, BRC, DivS32}, + {{VgprB32}, {VgprBRC, Vgpr32}, ExtrVecEltToSel}}) + .Any({{UniB64, UniBRC, UniS32}, {{SgprB64}, {SgprBRC, Sgpr32}}}) + .Any({{DivB64, DivBRC, UniS32}, + {{VgprB64}, {VgprBRC, Sgpr32}, ExtrVecEltTo32}}) + .Any({{DivB64, BRC, DivS32}, + {{VgprB64}, {VgprBRC, Vgpr32}, ExtrVecEltToSel}}); + // LOAD {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}} // LOAD {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}} // LOAD_NORET {}, {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}} diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index d8a88b0ee558..c2501faa2cf2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -116,6 +116,7 @@ enum UniformityLLTOpPredicateID { B160, B256, B512, + BRC, UniB32, UniB64, @@ -290,7 +291,9 @@ enum LoweringMethodID { VerifyAllSgprGPHI, VerifyAllSgprOrVgprGPHI, ApplyINTRIN_IMAGE, - SplitBitCount64To32 + SplitBitCount64To32, + ExtrVecEltToSel, + ExtrVecEltTo32 }; enum FastRulesTypes { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 206011adf021..a21229458b50 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GPRIDX %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,MOVREL %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GPRIDX %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,MOVREL %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s define float @dyn_extract_v8f32_const_s_v(i32 %sel) { ; GCN-LABEL: dyn_extract_v8f32_const_s_v: @@ -53,39 +53,20 @@ entry: } define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1 -; GPRIDX-NEXT: s_cselect_b32 s0, 2.0, 1.0 -; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2 -; GPRIDX-NEXT: s_cselect_b32 s0, 0x40400000, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3 -; GPRIDX-NEXT: s_cselect_b32 s0, 4.0, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s2, 4 -; GPRIDX-NEXT: s_cselect_b32 s0, 0x40a00000, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s2, 5 -; GPRIDX-NEXT: s_cselect_b32 s0, 0x40c00000, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s2, 6 -; GPRIDX-NEXT: s_cselect_b32 s0, 0x40e00000, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s2, 7 -; GPRIDX-NEXT: s_cselect_b32 s0, 0x41000000, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f32_const_s_s: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s4, 1.0 -; MOVREL-NEXT: s_mov_b32 m0, s2 -; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 -; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 -; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 -; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 -; MOVREL-NEXT: s_mov_b32 s7, 4.0 -; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 -; MOVREL-NEXT: s_mov_b32 s5, 2.0 -; MOVREL-NEXT: s_movrels_b32 s0, s4 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v8f32_const_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s4, 1.0 +; GCN-NEXT: s_mov_b32 m0, s2 +; GCN-NEXT: s_mov_b32 s11, 0x41000000 +; GCN-NEXT: s_mov_b32 s10, 0x40e00000 +; GCN-NEXT: s_mov_b32 s9, 0x40c00000 +; GCN-NEXT: s_mov_b32 s8, 0x40a00000 +; GCN-NEXT: s_mov_b32 s7, 4.0 +; GCN-NEXT: s_mov_b32 s6, 0x40400000 +; GCN-NEXT: s_mov_b32 s5, 2.0 +; GCN-NEXT: s_movrels_b32 s0, s4 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_s: ; GFX10PLUS: ; %bb.0: ; %entry @@ -202,20 +183,9 @@ entry: define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f32_v_s: ; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 -; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 -; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 -; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 -; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 -; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 -; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc -; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7 -; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f32_v_s: @@ -235,39 +205,20 @@ entry: } define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f32_s_s: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 -; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 -; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 -; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 -; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 -; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 -; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 -; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f32_s_s: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 m0, s10 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_movrels_b32 s0, s0 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v8f32_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 m0, s10 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_movrels_b32 s0, s0 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s: ; GFX10PLUS: ; %bb.0: ; %entry @@ -292,123 +243,42 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { ; GCN-LABEL: dyn_extract_v8i64_const_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b64 s[16:17], 2 -; GCN-NEXT: s_mov_b64 s[18:19], 1 -; GCN-NEXT: s_mov_b64 s[14:15], 3 -; GCN-NEXT: v_mov_b32_e32 v1, s18 -; GCN-NEXT: v_mov_b32_e32 v2, s19 -; GCN-NEXT: v_mov_b32_e32 v3, s16 -; GCN-NEXT: v_mov_b32_e32 v4, s17 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: s_mov_b64 s[12:13], 4 -; GCN-NEXT: v_mov_b32_e32 v5, s14 -; GCN-NEXT: v_mov_b32_e32 v6, s15 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_cndmask_b32_e64 v1, 1, 2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: s_mov_b64 s[10:11], 5 -; GCN-NEXT: v_mov_b32_e32 v7, s12 -; GCN-NEXT: v_mov_b32_e32 v8, s13 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: s_mov_b64 s[8:9], 6 -; GCN-NEXT: v_mov_b32_e32 v9, s10 -; GCN-NEXT: v_mov_b32_e32 v10, s11 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc +; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: s_mov_b64 s[6:7], 7 -; GCN-NEXT: v_mov_b32_e32 v11, s8 -; GCN-NEXT: v_mov_b32_e32 v12, s9 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: s_mov_b64 s[4:5], 8 -; GCN-NEXT: v_mov_b32_e32 v13, s6 -; GCN-NEXT: v_mov_b32_e32 v14, s7 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc +; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 -; GCN-NEXT: v_mov_b32_e32 v15, s4 -; GCN-NEXT: v_mov_b32_e32 v16, s5 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc +; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc -; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 8, vcc +; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: dyn_extract_v8i64_const_s_v: -; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b64 s[4:5], 2 -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX10-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-NEXT: s_mov_b64 s[6:7], 1 -; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 2, v0 -; GFX10-NEXT: s_mov_b64 s[8:9], 3 -; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10-NEXT: s_mov_b64 s[6:7], 4 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, s4 -; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 4, v0 -; GFX10-NEXT: s_mov_b64 s[8:9], 5 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10-NEXT: s_mov_b64 s[6:7], 6 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, s4 -; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 6, v0 -; GFX10-NEXT: s_mov_b64 s[8:9], 7 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10-NEXT: s_mov_b64 s[6:7], 8 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s6, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s7, vcc_lo -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: dyn_extract_v8i64_const_s_v: -; GFX11: ; %bb.0: ; %entry -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b64 s[0:1], 2 -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1 -; GFX11-NEXT: s_mov_b64 s[2:3], 1 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 2, v0 -; GFX11-NEXT: s_mov_b64 s[4:5], 3 -; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX11-NEXT: s_mov_b64 s[2:3], 4 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, s0 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 4, v0 -; GFX11-NEXT: s_mov_b64 s[4:5], 5 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX11-NEXT: s_mov_b64 s[2:3], 6 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, s0 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 6, v0 -; GFX11-NEXT: s_mov_b64 s[4:5], 7 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX11-NEXT: s_mov_b64 s[2:3], 8 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10PLUS-LABEL: dyn_extract_v8i64_const_s_v: +; GFX10PLUS: ; %bb.0: ; %entry +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1, 2, vcc_lo +; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 3, vcc_lo +; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 4, vcc_lo +; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 5, vcc_lo +; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 6, vcc_lo +; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 7, vcc_lo +; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, 8, vcc_lo +; GFX10PLUS-NEXT: v_mov_b32_e32 v1, 0 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x i64> , i32 %sel ret i64 %ext @@ -495,38 +365,38 @@ define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { ; GPRIDX-NEXT: v_mov_b32_e32 v3, s4 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s5 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v5, s6 -; GPRIDX-NEXT: v_mov_b32_e32 v6, s7 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v3, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s7 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v7, s8 -; GPRIDX-NEXT: v_mov_b32_e32 v8, s9 -; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v3, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s9 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v9, s10 -; GPRIDX-NEXT: v_mov_b32_e32 v10, s11 -; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v3, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s11 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v11, s12 -; GPRIDX-NEXT: v_mov_b32_e32 v12, s13 -; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v3, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s13 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v13, s14 -; GPRIDX-NEXT: v_mov_b32_e32 v14, s15 -; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc -; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v3, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s15 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v15, s16 -; GPRIDX-NEXT: v_mov_b32_e32 v16, s17 -; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc -; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v3, s16 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s17 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 -; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc -; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GPRIDX-NEXT: s_endpgm ; @@ -537,38 +407,38 @@ define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { ; MOVREL-NEXT: v_mov_b32_e32 v3, s4 ; MOVREL-NEXT: v_mov_b32_e32 v4, s5 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; MOVREL-NEXT: v_mov_b32_e32 v5, s6 -; MOVREL-NEXT: v_mov_b32_e32 v6, s7 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; MOVREL-NEXT: v_mov_b32_e32 v3, s6 +; MOVREL-NEXT: v_mov_b32_e32 v4, s7 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; MOVREL-NEXT: v_mov_b32_e32 v7, s8 -; MOVREL-NEXT: v_mov_b32_e32 v8, s9 -; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; MOVREL-NEXT: v_mov_b32_e32 v3, s8 +; MOVREL-NEXT: v_mov_b32_e32 v4, s9 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; MOVREL-NEXT: v_mov_b32_e32 v9, s10 -; MOVREL-NEXT: v_mov_b32_e32 v10, s11 -; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; MOVREL-NEXT: v_mov_b32_e32 v3, s10 +; MOVREL-NEXT: v_mov_b32_e32 v4, s11 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; MOVREL-NEXT: v_mov_b32_e32 v11, s12 -; MOVREL-NEXT: v_mov_b32_e32 v12, s13 -; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; MOVREL-NEXT: v_mov_b32_e32 v3, s12 +; MOVREL-NEXT: v_mov_b32_e32 v4, s13 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; MOVREL-NEXT: v_mov_b32_e32 v13, s14 -; MOVREL-NEXT: v_mov_b32_e32 v14, s15 -; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc -; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; MOVREL-NEXT: v_mov_b32_e32 v3, s14 +; MOVREL-NEXT: v_mov_b32_e32 v4, s15 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 -; MOVREL-NEXT: v_mov_b32_e32 v15, s16 -; MOVREL-NEXT: v_mov_b32_e32 v16, s17 -; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc -; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; MOVREL-NEXT: v_mov_b32_e32 v3, s16 +; MOVREL-NEXT: v_mov_b32_e32 v4, s17 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 -; MOVREL-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc -; MOVREL-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; MOVREL-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; MOVREL-NEXT: s_endpgm ; @@ -854,40 +724,20 @@ entry: } define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_add_i32 s10, s10, 3 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 -; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 -; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 -; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 -; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 -; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 -; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 -; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 m0, s10 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_movrels_b32 s0, s3 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v8f32_s_s_offset3: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 m0, s10 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_movrels_b32 s0, s3 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s_offset3: ; GFX10PLUS: ; %bb.0: ; %entry @@ -1551,20 +1401,16 @@ entry: define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x ptr addrspace(3)> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p3_s_s: ; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 -; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 -; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 -; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 -; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 -; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 -; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 -; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 m0, s10 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_movrels_b32 s0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: ds_write_b32 v0, v0 ; GPRIDX-NEXT: s_endpgm @@ -2171,32 +2017,23 @@ entry: } define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) { -; GCN-LABEL: dyn_extract_v6f32_v_s: -; GCN: ; %bb.0: ; %entry -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; GCN-NEXT: ; return to shader part epilog +; GPRIDX-LABEL: dyn_extract_v6f32_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v6f32_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 +; MOVREL-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v6f32_v_s: ; GFX10PLUS: ; %bb.0: ; %entry -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo +; GFX10PLUS-NEXT: s_mov_b32 m0, s2 +; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel @@ -2206,31 +2043,27 @@ entry: define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v6f32_s_s: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_cmp_eq_u32 s8, 1 -; GCN-NEXT: s_cselect_b32 s0, s3, s2 -; GCN-NEXT: s_cmp_eq_u32 s8, 2 -; GCN-NEXT: s_cselect_b32 s0, s4, s0 -; GCN-NEXT: s_cmp_eq_u32 s8, 3 -; GCN-NEXT: s_cselect_b32 s0, s5, s0 -; GCN-NEXT: s_cmp_eq_u32 s8, 4 -; GCN-NEXT: s_cselect_b32 s0, s6, s0 -; GCN-NEXT: s_cmp_eq_u32 s8, 5 -; GCN-NEXT: s_cselect_b32 s0, s7, s0 +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 m0, s8 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_movrels_b32 s0, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v6f32_s_s: ; GFX10PLUS: ; %bb.0: ; %entry -; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 1 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2 -; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 2 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0 -; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 3 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0 -; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 4 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0 -; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 5 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0 +; GFX10PLUS-NEXT: s_mov_b32 s0, s2 +; GFX10PLUS-NEXT: s_mov_b32 m0, s8 +; GFX10PLUS-NEXT: s_mov_b32 s1, s3 +; GFX10PLUS-NEXT: s_mov_b32 s2, s4 +; GFX10PLUS-NEXT: s_mov_b32 s3, s5 +; GFX10PLUS-NEXT: s_mov_b32 s4, s6 +; GFX10PLUS-NEXT: s_mov_b32 s5, s7 +; GFX10PLUS-NEXT: s_movrels_b32 s0, s0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: @@ -2323,36 +2156,23 @@ entry: } define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) { -; GCN-LABEL: dyn_extract_v7f32_v_s: -; GCN: ; %bb.0: ; %entry -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc -; GCN-NEXT: ; return to shader part epilog +; GPRIDX-LABEL: dyn_extract_v7f32_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v7f32_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 +; MOVREL-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v7f32_v_s: ; GFX10PLUS: ; %bb.0: ; %entry -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo +; GFX10PLUS-NEXT: s_mov_b32 m0, s2 +; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel @@ -2362,35 +2182,29 @@ entry: define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v7f32_s_s: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_cmp_eq_u32 s9, 1 -; GCN-NEXT: s_cselect_b32 s0, s3, s2 -; GCN-NEXT: s_cmp_eq_u32 s9, 2 -; GCN-NEXT: s_cselect_b32 s0, s4, s0 -; GCN-NEXT: s_cmp_eq_u32 s9, 3 -; GCN-NEXT: s_cselect_b32 s0, s5, s0 -; GCN-NEXT: s_cmp_eq_u32 s9, 4 -; GCN-NEXT: s_cselect_b32 s0, s6, s0 -; GCN-NEXT: s_cmp_eq_u32 s9, 5 -; GCN-NEXT: s_cselect_b32 s0, s7, s0 -; GCN-NEXT: s_cmp_eq_u32 s9, 6 -; GCN-NEXT: s_cselect_b32 s0, s8, s0 +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 m0, s9 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_movrels_b32 s0, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v7f32_s_s: ; GFX10PLUS: ; %bb.0: ; %entry -; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 1 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2 -; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 2 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0 -; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 3 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0 -; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 4 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0 -; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 5 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0 -; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 6 -; GFX10PLUS-NEXT: s_cselect_b32 s0, s8, s0 +; GFX10PLUS-NEXT: s_mov_b32 s0, s2 +; GFX10PLUS-NEXT: s_mov_b32 m0, s9 +; GFX10PLUS-NEXT: s_mov_b32 s1, s3 +; GFX10PLUS-NEXT: s_mov_b32 s2, s4 +; GFX10PLUS-NEXT: s_mov_b32 s3, s5 +; GFX10PLUS-NEXT: s_mov_b32 s4, s6 +; GFX10PLUS-NEXT: s_mov_b32 s5, s7 +; GFX10PLUS-NEXT: s_mov_b32 s6, s8 +; GFX10PLUS-NEXT: s_movrels_b32 s0, s0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: @@ -2406,28 +2220,28 @@ define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_mov_b32_e32 v5, s6 -; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s6 +; GCN-NEXT: v_mov_b32_e32 v4, s7 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_mov_b32_e32 v7, s8 -; GCN-NEXT: v_mov_b32_e32 v8, s9 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s8 +; GCN-NEXT: v_mov_b32_e32 v4, s9 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_mov_b32_e32 v9, s10 -; GCN-NEXT: v_mov_b32_e32 v10, s11 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s10 +; GCN-NEXT: v_mov_b32_e32 v4, s11 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_mov_b32_e32 v11, s12 -; GCN-NEXT: v_mov_b32_e32 v12, s13 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s12 +; GCN-NEXT: v_mov_b32_e32 v4, s13 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v11, vcc -; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v12, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc ; GCN-NEXT: v_readfirstlane_b32 s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s1, v1 ; GCN-NEXT: ; return to shader part epilog @@ -2620,31 +2434,31 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v_bitcast(<14 x float> inreg %userD ; GCN-LABEL: dyn_extract_v7f64_s_v_bitcast: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_mov_b32_e32 v1, s2 -; GCN-NEXT: v_mov_b32_e32 v2, s3 ; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v2, s3 ; GCN-NEXT: v_mov_b32_e32 v4, s5 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v5, s6 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v6, s7 +; GCN-NEXT: v_mov_b32_e32 v7, s8 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_mov_b32_e32 v7, s8 ; GCN-NEXT: v_mov_b32_e32 v8, s9 +; GCN-NEXT: v_mov_b32_e32 v9, s10 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_mov_b32_e32 v9, s10 ; GCN-NEXT: v_mov_b32_e32 v10, s11 +; GCN-NEXT: v_mov_b32_e32 v11, s12 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_mov_b32_e32 v11, s12 ; GCN-NEXT: v_mov_b32_e32 v12, s13 +; GCN-NEXT: v_mov_b32_e32 v13, s14 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_mov_b32_e32 v13, s14 ; GCN-NEXT: v_mov_b32_e32 v14, s15 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc @@ -2652,69 +2466,76 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v_bitcast(<14 x float> inreg %userD ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 -; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr2 killed $exec -; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr3 killed $exec -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc -; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; GCN-NEXT: v_readfirstlane_b32 s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s1, v1 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: dyn_extract_v7f64_s_v_bitcast: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-NEXT: v_mov_b32_e32 v2, s5 +; GFX10-NEXT: v_mov_b32_e32 v1, s2 +; GFX10-NEXT: v_mov_b32_e32 v3, s4 +; GFX10-NEXT: v_mov_b32_e32 v2, s3 +; GFX10-NEXT: v_mov_b32_e32 v4, s5 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX10-NEXT: s_mov_b32 s0, s14 -; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v5, s6 +; GFX10-NEXT: v_mov_b32_e32 v6, s7 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v1, s8 +; GFX10-NEXT: v_mov_b32_e32 v2, s9 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v5, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v6, v4, v6, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v3, s10 +; GFX10-NEXT: v_mov_b32_e32 v4, s11 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v1, s12 +; GFX10-NEXT: v_mov_b32_e32 v2, s13 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v4, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v3, s14 +; GFX10-NEXT: v_mov_b32_e32 v4, s15 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: dyn_extract_v7f64_s_v_bitcast: ; GFX11: ; %bb.0: ; %entry -; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5 +; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3 +; GFX11-NEXT: v_dual_mov_b32 v3, s4 :: v_dual_mov_b32 v4, s5 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-NEXT: s_mov_b32 s0, s14 -; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo +; GFX11-NEXT: v_dual_mov_b32 v5, s6 :: v_dual_mov_b32 v6, s7 +; GFX11-NEXT: v_dual_cndmask_b32 v3, v1, v3 :: v_dual_cndmask_b32 v4, v2, v4 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo +; GFX11-NEXT: v_dual_mov_b32 v1, s8 :: v_dual_mov_b32 v2, s9 +; GFX11-NEXT: v_dual_cndmask_b32 v5, v3, v5 :: v_dual_cndmask_b32 v6, v4, v6 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo +; GFX11-NEXT: v_dual_mov_b32 v3, s10 :: v_dual_mov_b32 v4, s11 +; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v1 :: v_dual_cndmask_b32 v6, v6, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo +; GFX11-NEXT: v_dual_mov_b32 v1, s12 :: v_dual_mov_b32 v2, s13 +; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v3 :: v_dual_cndmask_b32 v6, v6, v4 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo +; GFX11-NEXT: v_dual_mov_b32 v3, s14 :: v_dual_mov_b32 v4, s15 +; GFX11-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v2, v6, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo +; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v2, v2, v4 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo +; GFX11-NEXT: v_dual_cndmask_b32 v0, v1, v0 :: v_dual_cndmask_b32 v1, v2, v1 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: ; return to shader part epilog @@ -2750,38 +2571,36 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_mov_b32_e32 v5, s6 -; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s6 +; GCN-NEXT: v_mov_b32_e32 v4, s7 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_mov_b32_e32 v7, s8 -; GCN-NEXT: v_mov_b32_e32 v8, s9 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s8 +; GCN-NEXT: v_mov_b32_e32 v4, s9 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_mov_b32_e32 v9, s10 -; GCN-NEXT: v_mov_b32_e32 v10, s11 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s10 +; GCN-NEXT: v_mov_b32_e32 v4, s11 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_mov_b32_e32 v11, s12 -; GCN-NEXT: v_mov_b32_e32 v12, s13 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s12 +; GCN-NEXT: v_mov_b32_e32 v4, s13 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_mov_b32_e32 v13, s14 -; GCN-NEXT: v_mov_b32_e32 v14, s15 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s14 +; GCN-NEXT: v_mov_b32_e32 v4, s15 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 -; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr2 killed $exec -; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr3 killed $exec -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc -; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; GCN-NEXT: v_readfirstlane_b32 s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s1, v1 ; GCN-NEXT: ; return to shader part epilog @@ -2791,7 +2610,6 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: v_mov_b32_e32 v2, s5 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX10-NEXT: s_mov_b32 s0, s14 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 @@ -2807,11 +2625,11 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: ; return to shader part epilog @@ -2820,7 +2638,6 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-NEXT: s_mov_b32 s0, s14 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 @@ -2836,11 +2653,10 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo +; GFX11-NEXT: v_dual_cndmask_b32 v0, v1, v0 :: v_dual_cndmask_b32 v1, v2, v1 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: ; return to shader part epilog @@ -3059,7 +2875,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GPRIDX-NEXT: gds_segment_byte_size = 0 ; GPRIDX-NEXT: kernarg_segment_byte_size = 28 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0 -; GPRIDX-NEXT: wavefront_sgpr_count = 17 +; GPRIDX-NEXT: wavefront_sgpr_count = 20 ; GPRIDX-NEXT: workitem_vgpr_count = 3 ; GPRIDX-NEXT: reserved_vgpr_first = 0 ; GPRIDX-NEXT: reserved_vgpr_count = 0 @@ -3076,20 +2892,17 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GPRIDX-NEXT: .end_amd_kernel_code_t ; GPRIDX-NEXT: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GPRIDX-NEXT: s_load_dword s10, s[8:9], 0x8 -; GPRIDX-NEXT: s_mov_b32 s4, 0 -; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000 -; GPRIDX-NEXT: s_mov_b32 s2, 0 -; GPRIDX-NEXT: s_mov_b32 s3, 0x40140000 +; GPRIDX-NEXT: s_load_dword s2, s[8:9], 0x8 +; GPRIDX-NEXT: s_mov_b32 s12, 0 +; GPRIDX-NEXT: s_mov_b32 s8, 0 +; GPRIDX-NEXT: s_mov_b64 s[4:5], 1.0 +; GPRIDX-NEXT: s_mov_b32 s13, 0x40140000 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 -; GPRIDX-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 -; GPRIDX-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 -; GPRIDX-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] -; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 -; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] +; GPRIDX-NEXT: s_mov_b32 m0, s2 +; GPRIDX-NEXT: s_mov_b64 s[10:11], 4.0 +; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000 +; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0 +; GPRIDX-NEXT: s_movrels_b64 s[2:3], s[4:5] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 @@ -3167,25 +2980,22 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; MOVREL-NEXT: .end_amd_kernel_code_t ; MOVREL-NEXT: ; %bb.0: ; %entry ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; MOVREL-NEXT: s_load_dword s8, s[8:9], 0x8 +; MOVREL-NEXT: s_load_dword s2, s[8:9], 0x8 ; MOVREL-NEXT: s_add_i32 s12, s12, s17 +; MOVREL-NEXT: s_mov_b32 flat_scratch_lo, s13 ; MOVREL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; MOVREL-NEXT: s_mov_b32 s4, 0 -; MOVREL-NEXT: s_mov_b32 s5, 0x40080000 +; MOVREL-NEXT: s_mov_b32 s12, 0 +; MOVREL-NEXT: s_mov_b32 s8, 0 +; MOVREL-NEXT: s_mov_b64 s[4:5], 1.0 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) -; MOVREL-NEXT: s_cmp_eq_u32 s8, 1 -; MOVREL-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 -; MOVREL-NEXT: s_cmp_eq_u32 s8, 2 -; MOVREL-NEXT: s_mov_b32 s2, 0 -; MOVREL-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] -; MOVREL-NEXT: s_cmp_eq_u32 s8, 3 -; MOVREL-NEXT: s_mov_b32 s3, 0x40140000 -; MOVREL-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] -; MOVREL-NEXT: s_cmp_eq_u32 s8, 4 -; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: s_mov_b32 s13, 0x40140000 +; MOVREL-NEXT: s_mov_b64 s[10:11], 4.0 +; MOVREL-NEXT: s_mov_b32 s9, 0x40080000 +; MOVREL-NEXT: s_mov_b64 s[6:7], 2.0 +; MOVREL-NEXT: s_movrels_b64 s[2:3], s[4:5] ; MOVREL-NEXT: v_mov_b32_e32 v0, s2 ; MOVREL-NEXT: v_mov_b32_e32 v3, s1 -; MOVREL-NEXT: s_mov_b32 flat_scratch_lo, s13 ; MOVREL-NEXT: v_mov_b32_e32 v1, s3 ; MOVREL-NEXT: v_mov_b32_e32 v2, s0 ; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -3245,7 +3055,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX10-NEXT: gds_segment_byte_size = 0 ; GFX10-NEXT: kernarg_segment_byte_size = 28 ; GFX10-NEXT: workgroup_fbarrier_count = 0 -; GFX10-NEXT: wavefront_sgpr_count = 10 +; GFX10-NEXT: wavefront_sgpr_count = 13 ; GFX10-NEXT: workitem_vgpr_count = 3 ; GFX10-NEXT: reserved_vgpr_first = 0 ; GFX10-NEXT: reserved_vgpr_count = 0 @@ -3262,25 +3072,22 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX10-NEXT: .end_amd_kernel_code_t ; GFX10-NEXT: ; %bb.0: ; %entry ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s6, s[8:9], 0x8 -; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX10-NEXT: s_mov_b32 s2, 0 -; GFX10-NEXT: s_mov_b32 s3, 0x40080000 +; GFX10-NEXT: s_load_dword s12, s[8:9], 0x8 +; GFX10-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 +; GFX10-NEXT: s_mov_b64 s[0:1], 1.0 +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s9, 0x40140000 +; GFX10-NEXT: s_mov_b64 s[6:7], 4.0 +; GFX10-NEXT: s_mov_b32 s5, 0x40080000 +; GFX10-NEXT: s_mov_b64 s[2:3], 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_cmp_eq_u32 s6, 1 -; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 -; GFX10-NEXT: s_cmp_eq_u32 s6, 2 -; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] -; GFX10-NEXT: s_cmp_eq_u32 s6, 3 -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40140000 -; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] -; GFX10-NEXT: s_cmp_eq_u32 s6, 4 -; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] -; GFX10-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX10-NEXT: s_mov_b32 m0, s12 +; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[10:11] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: dyn_extract_v5f64_s_s: @@ -3337,7 +3144,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX11-NEXT: gds_segment_byte_size = 0 ; GFX11-NEXT: kernarg_segment_byte_size = 28 ; GFX11-NEXT: workgroup_fbarrier_count = 0 -; GFX11-NEXT: wavefront_sgpr_count = 7 +; GFX11-NEXT: wavefront_sgpr_count = 13 ; GFX11-NEXT: workitem_vgpr_count = 3 ; GFX11-NEXT: reserved_vgpr_first = 0 ; GFX11-NEXT: reserved_vgpr_count = 0 @@ -3354,24 +3161,21 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX11-NEXT: .end_amd_kernel_code_t ; GFX11-NEXT: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x8 -; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-NEXT: s_mov_b32 s2, 0 -; GFX11-NEXT: s_mov_b32 s3, 0x40080000 +; GFX11-NEXT: s_load_b32 s12, s[4:5], 0x8 +; GFX11-NEXT: s_load_b64 s[10:11], s[4:5], 0x0 +; GFX11-NEXT: s_mov_b64 s[0:1], 1.0 +; GFX11-NEXT: s_mov_b32 s8, 0 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s9, 0x40140000 +; GFX11-NEXT: s_mov_b64 s[6:7], 4.0 +; GFX11-NEXT: s_mov_b32 s5, 0x40080000 +; GFX11-NEXT: s_mov_b64 s[2:3], 2.0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s6, 1 -; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 -; GFX11-NEXT: s_cmp_eq_u32 s6, 2 -; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] -; GFX11-NEXT: s_cmp_eq_u32 s6, 3 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40140000 -; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] -; GFX11-NEXT: s_cmp_eq_u32 s6, 4 -; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] -; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_mov_b32 m0, s12 +; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[10:11] ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <5 x double> , i32 %sel @@ -4093,18 +3897,18 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 ; GPRIDX-NEXT: .end_amd_kernel_code_t ; GPRIDX-NEXT: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_load_dword s2, s[8:9], 0x8 -; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GPRIDX-NEXT: v_mov_b32_e32 v1, 0 +; GPRIDX-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 +; GPRIDX-NEXT: s_load_dword s6, s[8:9], 0x8 +; GPRIDX-NEXT: s_mov_b32 s0, 1.0 +; GPRIDX-NEXT: s_mov_b32 s3, 4.0 +; GPRIDX-NEXT: s_mov_b32 s2, 0x40400000 +; GPRIDX-NEXT: s_mov_b32 s1, 2.0 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) -; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1 -; GPRIDX-NEXT: s_cselect_b32 s3, 2.0, 1.0 -; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2 -; GPRIDX-NEXT: s_cselect_b32 s3, 0x40400000, s3 -; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3 -; GPRIDX-NEXT: s_cselect_b32 s2, 4.0, s3 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 -; GPRIDX-NEXT: global_store_dword v1, v0, s[0:1] +; GPRIDX-NEXT: s_mov_b32 m0, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v1, 0 +; GPRIDX-NEXT: s_movrels_b32 s0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: global_store_dword v1, v0, s[4:5] ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v4f32_s_s_s: @@ -4177,21 +3981,21 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 ; MOVREL-NEXT: .end_amd_kernel_code_t ; MOVREL-NEXT: ; %bb.0: ; %entry -; MOVREL-NEXT: s_load_dword s2, s[8:9], 0x8 -; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; MOVREL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 +; MOVREL-NEXT: s_load_dword s6, s[8:9], 0x8 +; MOVREL-NEXT: s_mov_b32 s0, 1.0 ; MOVREL-NEXT: s_add_i32 s12, s12, s17 -; MOVREL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; MOVREL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; MOVREL-NEXT: s_mov_b32 s3, 4.0 +; MOVREL-NEXT: s_mov_b32 s2, 0x40400000 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) -; MOVREL-NEXT: s_cmp_eq_u32 s2, 1 -; MOVREL-NEXT: s_cselect_b32 s3, 2.0, 1.0 -; MOVREL-NEXT: s_cmp_eq_u32 s2, 2 -; MOVREL-NEXT: s_cselect_b32 s3, 0x40400000, s3 -; MOVREL-NEXT: s_cmp_eq_u32 s2, 3 -; MOVREL-NEXT: s_cselect_b32 s2, 4.0, s3 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: v_mov_b32_e32 v2, s2 -; MOVREL-NEXT: v_mov_b32_e32 v1, s1 +; MOVREL-NEXT: s_mov_b32 m0, s6 +; MOVREL-NEXT: s_mov_b32 s1, 2.0 +; MOVREL-NEXT: s_movrels_b32 s0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v0, s4 +; MOVREL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; MOVREL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; MOVREL-NEXT: v_mov_b32_e32 v2, s0 +; MOVREL-NEXT: v_mov_b32_e32 v1, s5 ; MOVREL-NEXT: flat_store_dword v[0:1], v2 ; MOVREL-NEXT: s_endpgm ; @@ -4266,18 +4070,18 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX10-NEXT: .end_amd_kernel_code_t ; GFX10-NEXT: ; %bb.0: ; %entry ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 -; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; GFX10-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 +; GFX10-NEXT: s_mov_b32 s0, 1.0 +; GFX10-NEXT: s_mov_b32 s3, 4.0 +; GFX10-NEXT: s_mov_b32 s2, 0x40400000 +; GFX10-NEXT: s_mov_b32 s1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_cmp_eq_u32 s2, 1 -; GFX10-NEXT: s_cselect_b32 s3, 2.0, 1.0 -; GFX10-NEXT: s_cmp_eq_u32 s2, 2 -; GFX10-NEXT: s_cselect_b32 s3, 0x40400000, s3 -; GFX10-NEXT: s_cmp_eq_u32 s2, 3 -; GFX10-NEXT: s_cselect_b32 s2, 4.0, s3 -; GFX10-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10-NEXT: s_mov_b32 m0, s6 +; GFX10-NEXT: s_movrels_b32 s0, s0 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: dyn_extract_v4f32_s_s_s: @@ -4334,7 +4138,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX11-NEXT: gds_segment_byte_size = 0 ; GFX11-NEXT: kernarg_segment_byte_size = 28 ; GFX11-NEXT: workgroup_fbarrier_count = 0 -; GFX11-NEXT: wavefront_sgpr_count = 6 +; GFX11-NEXT: wavefront_sgpr_count = 7 ; GFX11-NEXT: workitem_vgpr_count = 2 ; GFX11-NEXT: reserved_vgpr_first = 0 ; GFX11-NEXT: reserved_vgpr_count = 0 @@ -4351,18 +4155,18 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX11-NEXT: .end_amd_kernel_code_t ; GFX11-NEXT: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 -; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x8 +; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 +; GFX11-NEXT: s_mov_b32 s0, 1.0 +; GFX11-NEXT: s_mov_b32 s3, 4.0 +; GFX11-NEXT: s_mov_b32 s2, 0x40400000 +; GFX11-NEXT: s_mov_b32 s1, 2.0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s2, 1 -; GFX11-NEXT: s_cselect_b32 s3, 2.0, 1.0 -; GFX11-NEXT: s_cmp_eq_u32 s2, 2 -; GFX11-NEXT: s_cselect_b32 s3, 0x40400000, s3 -; GFX11-NEXT: s_cmp_eq_u32 s2, 3 -; GFX11-NEXT: s_cselect_b32 s2, 4.0, s3 -; GFX11-NEXT: v_mov_b32_e32 v0, s2 -; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-NEXT: s_mov_b32 m0, s6 +; GFX11-NEXT: s_movrels_b32 s0, s0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <4 x float> , i32 %sel @@ -4382,7 +4186,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 -; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1 +; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 2 ; GPRIDX-NEXT: priority = 0 ; GPRIDX-NEXT: float_mode = 240 ; GPRIDX-NEXT: priv = 0 @@ -4425,7 +4229,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s ; GPRIDX-NEXT: gds_segment_byte_size = 0 ; GPRIDX-NEXT: kernarg_segment_byte_size = 28 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0 -; GPRIDX-NEXT: wavefront_sgpr_count = 16 +; GPRIDX-NEXT: wavefront_sgpr_count = 19 ; GPRIDX-NEXT: workitem_vgpr_count = 3 ; GPRIDX-NEXT: reserved_vgpr_first = 0 ; GPRIDX-NEXT: reserved_vgpr_count = 0 @@ -4441,21 +4245,20 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 ; GPRIDX-NEXT: .end_amd_kernel_code_t ; GPRIDX-NEXT: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_load_dword s6, s[8:9], 0x8 -; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GPRIDX-NEXT: s_mov_b32 s2, 0 -; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000 -; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 +; GPRIDX-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 +; GPRIDX-NEXT: s_load_dword s12, s[8:9], 0x8 +; GPRIDX-NEXT: s_mov_b32 s4, 0 +; GPRIDX-NEXT: s_mov_b64 s[0:1], 1.0 +; GPRIDX-NEXT: s_mov_b64 s[6:7], 4.0 +; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) -; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1 -; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 -; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2 -; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] -; GPRIDX-NEXT: s_cmp_eq_u32 s6, 3 -; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] -; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 -; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 -; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GPRIDX-NEXT: s_mov_b32 m0, s12 +; GPRIDX-NEXT: s_mov_b64 s[2:3], 2.0 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 +; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[10:11] ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v4f64_s_s_s: @@ -4528,24 +4331,23 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 ; MOVREL-NEXT: .end_amd_kernel_code_t ; MOVREL-NEXT: ; %bb.0: ; %entry -; MOVREL-NEXT: s_load_dword s6, s[8:9], 0x8 -; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; MOVREL-NEXT: s_add_i32 s12, s12, s17 -; MOVREL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; MOVREL-NEXT: s_mov_b32 s2, 0 +; MOVREL-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 +; MOVREL-NEXT: s_load_dword s8, s[8:9], 0x8 +; MOVREL-NEXT: s_mov_b32 s4, 0 +; MOVREL-NEXT: s_mov_b64 s[0:1], 1.0 +; MOVREL-NEXT: s_mov_b64 s[6:7], 4.0 +; MOVREL-NEXT: s_mov_b32 s5, 0x40080000 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) -; MOVREL-NEXT: s_cmp_eq_u32 s6, 1 -; MOVREL-NEXT: s_mov_b32 s3, 0x40080000 -; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 -; MOVREL-NEXT: s_cmp_eq_u32 s6, 2 -; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] -; MOVREL-NEXT: s_cmp_eq_u32 s6, 3 -; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] -; MOVREL-NEXT: v_mov_b32_e32 v0, s2 -; MOVREL-NEXT: v_mov_b32_e32 v3, s1 +; MOVREL-NEXT: s_mov_b32 m0, s8 +; MOVREL-NEXT: s_mov_b64 s[2:3], 2.0 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: s_add_i32 s12, s12, s17 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v2, s10 ; MOVREL-NEXT: s_mov_b32 flat_scratch_lo, s13 -; MOVREL-NEXT: v_mov_b32_e32 v1, s3 -; MOVREL-NEXT: v_mov_b32_e32 v2, s0 +; MOVREL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; MOVREL-NEXT: v_mov_b32_e32 v1, s1 +; MOVREL-NEXT: v_mov_b32_e32 v3, s11 ; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; MOVREL-NEXT: s_endpgm ; @@ -4603,7 +4405,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX10-NEXT: gds_segment_byte_size = 0 ; GFX10-NEXT: kernarg_segment_byte_size = 28 ; GFX10-NEXT: workgroup_fbarrier_count = 0 -; GFX10-NEXT: wavefront_sgpr_count = 10 +; GFX10-NEXT: wavefront_sgpr_count = 13 ; GFX10-NEXT: workitem_vgpr_count = 3 ; GFX10-NEXT: reserved_vgpr_first = 0 ; GFX10-NEXT: reserved_vgpr_count = 0 @@ -4620,21 +4422,20 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX10-NEXT: .end_amd_kernel_code_t ; GFX10-NEXT: ; %bb.0: ; %entry ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s6, s[8:9], 0x8 -; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX10-NEXT: s_mov_b32 s2, 0 -; GFX10-NEXT: s_mov_b32 s3, 0x40080000 +; GFX10-NEXT: s_load_dword s12, s[8:9], 0x8 +; GFX10-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 +; GFX10-NEXT: s_mov_b64 s[0:1], 1.0 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b64 s[6:7], 4.0 +; GFX10-NEXT: s_mov_b32 s5, 0x40080000 +; GFX10-NEXT: s_mov_b64 s[2:3], 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_cmp_eq_u32 s6, 1 -; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 -; GFX10-NEXT: s_cmp_eq_u32 s6, 2 -; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] -; GFX10-NEXT: s_cmp_eq_u32 s6, 3 -; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] -; GFX10-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX10-NEXT: s_mov_b32 m0, s12 +; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[10:11] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: dyn_extract_v4f64_s_s_s: @@ -4691,7 +4492,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX11-NEXT: gds_segment_byte_size = 0 ; GFX11-NEXT: kernarg_segment_byte_size = 28 ; GFX11-NEXT: workgroup_fbarrier_count = 0 -; GFX11-NEXT: wavefront_sgpr_count = 7 +; GFX11-NEXT: wavefront_sgpr_count = 11 ; GFX11-NEXT: workitem_vgpr_count = 3 ; GFX11-NEXT: reserved_vgpr_first = 0 ; GFX11-NEXT: reserved_vgpr_count = 0 @@ -4708,20 +4509,19 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX11-NEXT: .end_amd_kernel_code_t ; GFX11-NEXT: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x8 -; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-NEXT: s_mov_b32 s2, 0 -; GFX11-NEXT: s_mov_b32 s3, 0x40080000 +; GFX11-NEXT: s_load_b32 s10, s[4:5], 0x8 +; GFX11-NEXT: s_load_b64 s[8:9], s[4:5], 0x0 +; GFX11-NEXT: s_mov_b64 s[0:1], 1.0 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b64 s[6:7], 4.0 +; GFX11-NEXT: s_mov_b32 s5, 0x40080000 +; GFX11-NEXT: s_mov_b64 s[2:3], 2.0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s6, 1 -; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 -; GFX11-NEXT: s_cmp_eq_u32 s6, 2 -; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] -; GFX11-NEXT: s_cmp_eq_u32 s6, 3 -; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] -; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_mov_b32 m0, s10 +; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[8:9] ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <4 x double> , i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir index de02b426776e..33c545c63477 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir @@ -1,8 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE64 %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE64 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE32 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck -check-prefix=WAVE32 %s --- @@ -21,6 +19,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) ; WAVE64-NEXT: $vgpr0 = COPY [[EVEC]](s32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_ss ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16 ; WAVE32-NEXT: {{ $}} @@ -48,7 +47,8 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>) + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<16 x s32>) ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] @@ -94,14 +94,16 @@ body: | ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>) + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<16 x s32>) ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] @@ -147,8 +149,8 @@ body: | ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -171,6 +173,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) ; WAVE64-NEXT: $vgpr0 = COPY [[EVEC]](s32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vs ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -246,6 +249,7 @@ body: | ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) ; WAVE64-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} @@ -321,6 +325,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 ; WAVE64-NEXT: [[EVEC:%[0-9]+]]:sgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[COPY1]](s32) ; WAVE64-NEXT: $sgpr0_sgpr1 = COPY [[EVEC]](s64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_ss ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16 ; WAVE32-NEXT: {{ $}} @@ -356,6 +361,7 @@ body: | ; WAVE64-NEXT: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32) ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vs ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -389,76 +395,91 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s64>) + ; WAVE64-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] + ; WAVE64-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV8]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV11]], [[UV9]] ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] + ; WAVE64-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT1]] ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] + ; WAVE64-NEXT: [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV14]], [[SELECT2]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV15]], [[SELECT3]] ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] + ; WAVE64-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV16]], [[SELECT4]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV17]], [[SELECT5]] ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] + ; WAVE64-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV18]], [[SELECT6]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV19]], [[SELECT7]] ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] + ; WAVE64-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV20]], [[SELECT8]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV21]], [[SELECT9]] ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE64-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV22]], [[SELECT10]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV23]], [[SELECT11]] + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT12]](s32), [[SELECT13]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s64>) + ; WAVE32-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] + ; WAVE32-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV8]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV11]], [[UV9]] ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] + ; WAVE32-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT1]] ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] + ; WAVE32-NEXT: [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV14]], [[SELECT2]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV15]], [[SELECT3]] ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] + ; WAVE32-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV16]], [[SELECT4]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV17]], [[SELECT5]] ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] + ; WAVE32-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV18]], [[SELECT6]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV19]], [[SELECT7]] ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] + ; WAVE32-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV20]], [[SELECT8]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV21]], [[SELECT9]] ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE32-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV22]], [[SELECT10]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV23]], [[SELECT11]] + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT12]](s32), [[SELECT13]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 @@ -480,76 +501,89 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) + ; WAVE64-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] + ; WAVE64-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV8]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV11]], [[UV9]] ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] + ; WAVE64-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT1]] ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] + ; WAVE64-NEXT: [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV14]], [[SELECT2]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV15]], [[SELECT3]] ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] + ; WAVE64-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV16]], [[SELECT4]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV17]], [[SELECT5]] ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] + ; WAVE64-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV18]], [[SELECT6]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV19]], [[SELECT7]] ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] + ; WAVE64-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV20]], [[SELECT8]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV21]], [[SELECT9]] ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE64-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV22]], [[SELECT10]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV23]], [[SELECT11]] + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT12]](s32), [[SELECT13]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vv ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) + ; WAVE32-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] + ; WAVE32-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV8]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV11]], [[UV9]] ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] + ; WAVE32-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT1]] ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] + ; WAVE32-NEXT: [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV14]], [[SELECT2]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV15]], [[SELECT3]] ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] + ; WAVE32-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV16]], [[SELECT4]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV17]], [[SELECT5]] ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] + ; WAVE32-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV18]], [[SELECT6]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV19]], [[SELECT7]] ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] + ; WAVE32-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV20]], [[SELECT8]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV21]], [[SELECT9]] ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE32-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV22]], [[SELECT10]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV23]], [[SELECT11]] + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT12]](s32), [[SELECT13]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 @@ -575,53 +609,53 @@ body: | ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C]] ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] + ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 + ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] + ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 + ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] + ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] + ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 + ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] + ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 + ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] + ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 + ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] + ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 + ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] + ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 + ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_add1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} @@ -631,50 +665,49 @@ body: | ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C]] ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] + ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 + ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] + ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 + ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] + ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] + ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 + ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] + ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 + ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] + ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 + ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] + ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 + ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] + ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 + ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) @@ -751,6 +784,7 @@ body: | ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_addm1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} @@ -880,6 +914,7 @@ body: | ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_add16 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} @@ -961,39 +996,45 @@ body: | ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) + ; WAVE64-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C]] + ; WAVE64-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV8]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV11]], [[UV9]] + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] + ; WAVE64-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT1]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] + ; WAVE64-NEXT: [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV14]], [[SELECT2]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV15]], [[SELECT3]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] + ; WAVE64-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV16]], [[SELECT4]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV17]], [[SELECT5]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] + ; WAVE64-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV18]], [[SELECT6]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV19]], [[SELECT7]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] + ; WAVE64-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV20]], [[SELECT8]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV21]], [[SELECT9]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] + ; WAVE64-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV22]], [[SELECT10]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV23]], [[SELECT11]] + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT12]](s32), [[SELECT13]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vv_idx_add1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} @@ -1002,38 +1043,43 @@ body: | ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) + ; WAVE32-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C]] + ; WAVE32-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV8]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV11]], [[UV9]] + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] + ; WAVE32-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT1]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] + ; WAVE32-NEXT: [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV14]], [[SELECT2]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV15]], [[SELECT3]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] + ; WAVE32-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV16]], [[SELECT4]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV17]], [[SELECT5]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] + ; WAVE32-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV18]], [[SELECT6]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV19]], [[SELECT7]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] + ; WAVE32-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV20]], [[SELECT8]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV21]], [[SELECT9]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] + ; WAVE32-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV22]], [[SELECT10]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV23]], [[SELECT11]] + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT12]](s32), [[SELECT13]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 @@ -1060,54 +1106,55 @@ body: | ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>) + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<16 x s32>) + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C]] ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] + ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 + ; WAVE64-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] + ; WAVE64-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 + ; WAVE64-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] + ; WAVE64-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; WAVE64-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] + ; WAVE64-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 + ; WAVE64-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] + ; WAVE64-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 + ; WAVE64-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] + ; WAVE64-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 + ; WAVE64-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] + ; WAVE64-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 + ; WAVE64-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] + ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 + ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv_idx_add1 ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -1116,54 +1163,54 @@ body: | ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>) + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<16 x s32>) + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C]] ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV2]], [[SELECT]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV3]], [[SELECT1]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV4]], [[SELECT2]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV5]], [[SELECT3]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV6]], [[SELECT4]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV7]], [[SELECT5]] - ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 - ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] + ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 + ; WAVE32-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[UV8]], [[SELECT6]] - ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 - ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] + ; WAVE32-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 9 + ; WAVE32-NEXT: [[ICMP8:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C8]] ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP8]](s1), [[UV9]], [[SELECT7]] - ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 - ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] + ; WAVE32-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; WAVE32-NEXT: [[ICMP9:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C9]] ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT8]] - ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 - ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] + ; WAVE32-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 11 + ; WAVE32-NEXT: [[ICMP10:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C10]] ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP10]](s1), [[UV11]], [[SELECT9]] - ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 - ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] + ; WAVE32-NEXT: [[C11:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 + ; WAVE32-NEXT: [[ICMP11:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C11]] ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP11]](s1), [[UV12]], [[SELECT10]] - ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 - ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] + ; WAVE32-NEXT: [[C12:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 13 + ; WAVE32-NEXT: [[ICMP12:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C12]] ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP12]](s1), [[UV13]], [[SELECT11]] - ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 - ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] + ; WAVE32-NEXT: [[C13:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 14 + ; WAVE32-NEXT: [[ICMP13:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C13]] ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP13]](s1), [[UV14]], [[SELECT12]] - ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 - ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] + ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 + ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C14]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[COPY4]](s32) %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_CONSTANT i32 1 @@ -1189,39 +1236,46 @@ body: | ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE64-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) + ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY3]](<8 x s64>) + ; WAVE64-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C]] + ; WAVE64-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; WAVE64-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV8]] + ; WAVE64-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV11]], [[UV9]] + ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; WAVE64-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] + ; WAVE64-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; WAVE64-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT]] + ; WAVE64-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT1]] + ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 + ; WAVE64-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] + ; WAVE64-NEXT: [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; WAVE64-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV14]], [[SELECT2]] + ; WAVE64-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV15]], [[SELECT3]] + ; WAVE64-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; WAVE64-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] + ; WAVE64-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) + ; WAVE64-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV16]], [[SELECT4]] + ; WAVE64-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV17]], [[SELECT5]] + ; WAVE64-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE64-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] + ; WAVE64-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) + ; WAVE64-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV18]], [[SELECT6]] + ; WAVE64-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV19]], [[SELECT7]] + ; WAVE64-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] + ; WAVE64-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) + ; WAVE64-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV20]], [[SELECT8]] + ; WAVE64-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV21]], [[SELECT9]] + ; WAVE64-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 + ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] + ; WAVE64-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) + ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV22]], [[SELECT10]] + ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV23]], [[SELECT11]] + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT12]](s32), [[SELECT13]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv_add1 ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -1230,38 +1284,44 @@ body: | ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] - ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) - ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] - ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV]] - ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV1]] - ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] - ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV4]], [[SELECT]] - ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV5]], [[SELECT1]] - ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] - ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV6]], [[SELECT2]] - ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV7]], [[SELECT3]] - ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] - ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV8]], [[SELECT4]] - ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV9]], [[SELECT5]] - ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] - ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV10]], [[SELECT6]] - ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV11]], [[SELECT7]] - ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] - ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV12]], [[SELECT8]] - ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV13]], [[SELECT9]] - ; WAVE32-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 - ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] - ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] - ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) + ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY3]](<8 x s64>) + ; WAVE32-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C]] + ; WAVE32-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV8]] + ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV11]], [[UV9]] + ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; WAVE32-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] + ; WAVE32-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT]] + ; WAVE32-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT1]] + ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 + ; WAVE32-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C2]] + ; WAVE32-NEXT: [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; WAVE32-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV14]], [[SELECT2]] + ; WAVE32-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[UV15]], [[SELECT3]] + ; WAVE32-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; WAVE32-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C3]] + ; WAVE32-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) + ; WAVE32-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV16]], [[SELECT4]] + ; WAVE32-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[UV17]], [[SELECT5]] + ; WAVE32-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C4]] + ; WAVE32-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) + ; WAVE32-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV18]], [[SELECT6]] + ; WAVE32-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[UV19]], [[SELECT7]] + ; WAVE32-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE32-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C5]] + ; WAVE32-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) + ; WAVE32-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV20]], [[SELECT8]] + ; WAVE32-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[UV21]], [[SELECT9]] + ; WAVE32-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 + ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C6]] + ; WAVE32-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) + ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV22]], [[SELECT10]] + ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV23]], [[SELECT11]] + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT12]](s32), [[SELECT13]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0