AMDGPU: Really use AV classes by default for vector classes (#166483)
AMDGPU: Really use AV classes by default for vector classes. Update getRegClassFor to use AV classes in place of VGPRs for gfx90a-gfx950. There are a handful of regressions. Most of them enable unprofitable rematerialization, which reduces the register count by 1 but adds an unnecessary instruction.
This commit is contained in:
parent
7aa60b64f3
commit
c7019c7eda
@ -18770,8 +18770,11 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
|
||||
: &AMDGPU::SReg_32RegClass;
|
||||
if (!TRI->isSGPRClass(RC) && !isDivergent)
|
||||
return TRI->getEquivalentSGPRClass(RC);
|
||||
if (TRI->isSGPRClass(RC) && isDivergent)
|
||||
if (TRI->isSGPRClass(RC) && isDivergent) {
|
||||
if (Subtarget->hasGFX90AInsts())
|
||||
return TRI->getEquivalentAVClass(RC);
|
||||
return TRI->getEquivalentVGPRClass(RC);
|
||||
}
|
||||
|
||||
return RC;
|
||||
}
|
||||
|
||||
@ -3640,6 +3640,14 @@ SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
|
||||
return ARC;
|
||||
}
|
||||
|
||||
/// Map \p SRC to the register class that getVectorSuperClassForBitWidth
/// reports for the same bit width (the AGPR+VGPR "AV" super class).
///
/// \param SRC Register class whose size in bits selects the result.
/// \returns The class found for that width; asserts that one exists.
const TargetRegisterClass *
SIRegisterInfo::getEquivalentAVClass(const TargetRegisterClass *SRC) const {
  const unsigned BitWidth = getRegSizeInBits(*SRC);
  const TargetRegisterClass *AVRC = getVectorSuperClassForBitWidth(BitWidth);
  // A null result means no vector super class is defined for this width.
  assert(AVRC && "Invalid register class size");
  return AVRC;
}
|
||||
|
||||
const TargetRegisterClass *
|
||||
SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
|
||||
unsigned Size = getRegSizeInBits(*VRC);
|
||||
|
||||
@ -289,6 +289,10 @@ public:
|
||||
const TargetRegisterClass *
|
||||
getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;
|
||||
|
||||
/// \returns An AGPR+VGPR super reg class with the same width as \p SRC
|
||||
const TargetRegisterClass *
|
||||
getEquivalentAVClass(const TargetRegisterClass *SRC) const;
|
||||
|
||||
/// \returns An SGPR reg class with the same width as \p VRC
|
||||
const TargetRegisterClass *
|
||||
getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -43,31 +43,31 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: successors: %bb.3(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF implicit-def $vgpr16
|
||||
; GFX90A-NEXT: renamable $vgpr3 = IMPLICIT_DEF implicit-def $vgpr2
|
||||
; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24
|
||||
; GFX90A-NEXT: renamable $vgpr27 = IMPLICIT_DEF implicit-def $vgpr26
|
||||
; GFX90A-NEXT: renamable $vgpr29 = IMPLICIT_DEF implicit-def $vgpr28
|
||||
; GFX90A-NEXT: renamable $vgpr33 = IMPLICIT_DEF implicit-def $vgpr32
|
||||
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.3.Flow17:
|
||||
; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr6 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.57, implicit $vcc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.4.bb15:
|
||||
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr46, renamable $vcc = V_ADD_CO_U32_e64 $sgpr24, $vgpr0, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr47, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr1, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 2, $vgpr30, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 2, $vgpr6, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr40, renamable $vcc = V_ADD_CO_U32_e64 $vgpr46, killed $vgpr0, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr41, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr47, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr30_sgpr31, implicit-def dead $scc
|
||||
@ -75,7 +75,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.5:
|
||||
; GFX90A-NEXT: successors: %bb.6(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
|
||||
@ -88,9 +88,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
|
||||
@ -98,32 +98,32 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $vgpr56_vgpr57 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr44_vgpr45 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr42_vgpr43 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.6.Flow20:
|
||||
; GFX90A-NEXT: successors: %bb.7(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr24 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr26 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr28 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr29 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr15 = COPY killed renamable $sgpr18, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr33 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr18, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr25 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr27 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr29 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.7.Flow19:
|
||||
; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.8(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: $sgpr18_sgpr19 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -131,7 +131,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.8.Flow32:
|
||||
; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -140,15 +140,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.9.bb89:
|
||||
; GFX90A-NEXT: successors: %bb.10(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.10.Flow33:
|
||||
; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -157,15 +157,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.11.bb84:
|
||||
; GFX90A-NEXT: successors: %bb.12(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.12.Flow34:
|
||||
; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr44_sgpr45, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -174,10 +174,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.13.bb79:
|
||||
; GFX90A-NEXT: successors: %bb.14(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.14.Flow35:
|
||||
@ -359,7 +359,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.35.bb20:
|
||||
; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i23)
|
||||
; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
@ -376,37 +376,37 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $vgpr43, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_LT_I16_e64 0, killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr58_vgpr59 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr56_vgpr57 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr44_vgpr45 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: $sgpr24_sgpr25 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.37, implicit $exec
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.36.Flow21:
|
||||
; GFX90A-NEXT: successors: %bb.6(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc
|
||||
; GFX90A-NEXT: S_BRANCH %bb.6
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.37.bb27:
|
||||
; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i30)
|
||||
; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
@ -416,29 +416,29 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr45, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr58_vgpr59 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr56_vgpr57 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.39, implicit $exec
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.38.Flow22:
|
||||
; GFX90A-NEXT: successors: %bb.36(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
@ -459,7 +459,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.39.bb34:
|
||||
; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i37)
|
||||
; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
@ -469,28 +469,28 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $vgpr57, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr58_vgpr59 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.41, implicit $exec
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.40.Flow23:
|
||||
; GFX90A-NEXT: successors: %bb.38(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
@ -510,40 +510,39 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.41.bb41:
|
||||
; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
|
||||
; GFX90A-NEXT: renamable $vgpr59, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr58_vgpr59, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i44)
|
||||
; GFX90A-NEXT: renamable $vgpr1, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $vgpr58, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i44)
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr3, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr59, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: $sgpr42_sgpr43 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.46, implicit $exec
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.42.Flow24:
|
||||
; GFX90A-NEXT: successors: %bb.40(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $vgpr59 = COPY killed renamable $vgpr3, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
|
||||
@ -560,11 +559,11 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.43.bb55:
|
||||
; GFX90A-NEXT: successors: %bb.48(0x40000000), %bb.44(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: S_BITCMP1_B32 killed renamable $sgpr17, 16, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_CSELECT_B64 -1, 0, implicit killed $scc
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_XOR_B64 renamable $sgpr66_sgpr67, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_XOR_B64 renamable $sgpr64_sgpr65, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $vgpr62 = V_ADD_CO_U32_e32 6144, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr63, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr18_sgpr19, implicit-def dead $scc
|
||||
@ -572,27 +571,27 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.44:
|
||||
; GFX90A-NEXT: successors: %bb.45(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr62, $vgpr56, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr57, $vgpr61, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr47, $vgpr46, $vgpr2, $vgpr4, $vgpr5, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr40, $vgpr60, $vgpr63, $vgpr58
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr57, $vgpr62, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr40, $vgpr61, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr4, $vgpr5, $vgpr6, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr58, $vgpr60, $vgpr63, $vgpr59
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.45.Flow26:
|
||||
; GFX90A-NEXT: successors: %bb.47(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
|
||||
@ -608,7 +607,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.46.bb48:
|
||||
; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
|
||||
@ -620,26 +619,26 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr61, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: $sgpr44_sgpr45 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.43, implicit $exec
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.47.Flow25:
|
||||
; GFX90A-NEXT: successors: %bb.42(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr44_sgpr45, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
@ -657,139 +656,139 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.48.bb63:
|
||||
; GFX90A-NEXT: successors: %bb.50(0x40000000), %bb.49(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.50, implicit $vcc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.49:
|
||||
; GFX90A-NEXT: successors: %bb.44(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: S_BRANCH %bb.44
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.50.bb68:
|
||||
; GFX90A-NEXT: successors: %bb.54(0x40000000), %bb.51(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr30, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr6, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr1 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.54, implicit $vcc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.51:
|
||||
; GFX90A-NEXT: successors: %bb.45(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: S_BRANCH %bb.45
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.52.bb80:
|
||||
; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.53(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_CMP_EQ_U32 killed renamable $sgpr17, 0, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $vgpr8 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr9, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr10 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr11, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.59, implicit killed $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.53:
|
||||
; GFX90A-NEXT: successors: %bb.61(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: S_BRANCH %bb.61
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.54.bb73:
|
||||
; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr8 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76)
|
||||
; GFX90A-NEXT: renamable $vgpr6 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr3 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76)
|
||||
; GFX90A-NEXT: renamable $vgpr8 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr7, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr8, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr9, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr3, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: $sgpr62_sgpr63 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.52, implicit $exec
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.55.Flow29:
|
||||
; GFX90A-NEXT: successors: %bb.45(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr62_sgpr63, implicit-def $scc
|
||||
; GFX90A-NEXT: S_BRANCH %bb.45
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.56.bb90:
|
||||
; GFX90A-NEXT: successors: %bb.60(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr12 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr66_sgpr67, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr13 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr14 = COPY renamable $sgpr21, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr22_vgpr23 = DS_READ_B64_gfx9 killed renamable $vgpr14, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr20_vgpr21 = DS_READ_B64_gfx9 killed renamable $vgpr13, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr13 = COPY renamable $sgpr22, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr18_vgpr19 = DS_READ_B64_gfx9 killed renamable $vgpr13, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr30 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr7 = COPY renamable $sgpr21, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr24_vgpr25 = DS_READ_B64_gfx9 killed renamable $vgpr7, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr22_vgpr23 = DS_READ_B64_gfx9 killed renamable $vgpr3, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr3 = COPY renamable $sgpr22, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr20_vgpr21 = DS_READ_B64_gfx9 killed renamable $vgpr3, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_LSHR_B64 killed renamable $sgpr56_sgpr57, 1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $vgpr16_vgpr17 = V_LSHRREV_B64_e64 1, $vgpr22_vgpr23, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr17 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr32_vgpr33 = V_LSHRREV_B64_e64 1, $vgpr20_vgpr21, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr18_vgpr19 = V_LSHRREV_B64_e64 1, $vgpr24_vgpr25, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr7 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr14_vgpr15 = V_LSHRREV_B64_e64 1, $vgpr22_vgpr23, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $vgpr14 = COPY renamable $vgpr20, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $vgpr16 = COPY renamable $vgpr22, implicit $exec
|
||||
; GFX90A-NEXT: S_BRANCH %bb.60
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.57:
|
||||
; GFX90A-NEXT: successors: %bb.7(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr22 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr18 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr19 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr12 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr24 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr20 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr16 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr21 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr30 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr22 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr14 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr18 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
|
||||
@ -800,9 +799,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
|
||||
@ -812,7 +811,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $vgpr42_vgpr43 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr40_vgpr41 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr46_vgpr47 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr17 = COPY renamable $vgpr5, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr7 = COPY renamable $vgpr5, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: S_BRANCH %bb.7
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
@ -821,62 +820,62 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr26_vgpr27 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr28_vgpr29 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr23, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr24_vgpr25 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.419, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr26_vgpr27 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.419, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr21, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2_vgpr3 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr33, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr14_vgpr15 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.420, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.420, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr22, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr28_vgpr29 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr32_vgpr33 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: S_BRANCH %bb.3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.59.bb85:
|
||||
; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.60(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 1, $vgpr8, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr11 = COPY renamable $vgpr9, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr12 = FLAT_LOAD_UBYTE renamable $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i86)
|
||||
; GFX90A-NEXT: renamable $vgpr12 = V_OR_B32_e32 1, $vgpr10, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr13 = COPY renamable $vgpr11, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr3 = FLAT_LOAD_UBYTE renamable $vgpr12_vgpr13, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i86)
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr12, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr32 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr20 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr12 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr3, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr24 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr30 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
|
||||
; GFX90A-NEXT: renamable $sgpr18 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: $sgpr54_sgpr55 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.56, implicit $exec
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.60.Flow31:
|
||||
; GFX90A-NEXT: successors: %bb.61(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr54_sgpr55, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.61.Flow30:
|
||||
; GFX90A-NEXT: successors: %bb.55(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr52_sgpr53, killed renamable $sgpr56_sgpr57, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_BRANCH %bb.55
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.62.bb140:
|
||||
; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.63(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr24_sgpr25 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
|
||||
@ -884,122 +883,122 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.63.Flow13:
|
||||
; GFX90A-NEXT: successors: %bb.64(0x40000000), %bb.66(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.66, implicit $vcc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.64.bb159:
|
||||
; GFX90A-NEXT: successors: %bb.67(0x40000000), %bb.65(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr30, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr6, implicit $exec
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.67, implicit $exec
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.65.Flow10:
|
||||
; GFX90A-NEXT: successors: %bb.66(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_ANDN2_SAVEEXEC_B64 $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.66.Flow14:
|
||||
; GFX90A-NEXT: successors: %bb.8(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = COPY $exec
|
||||
; GFX90A-NEXT: S_BRANCH %bb.8
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.67.bb161:
|
||||
; GFX90A-NEXT: successors: %bb.65(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr25, killed $vgpr27, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr29, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr3 = V_OR_B32_e32 killed $vgpr15, killed $vgpr3, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr27, killed $vgpr29, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr33, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr3 = V_OR_B32_e32 killed $vgpr17, killed $vgpr3, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr3, killed $vgpr2, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr12, 0, $vgpr3, 0, 0, 6, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr30, 0, $vgpr3, 0, 0, 6, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_CNDMASK_B32_e64 0, 0, 0, killed $vgpr2, killed $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr4 = V_OR_B32_e32 killed $vgpr16, killed $vgpr19, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr4 = V_OR_B32_e32 killed $vgpr18, killed $vgpr21, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr4, killed $vgpr2, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr17, 0, $vgpr3, 0, 0, 6, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr7, 0, $vgpr3, 0, 0, 6, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_CNDMASK_B32_e64 0, 0, 0, killed $vgpr2, killed $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr32, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr14, implicit $exec
|
||||
; GFX90A-NEXT: DS_WRITE2_B32_gfx9 killed renamable $vgpr3, killed renamable $vgpr2, renamable $vgpr3, 0, 1, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, align 4, addrspace 3)
|
||||
; GFX90A-NEXT: S_BRANCH %bb.65
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.68.bb174:
|
||||
; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $agpr0 = COPY killed renamable $vgpr32, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr32 = V_OR_B32_e32 1, $vgpr28, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr52 = V_OR_B32_e32 $vgpr32, $vgpr26, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr38 = V_OR_B32_e32 $vgpr52, $vgpr24, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr34 = V_CNDMASK_B32_e64 0, $vgpr38, 0, 0, $sgpr12_sgpr13, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr50 = V_OR_B32_e32 $vgpr34, $vgpr2, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr50, $vgpr14, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr36 = V_OR_B32_e32 $vgpr48, $vgpr18, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr54 = V_CNDMASK_B32_e64 0, 0, 0, $vgpr36, killed $sgpr12_sgpr13, implicit $exec
|
||||
; GFX90A-NEXT: renamable $agpr0 = COPY killed renamable $vgpr14, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr34 = V_OR_B32_e32 1, $vgpr32, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr54 = V_OR_B32_e32 $vgpr34, $vgpr28, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr54, $vgpr26, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr36 = V_CNDMASK_B32_e64 0, $vgpr48, 0, 0, $sgpr12_sgpr13, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr52 = V_OR_B32_e32 $vgpr36, $vgpr2, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr50 = V_OR_B32_e32 $vgpr52, $vgpr16, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr38 = V_OR_B32_e32 $vgpr50, $vgpr20, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr14 = V_CNDMASK_B32_e64 0, 0, 0, $vgpr38, killed $sgpr12_sgpr13, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr28_sgpr29, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.72, implicit $vcc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.69.Flow:
|
||||
; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.71(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.70.bb186:
|
||||
; GFX90A-NEXT: successors: %bb.71(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr4, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr26, $vgpr4, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr5, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr33 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr53 = COPY renamable $vgpr33, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr39 = COPY renamable $vgpr33, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr51 = COPY renamable $vgpr33, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr49 = COPY renamable $vgpr33, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr35 = COPY renamable $vgpr33, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr55 = COPY renamable $vgpr33, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr37 = COPY renamable $vgpr33, implicit $exec
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr33, renamable $vgpr32_vgpr33, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr35 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr55 = COPY renamable $vgpr35, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr49 = COPY renamable $vgpr35, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr53 = COPY renamable $vgpr35, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr51 = COPY renamable $vgpr35, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr37 = COPY renamable $vgpr35, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr15 = COPY renamable $vgpr35, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr39 = COPY renamable $vgpr35, implicit $exec
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr35, renamable $vgpr34_vgpr35, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr5 = COPY renamable $sgpr21, implicit $exec
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr5, killed renamable $vgpr52_vgpr53, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr13 = COPY killed renamable $sgpr22, implicit $exec
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr13, killed renamable $vgpr38_vgpr39, 0, 0, implicit $exec :: (store (s64) into %ir.8, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr33, killed renamable $vgpr50_vgpr51, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr5, killed renamable $vgpr48_vgpr49, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr33, killed renamable $vgpr34_vgpr35, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr5, killed renamable $vgpr54_vgpr55, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr33, killed renamable $vgpr36_vgpr37, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr5, killed renamable $vgpr54_vgpr55, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr16 = COPY killed renamable $sgpr22, implicit $exec
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr16, killed renamable $vgpr48_vgpr49, 0, 0, implicit $exec :: (store (s64) into %ir.8, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr35, killed renamable $vgpr52_vgpr53, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr5, killed renamable $vgpr50_vgpr51, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr35, killed renamable $vgpr36_vgpr37, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr5, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr35, killed renamable $vgpr38_vgpr39, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.71.Flow9:
|
||||
; GFX90A-NEXT: successors: %bb.63(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr24_sgpr25 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr32 = COPY killed renamable $agpr0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr14 = COPY killed renamable $agpr0, implicit $exec
|
||||
; GFX90A-NEXT: S_BRANCH %bb.63
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.72.bb196:
|
||||
; GFX90A-NEXT: successors: %bb.69(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x0000000000000003, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 $vgpr54, killed $vgpr22, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr20 = V_OR_B32_e32 killed $vgpr2, killed $vgpr20, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr21 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr21, renamable $vgpr20_vgpr21, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 $vgpr14, killed $vgpr24, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr22 = V_OR_B32_e32 killed $vgpr2, killed $vgpr22, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr23 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr23, renamable $vgpr22_vgpr23, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: S_BRANCH %bb.69
|
||||
bb:
|
||||
|
||||
@ -16,7 +16,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val
|
||||
; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
|
||||
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
|
||||
@ -36,7 +36,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offen_rtn(<2 x half> %val,
|
||||
; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
|
||||
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
@ -56,7 +56,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val,
|
||||
; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
||||
; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
|
||||
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
|
||||
@ -78,7 +78,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val
|
||||
; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
|
||||
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
|
||||
@ -103,7 +103,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offset_rtn(<2 x half>
|
||||
; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
|
||||
; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
|
||||
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
|
||||
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
|
||||
@ -129,7 +129,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offen_rtn(<2 x half> %
|
||||
; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
|
||||
; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
|
||||
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
|
||||
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
@ -155,7 +155,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_idxen_rtn(<2 x half> %
|
||||
; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
|
||||
; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
|
||||
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
|
||||
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
|
||||
@ -183,7 +183,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half>
|
||||
; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
|
||||
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
|
||||
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
|
||||
; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
|
||||
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
|
||||
|
||||
@ -258,68 +258,59 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
|
||||
; SDAG-GFX942-NEXT: .LBB0_1: ; %load-store-loop
|
||||
; SDAG-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SDAG-GFX942-NEXT: s_add_i32 s1, s0, s16
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v60, s1
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[8:11], v60, s[4:7], 0 offen
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[4:7], v60, s[4:7], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[12:15], v60, s[4:7], 0 offen offset:32
|
||||
; SDAG-GFX942-NEXT: s_add_i32 s2, s8, s16
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s2
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s1
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v0, s[4:7], 0 offen
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v0, s[4:7], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v0, s[4:7], 0 offen offset:32
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v0, s[4:7], 0 offen offset:48
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v0, s[4:7], 0 offen offset:64
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v0, s[4:7], 0 offen offset:80
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v0, s[4:7], 0 offen offset:96
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v0, s[4:7], 0 offen offset:112
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v0, s[4:7], 0 offen offset:128
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v0, s[4:7], 0 offen offset:144
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v0, s[4:7], 0 offen offset:160
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v0, s[4:7], 0 offen offset:176
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v0, s[4:7], 0 offen offset:192
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v0, s[4:7], 0 offen offset:208
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v0, s[4:7], 0 offen offset:224
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 a[0:3], v0, s[4:7], 0 offen offset:240
|
||||
; SDAG-GFX942-NEXT: s_add_i32 s1, s8, s16
|
||||
; SDAG-GFX942-NEXT: s_addk_i32 s16, 0x100
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s1
|
||||
; SDAG-GFX942-NEXT: s_cmpk_lt_u32 s16, 0x2000
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a0, v15 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a1, v14 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a2, v13 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a3, v12 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[12:15], v60, s[4:7], 0 offen offset:48
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[16:19], v60, s[4:7], 0 offen offset:64
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[20:23], v60, s[4:7], 0 offen offset:80
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[24:27], v60, s[4:7], 0 offen offset:96
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[28:31], v60, s[4:7], 0 offen offset:112
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[32:35], v60, s[4:7], 0 offen offset:128
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[36:39], v60, s[4:7], 0 offen offset:144
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[40:43], v60, s[4:7], 0 offen offset:160
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[44:47], v60, s[4:7], 0 offen offset:176
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[48:51], v60, s[4:7], 0 offen offset:192
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[52:55], v60, s[4:7], 0 offen offset:208
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[56:59], v60, s[4:7], 0 offen offset:224
|
||||
; SDAG-GFX942-NEXT: s_nop 0
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[60:63], v60, s[4:7], 0 offen offset:240
|
||||
; SDAG-GFX942-NEXT: s_nop 0
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[8:11], v0, s[12:15], 0 offen
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[4:7], v0, s[12:15], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: s_nop 1
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v5, a0 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v4, a1 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v3, a2 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v2, a3 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v0, s[12:15], 0 offen offset:32
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[12:15], v0, s[12:15], 0 offen offset:48
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v0, s[12:15], 0 offen
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[16:19], v0, s[12:15], 0 offen offset:64
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v0, s[12:15], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[20:23], v0, s[12:15], 0 offen offset:80
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[10:13], v0, s[12:15], 0 offen offset:32
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[24:27], v0, s[12:15], 0 offen offset:96
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v0, s[12:15], 0 offen offset:48
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[28:31], v0, s[12:15], 0 offen offset:112
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v0, s[12:15], 0 offen offset:64
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[32:35], v0, s[12:15], 0 offen offset:128
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v0, s[12:15], 0 offen offset:80
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[36:39], v0, s[12:15], 0 offen offset:144
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v0, s[12:15], 0 offen offset:96
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[40:43], v0, s[12:15], 0 offen offset:160
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v0, s[12:15], 0 offen offset:112
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[44:47], v0, s[12:15], 0 offen offset:176
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v0, s[12:15], 0 offen offset:128
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[48:51], v0, s[12:15], 0 offen offset:192
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v0, s[12:15], 0 offen offset:144
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[52:55], v0, s[12:15], 0 offen offset:208
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v0, s[12:15], 0 offen offset:160
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[56:59], v0, s[12:15], 0 offen offset:224
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v0, s[12:15], 0 offen offset:176
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[60:63], v0, s[12:15], 0 offen offset:240
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v0, s[12:15], 0 offen offset:192
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v0, s[12:15], 0 offen offset:208
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v0, s[12:15], 0 offen offset:224
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 a[0:3], v0, s[12:15], 0 offen offset:240
|
||||
; SDAG-GFX942-NEXT: s_cbranch_scc1 .LBB0_1
|
||||
; SDAG-GFX942-NEXT: ; %bb.2: ; %memcpy-split
|
||||
; SDAG-GFX942-NEXT: s_endpgm
|
||||
@ -799,68 +790,59 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
|
||||
; SDAG-GFX942-NEXT: .LBB1_1: ; %load-store-loop
|
||||
; SDAG-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SDAG-GFX942-NEXT: s_add_i32 s1, s0, s16
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v60, s1
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[8:11], v60, s[4:7], 0 offen
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[4:7], v60, s[4:7], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[12:15], v60, s[4:7], 0 offen offset:32
|
||||
; SDAG-GFX942-NEXT: s_add_i32 s2, s8, s16
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s2
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s1
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v0, s[4:7], 0 offen
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v0, s[4:7], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v0, s[4:7], 0 offen offset:32
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v0, s[4:7], 0 offen offset:48
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v0, s[4:7], 0 offen offset:64
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v0, s[4:7], 0 offen offset:80
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v0, s[4:7], 0 offen offset:96
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v0, s[4:7], 0 offen offset:112
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v0, s[4:7], 0 offen offset:128
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v0, s[4:7], 0 offen offset:144
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v0, s[4:7], 0 offen offset:160
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v0, s[4:7], 0 offen offset:176
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v0, s[4:7], 0 offen offset:192
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v0, s[4:7], 0 offen offset:208
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v0, s[4:7], 0 offen offset:224
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 a[0:3], v0, s[4:7], 0 offen offset:240
|
||||
; SDAG-GFX942-NEXT: s_add_i32 s1, s8, s16
|
||||
; SDAG-GFX942-NEXT: s_addk_i32 s16, 0x100
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s1
|
||||
; SDAG-GFX942-NEXT: s_cmpk_lt_u32 s16, 0x100
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a0, v15 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a1, v14 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a2, v13 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a3, v12 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[12:15], v60, s[4:7], 0 offen offset:48
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[16:19], v60, s[4:7], 0 offen offset:64
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[20:23], v60, s[4:7], 0 offen offset:80
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[24:27], v60, s[4:7], 0 offen offset:96
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[28:31], v60, s[4:7], 0 offen offset:112
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[32:35], v60, s[4:7], 0 offen offset:128
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[36:39], v60, s[4:7], 0 offen offset:144
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[40:43], v60, s[4:7], 0 offen offset:160
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[44:47], v60, s[4:7], 0 offen offset:176
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[48:51], v60, s[4:7], 0 offen offset:192
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[52:55], v60, s[4:7], 0 offen offset:208
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[56:59], v60, s[4:7], 0 offen offset:224
|
||||
; SDAG-GFX942-NEXT: s_nop 0
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[60:63], v60, s[4:7], 0 offen offset:240
|
||||
; SDAG-GFX942-NEXT: s_nop 0
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[8:11], v0, s[12:15], 0 offen
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[4:7], v0, s[12:15], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: s_nop 1
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v5, a0 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v4, a1 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v3, a2 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v2, a3 ; Reload Reuse
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v0, s[12:15], 0 offen offset:32
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[12:15], v0, s[12:15], 0 offen offset:48
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v0, s[12:15], 0 offen
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[16:19], v0, s[12:15], 0 offen offset:64
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v0, s[12:15], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[20:23], v0, s[12:15], 0 offen offset:80
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[10:13], v0, s[12:15], 0 offen offset:32
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[24:27], v0, s[12:15], 0 offen offset:96
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v0, s[12:15], 0 offen offset:48
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[28:31], v0, s[12:15], 0 offen offset:112
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v0, s[12:15], 0 offen offset:64
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[32:35], v0, s[12:15], 0 offen offset:128
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v0, s[12:15], 0 offen offset:80
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[36:39], v0, s[12:15], 0 offen offset:144
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v0, s[12:15], 0 offen offset:96
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[40:43], v0, s[12:15], 0 offen offset:160
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v0, s[12:15], 0 offen offset:112
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[44:47], v0, s[12:15], 0 offen offset:176
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v0, s[12:15], 0 offen offset:128
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[48:51], v0, s[12:15], 0 offen offset:192
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v0, s[12:15], 0 offen offset:144
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[52:55], v0, s[12:15], 0 offen offset:208
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v0, s[12:15], 0 offen offset:160
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[56:59], v0, s[12:15], 0 offen offset:224
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v0, s[12:15], 0 offen offset:176
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[60:63], v0, s[12:15], 0 offen offset:240
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v0, s[12:15], 0 offen offset:192
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v0, s[12:15], 0 offen offset:208
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v0, s[12:15], 0 offen offset:224
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 a[0:3], v0, s[12:15], 0 offen offset:240
|
||||
; SDAG-GFX942-NEXT: s_cbranch_scc1 .LBB1_1
|
||||
; SDAG-GFX942-NEXT: ; %bb.2: ; %memcpy-split
|
||||
; SDAG-GFX942-NEXT: s_endpgm
|
||||
@ -1158,8 +1140,8 @@ define amdgpu_kernel void @memcpy_known_small(ptr addrspace(7) %src, ptr addrspa
|
||||
; SDAG-GFX942-NEXT: s_mov_b32 s2, s1
|
||||
; SDAG-GFX942-NEXT: s_mov_b32 s3, s12
|
||||
; SDAG-GFX942-NEXT: s_or_b64 s[8:9], s[2:3], s[12:13]
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v4, s0
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[0:3], v4, s[8:11], 0 offen
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v0, s[8:11], 0 offen
|
||||
; SDAG-GFX942-NEXT: s_load_dword s13, s[4:5], 0x54
|
||||
; SDAG-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44
|
||||
; SDAG-GFX942-NEXT: s_mov_b32 s5, s12
|
||||
@ -1170,12 +1152,12 @@ define amdgpu_kernel void @memcpy_known_small(ptr addrspace(7) %src, ptr addrspa
|
||||
; SDAG-GFX942-NEXT: s_mov_b32 s2, s1
|
||||
; SDAG-GFX942-NEXT: s_mov_b32 s3, s12
|
||||
; SDAG-GFX942-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13]
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v5, s0
|
||||
; SDAG-GFX942-NEXT: v_mov_b32_e32 v1, s0
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[0:3], v5, s[4:7], 0 offen
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[0:3], v4, s[8:11], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v1, s[4:7], 0 offen
|
||||
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v0, s[8:11], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[0:3], v5, s[4:7], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v1, s[4:7], 0 offen offset:16
|
||||
; SDAG-GFX942-NEXT: s_endpgm
|
||||
;
|
||||
; SDAG-GFX1100-LABEL: memcpy_known_small:
|
||||
|
||||
@ -1553,13 +1553,13 @@ define void @too_many_args_use_workitem_id_xyz(
|
||||
; GFX90A-LABEL: too_many_args_use_workitem_id_xyz:
|
||||
; GFX90A: ; %bb.0:
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_load_dword v32, off, s[0:3], s32
|
||||
; GFX90A-NEXT: v_and_b32_e32 v33, 0x3ff, v31
|
||||
; GFX90A-NEXT: global_store_dword v[0:1], v33, off
|
||||
; GFX90A-NEXT: buffer_load_dword v33, off, s[0:3], s32
|
||||
; GFX90A-NEXT: v_and_b32_e32 v32, 0x3ff, v31
|
||||
; GFX90A-NEXT: global_store_dword v[0:1], v32, off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_bfe_u32 v33, v31, 10, 10
|
||||
; GFX90A-NEXT: v_bfe_u32 v32, v31, 10, 10
|
||||
; GFX90A-NEXT: v_bfe_u32 v31, v31, 20, 10
|
||||
; GFX90A-NEXT: global_store_dword v[0:1], v33, off
|
||||
; GFX90A-NEXT: global_store_dword v[0:1], v32, off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_store_dword v[0:1], v31, off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -1625,7 +1625,7 @@ define void @too_many_args_use_workitem_id_xyz(
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_store_dword v[0:1], v30, off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_store_dword v[0:1], v32, off
|
||||
; GFX90A-NEXT: global_store_dword v[0:1], v33, off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: s_setpc_b64 s[30:31]
|
||||
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
|
||||
|
||||
@ -43,25 +43,26 @@ define void @phi_with_alloca_and_divergent_copy_to_reg(ptr addrspace(5) %diverge
|
||||
; CHECK-LABEL: phi_with_alloca_and_divergent_copy_to_reg:
|
||||
; CHECK: ; %bb.0: ; %entry
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_lshr_b32 s6, s32, 6
|
||||
; CHECK-NEXT: v_mov_b32_e32 v7, v2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v6, v1
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], 0
|
||||
; CHECK-NEXT: v_lshrrev_b32_e64 v2, 6, s32
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s6
|
||||
; CHECK-NEXT: .LBB1_1: ; %loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, v2
|
||||
; CHECK-NEXT: v_lshl_add_u32 v2, v3, 2, v1
|
||||
; CHECK-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen
|
||||
; CHECK-NEXT: v_add_u32_e32 v2, 1, v3
|
||||
; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, 15, v2
|
||||
; CHECK-NEXT: v_add_u32_e32 v8, 1, v3
|
||||
; CHECK-NEXT: v_lshl_add_u32 v5, v3, 2, v1
|
||||
; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, 15, v8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, v1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, v0
|
||||
; CHECK-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
|
||||
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, v4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, v0
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execnz .LBB1_1
|
||||
; CHECK-NEXT: ; %bb.2: ; %done
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: buffer_load_dword v0, v1, s[0:3], 0 offen
|
||||
; CHECK-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_store_dword v[6:7], v0, off
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
|
||||
@ -120,19 +120,19 @@ define protected amdgpu_kernel void @nand(ptr addrspace(1) %p, ptr addrspace(1)
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s6
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s6
|
||||
; CHECK-NEXT: .LBB5_1: ; %atomicrmw.start
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, v0
|
||||
; CHECK-NEXT: v_bfi_b32 v2, v3, -2, -1
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc
|
||||
; CHECK-NEXT: v_bfi_b32 v0, v1, -2, -1
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
|
||||
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, v0
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execnz .LBB5_1
|
||||
; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -3626,7 +3626,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr3
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX950-SDAG-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
|
||||
; GFX950-SDAG-NEXT: s_cbranch_execz .LBB36_2
|
||||
; GFX950-SDAG-NEXT: .LBB36_4: ; %atomicrmw.private
|
||||
@ -3794,7 +3794,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr3
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX950-SDAG-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
|
||||
; GFX950-SDAG-NEXT: s_cbranch_execz .LBB37_2
|
||||
; GFX950-SDAG-NEXT: .LBB37_4: ; %atomicrmw.private
|
||||
@ -4512,7 +4512,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr3
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX950-SDAG-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
|
||||
; GFX950-SDAG-NEXT: s_cbranch_execz .LBB44_2
|
||||
; GFX950-SDAG-NEXT: .LBB44_4: ; %atomicrmw.private
|
||||
@ -4680,7 +4680,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr3
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX950-SDAG-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
|
||||
; GFX950-SDAG-NEXT: s_cbranch_execz .LBB45_2
|
||||
; GFX950-SDAG-NEXT: .LBB45_4: ; %atomicrmw.private
|
||||
@ -5398,7 +5398,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr3
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX950-SDAG-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
|
||||
; GFX950-SDAG-NEXT: s_cbranch_execz .LBB52_2
|
||||
; GFX950-SDAG-NEXT: .LBB52_4: ; %atomicrmw.private
|
||||
@ -5566,7 +5566,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr3
|
||||
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX950-SDAG-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
|
||||
; GFX950-SDAG-NEXT: s_cbranch_execz .LBB53_2
|
||||
; GFX950-SDAG-NEXT: .LBB53_4: ; %atomicrmw.private
|
||||
@ -13018,21 +13018,20 @@ define <2 x half> @flat_atomic_fmax_v2f16_saddr_rtn(ptr inreg %ptr, <2 x half> %
|
||||
; GFX950-SDAG: ; %bb.0:
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX950-SDAG-NEXT: flat_load_dword v0, v[2:3] offset:40
|
||||
; GFX950-SDAG-NEXT: flat_load_dword v1, v[2:3] offset:40
|
||||
; GFX950-SDAG-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
|
||||
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v0, v0
|
||||
; GFX950-SDAG-NEXT: .LBB124_1: ; %atomicrmw.start
|
||||
; GFX950-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, v0
|
||||
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v5, v5
|
||||
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v1, v1
|
||||
; GFX950-SDAG-NEXT: s_nop 0
|
||||
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v0, v1
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap v0, v[2:3], v[4:5] offset:40 sc0
|
||||
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v4
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
|
||||
; GFX950-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
|
||||
; GFX950-SDAG-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX950-SDAG-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX950-SDAG-NEXT: s_cbranch_execnz .LBB124_1
|
||||
; GFX950-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
@ -13184,21 +13183,20 @@ define <2 x half> @flat_atomic_fmin_v2f16_saddr_rtn(ptr inreg %ptr, <2 x half> %
|
||||
; GFX950-SDAG: ; %bb.0:
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX950-SDAG-NEXT: flat_load_dword v0, v[2:3] offset:40
|
||||
; GFX950-SDAG-NEXT: flat_load_dword v1, v[2:3] offset:40
|
||||
; GFX950-SDAG-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
|
||||
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v0, v0
|
||||
; GFX950-SDAG-NEXT: .LBB126_1: ; %atomicrmw.start
|
||||
; GFX950-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, v0
|
||||
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v5, v5
|
||||
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v1, v1
|
||||
; GFX950-SDAG-NEXT: s_nop 0
|
||||
; GFX950-SDAG-NEXT: v_pk_min_f16 v4, v0, v1
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap v0, v[2:3], v[4:5] offset:40 sc0
|
||||
; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v4
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5
|
||||
; GFX950-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
|
||||
; GFX950-SDAG-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX950-SDAG-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX950-SDAG-NEXT: s_cbranch_execnz .LBB126_1
|
||||
; GFX950-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
@ -13396,24 +13394,23 @@ define <2 x bfloat> @flat_atomic_fmax_v2bf16_saddr_rtn(ptr inreg %ptr, <2 x bflo
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
|
||||
; GFX950-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX950-NEXT: flat_load_dword v0, v[2:3] offset:40
|
||||
; GFX950-NEXT: flat_load_dword v1, v[2:3] offset:40
|
||||
; GFX950-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
|
||||
; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
|
||||
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v0
|
||||
; GFX950-NEXT: .LBB130_1: ; %atomicrmw.start
|
||||
; GFX950-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_mov_b32_e32 v7, v0
|
||||
; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v7
|
||||
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v7
|
||||
; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v1
|
||||
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v1
|
||||
; GFX950-NEXT: v_max_f32_e32 v0, v0, v4
|
||||
; GFX950-NEXT: v_max_f32_e32 v5, v5, v1
|
||||
; GFX950-NEXT: v_cvt_pk_bf16_f32 v6, v5, v0
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v0, v[2:3], v[6:7] offset:40 sc0
|
||||
; GFX950-NEXT: v_max_f32_e32 v6, v6, v5
|
||||
; GFX950-NEXT: v_cvt_pk_bf16_f32 v0, v6, v0
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7
|
||||
; GFX950-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
|
||||
; GFX950-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX950-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX950-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX950-NEXT: s_cbranch_execnz .LBB130_1
|
||||
; GFX950-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
@ -13513,24 +13510,23 @@ define <2 x bfloat> @flat_atomic_fmin_v2bf16_saddr_rtn(ptr inreg %ptr, <2 x bflo
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
|
||||
; GFX950-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX950-NEXT: flat_load_dword v0, v[2:3] offset:40
|
||||
; GFX950-NEXT: flat_load_dword v1, v[2:3] offset:40
|
||||
; GFX950-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
|
||||
; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
|
||||
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v0
|
||||
; GFX950-NEXT: .LBB132_1: ; %atomicrmw.start
|
||||
; GFX950-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_mov_b32_e32 v7, v0
|
||||
; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v7
|
||||
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v7
|
||||
; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v1
|
||||
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v1
|
||||
; GFX950-NEXT: v_min_f32_e32 v0, v0, v4
|
||||
; GFX950-NEXT: v_min_f32_e32 v5, v5, v1
|
||||
; GFX950-NEXT: v_cvt_pk_bf16_f32 v6, v5, v0
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v0, v[2:3], v[6:7] offset:40 sc0
|
||||
; GFX950-NEXT: v_min_f32_e32 v6, v6, v5
|
||||
; GFX950-NEXT: v_cvt_pk_bf16_f32 v0, v6, v0
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:40 sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7
|
||||
; GFX950-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
|
||||
; GFX950-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX950-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX950-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX950-NEXT: s_cbranch_execnz .LBB132_1
|
||||
; GFX950-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
|
||||
@ -36,37 +36,36 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %
|
||||
; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
|
||||
; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
|
||||
; GFX90A-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY6]], 0, 0, implicit $exec :: (load (s64) from %ir.ptr, addrspace 1)
|
||||
; GFX90A-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:av_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY6]], 0, 0, implicit $exec :: (load (s64) from %ir.ptr, addrspace 1)
|
||||
; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: [[COPY7:%[0-9]+]]:av_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.1.atomicrmw.start:
|
||||
; GFX90A-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %4, %bb.1
|
||||
; GFX90A-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[COPY7]], %bb.0, %3, %bb.1
|
||||
; GFX90A-NEXT: [[PHI1:%[0-9]+]]:vreg_64_align2 = PHI [[GLOBAL_LOAD_DWORDX2_]], %bb.0, %3, %bb.1
|
||||
; GFX90A-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI1]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
|
||||
; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub1
|
||||
; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_]].sub0
|
||||
; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub1
|
||||
; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]].sub0
|
||||
; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY11]], %subreg.sub2, killed [[COPY10]], %subreg.sub3
|
||||
; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[REG_SEQUENCE2]]
|
||||
; GFX90A-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY5]], killed [[COPY12]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic monotonic (s64) on %ir.ptr, addrspace 1)
|
||||
; GFX90A-NEXT: [[COPY7:%[0-9]+]]:av_32 = COPY [[V_ADD_F64_e64_]].sub1
|
||||
; GFX90A-NEXT: [[COPY8:%[0-9]+]]:av_32 = COPY [[V_ADD_F64_e64_]].sub0
|
||||
; GFX90A-NEXT: [[COPY9:%[0-9]+]]:av_32 = COPY [[PHI1]].sub1
|
||||
; GFX90A-NEXT: [[COPY10:%[0-9]+]]:av_32 = COPY [[PHI1]].sub0
|
||||
; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY8]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
|
||||
; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[REG_SEQUENCE2]]
|
||||
; GFX90A-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY5]], killed [[COPY11]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic monotonic (s64) on %ir.ptr, addrspace 1)
|
||||
; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]], [[PHI1]], implicit $exec
|
||||
; GFX90A-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
|
||||
; GFX90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
|
||||
; GFX90A-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK killed [[V_CMP_EQ_U64_e64_]], [[PHI]], implicit-def dead $scc
|
||||
; GFX90A-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
; GFX90A-NEXT: S_BRANCH %bb.2
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.2.atomicrmw.end:
|
||||
; GFX90A-NEXT: [[PHI2:%[0-9]+]]:av_64_align2 = PHI [[COPY13]], %bb.1
|
||||
; GFX90A-NEXT: [[PHI2:%[0-9]+]]:av_64_align2 = PHI [[COPY12]], %bb.1
|
||||
; GFX90A-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[SI_IF_BREAK]], %bb.1
|
||||
; GFX90A-NEXT: SI_END_CF [[PHI3]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub0
|
||||
; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
|
||||
; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub1
|
||||
; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec
|
||||
; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub0
|
||||
; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
|
||||
; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub1
|
||||
; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
|
||||
; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
|
||||
; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
|
||||
; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -19,11 +19,11 @@ define amdgpu_kernel void @half8(ptr addrspace(1) nocapture readonly %0, ptr add
|
||||
; GFX90A-LABEL: half8:
|
||||
; GFX90A: ; %bb.0:
|
||||
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_load_dwordx4 v[0:3], v4, s[0:1]
|
||||
; GFX90A-NEXT: global_load_dwordx4 v[2:5], v0, s[0:1]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
|
||||
; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[2:3]
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1030-LABEL: half8:
|
||||
@ -85,11 +85,11 @@ define amdgpu_kernel void @half6(ptr addrspace(1) nocapture readonly %0, ptr add
|
||||
; GFX90A-LABEL: half6:
|
||||
; GFX90A: ; %bb.0:
|
||||
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_load_dwordx3 v[0:2], v3, s[0:1]
|
||||
; GFX90A-NEXT: global_load_dwordx3 v[2:4], v0, s[0:1]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
|
||||
; GFX90A-NEXT: global_store_dwordx3 v0, v[2:4], s[2:3]
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1030-LABEL: half6:
|
||||
|
||||
@ -162,12 +162,11 @@ define i32 @atomic_nand_i32_global(ptr addrspace(1) %ptr) nounwind {
|
||||
; GFX90A-LABEL: atomic_nand_i32_global:
|
||||
; GFX90A: ; %bb.0:
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_load_dword v2, v[0:1], off
|
||||
; GFX90A-NEXT: global_load_dword v3, v[0:1], off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX90A-NEXT: .LBB1_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX90A-NEXT: v_bfi_b32 v2, v3, -5, -1
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
@ -176,6 +175,7 @@ define i32 @atomic_nand_i32_global(ptr addrspace(1) %ptr) nounwind {
|
||||
; GFX90A-NEXT: buffer_wbinvl1_vol
|
||||
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
|
||||
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB1_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
|
||||
@ -49,10 +49,10 @@ define amdgpu_kernel void @barrier_release(<4 x i32> inreg %rsrc,
|
||||
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_barrier
|
||||
; GFX90A-NEXT: ds_read_b32 v0, v0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX90A-NEXT: ds_read_b32 v1, v0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_store_dword v1, v0, s[0:1]
|
||||
; GFX90A-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
;
|
||||
; GFX90A-TGSPLIT-LABEL: barrier_release:
|
||||
@ -72,10 +72,10 @@ define amdgpu_kernel void @barrier_release(<4 x i32> inreg %rsrc,
|
||||
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-TGSPLIT-NEXT: s_barrier
|
||||
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
|
||||
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0
|
||||
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v1, v0
|
||||
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-TGSPLIT-NEXT: global_store_dword v1, v0, s[0:1]
|
||||
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX90A-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX942-LABEL: barrier_release:
|
||||
@ -94,10 +94,10 @@ define amdgpu_kernel void @barrier_release(<4 x i32> inreg %rsrc,
|
||||
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x3c
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: s_barrier
|
||||
; GFX942-NEXT: ds_read_b32 v0, v0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: ds_read_b32 v1, v0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_store_dword v1, v0, s[0:1]
|
||||
; GFX942-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
;
|
||||
; GFX942-TGSPLIT-LABEL: barrier_release:
|
||||
@ -117,10 +117,10 @@ define amdgpu_kernel void @barrier_release(<4 x i32> inreg %rsrc,
|
||||
; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-TGSPLIT-NEXT: s_barrier
|
||||
; GFX942-TGSPLIT-NEXT: buffer_inv sc0
|
||||
; GFX942-TGSPLIT-NEXT: ds_read_b32 v0, v0
|
||||
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0
|
||||
; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-TGSPLIT-NEXT: global_store_dword v1, v0, s[0:1]
|
||||
; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX942-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10WGP-LABEL: barrier_release:
|
||||
|
||||
@ -37,11 +37,11 @@ entry:
|
||||
define amdgpu_ps void @ds_read_b96_tr_b6(ptr addrspace(3) %addr, ptr addrspace(1) %use) {
|
||||
; GFX950-SDAG-LABEL: ds_read_b96_tr_b6:
|
||||
; GFX950-SDAG: ; %bb.0: ; %entry
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, v2
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, v1
|
||||
; GFX950-SDAG-NEXT: ds_read_b96_tr_b6 v[0:2], v0 offset:32
|
||||
; GFX950-SDAG-NEXT: ds_read_b96_tr_b6 v[4:6], v0 offset:32
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: global_store_dwordx3 v[4:5], v[0:2], off
|
||||
; GFX950-SDAG-NEXT: global_store_dwordx3 v[2:3], v[4:6], off
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX950-GISEL-LABEL: ds_read_b96_tr_b6:
|
||||
|
||||
@ -159,100 +159,100 @@ define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(ptr addrspace(3) noalias
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 2.0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_u32_e32 v3, s0, v0
|
||||
; GCN-NEXT: ds_read_b128 a[28:31], v3 offset:112
|
||||
; GCN-NEXT: ds_read_b128 a[24:27], v3 offset:96
|
||||
; GCN-NEXT: ds_read_b128 a[20:23], v3 offset:80
|
||||
; GCN-NEXT: ds_read_b128 a[16:19], v3 offset:64
|
||||
; GCN-NEXT: ds_read_b128 a[0:3], v3
|
||||
; GCN-NEXT: ds_read_b128 a[4:7], v3 offset:16
|
||||
; GCN-NEXT: ds_read_b128 a[8:11], v3 offset:32
|
||||
; GCN-NEXT: ds_read_b128 a[12:15], v3 offset:48
|
||||
; GCN-NEXT: ds_read_b128 a[156:159], v3 offset:112
|
||||
; GCN-NEXT: ds_read_b128 a[152:155], v3 offset:96
|
||||
; GCN-NEXT: ds_read_b128 a[148:151], v3 offset:80
|
||||
; GCN-NEXT: ds_read_b128 a[144:147], v3 offset:64
|
||||
; GCN-NEXT: ds_read_b128 a[128:131], v3
|
||||
; GCN-NEXT: ds_read_b128 a[132:135], v3 offset:16
|
||||
; GCN-NEXT: ds_read_b128 a[136:139], v3 offset:32
|
||||
; GCN-NEXT: ds_read_b128 a[140:143], v3 offset:48
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v1, a[0:31]
|
||||
; GCN-NEXT: ds_read_b128 a[156:159], v3 offset:8304
|
||||
; GCN-NEXT: ds_read_b128 a[152:155], v3 offset:8288
|
||||
; GCN-NEXT: ds_read_b128 a[148:151], v3 offset:8272
|
||||
; GCN-NEXT: ds_read_b128 a[144:147], v3 offset:8256
|
||||
; GCN-NEXT: ds_read_b128 a[140:143], v3 offset:8240
|
||||
; GCN-NEXT: ds_read_b128 a[136:139], v3 offset:8224
|
||||
; GCN-NEXT: ds_read_b128 a[132:135], v3 offset:8208
|
||||
; GCN-NEXT: ds_read_b128 a[128:131], v3 offset:8192
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[128:159], v2, v1, a[128:159]
|
||||
; GCN-NEXT: ds_read_b128 a[124:127], v3 offset:8304
|
||||
; GCN-NEXT: ds_read_b128 a[120:123], v3 offset:8288
|
||||
; GCN-NEXT: ds_read_b128 a[116:119], v3 offset:8272
|
||||
; GCN-NEXT: ds_read_b128 a[112:115], v3 offset:8256
|
||||
; GCN-NEXT: ds_read_b128 a[108:111], v3 offset:8240
|
||||
; GCN-NEXT: ds_read_b128 a[104:107], v3 offset:8224
|
||||
; GCN-NEXT: ds_read_b128 a[100:103], v3 offset:8208
|
||||
; GCN-NEXT: ds_read_b128 a[96:99], v3 offset:8192
|
||||
; GCN-NEXT: v_add_u32_e32 v4, 0x6000, v3
|
||||
; GCN-NEXT: v_add_u32_e32 v0, s1, v0
|
||||
; GCN-NEXT: ; iglp_opt mask(0x00000001)
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[128:159], v2, v1, a[128:159]
|
||||
; GCN-NEXT: ds_read_b128 a[124:127], v3 offset:24688
|
||||
; GCN-NEXT: ds_read_b128 a[120:123], v3 offset:24672
|
||||
; GCN-NEXT: ds_read_b128 a[116:119], v3 offset:24656
|
||||
; GCN-NEXT: ds_read_b128 a[112:115], v3 offset:24640
|
||||
; GCN-NEXT: ds_read_b128 a[108:111], v3 offset:24624
|
||||
; GCN-NEXT: ds_read_b128 a[104:107], v3 offset:24608
|
||||
; GCN-NEXT: ds_read_b128 a[100:103], v3 offset:24592
|
||||
; GCN-NEXT: ds_read_b128 a[96:99], v3 offset:24576
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[96:127], v2, v1, a[96:127]
|
||||
; GCN-NEXT: ds_read_b128 a[92:95], v3 offset:49264
|
||||
; GCN-NEXT: ds_read_b128 a[88:91], v3 offset:49248
|
||||
; GCN-NEXT: ds_read_b128 a[84:87], v3 offset:49232
|
||||
; GCN-NEXT: ds_read_b128 a[80:83], v3 offset:49216
|
||||
; GCN-NEXT: ds_read_b128 a[76:79], v3 offset:49200
|
||||
; GCN-NEXT: ds_read_b128 a[72:75], v3 offset:49184
|
||||
; GCN-NEXT: ds_read_b128 a[68:71], v3 offset:49168
|
||||
; GCN-NEXT: ds_read_b128 a[64:67], v3 offset:49152
|
||||
; GCN-NEXT: ds_read_b128 a[92:95], v3 offset:24688
|
||||
; GCN-NEXT: ds_read_b128 a[88:91], v3 offset:24672
|
||||
; GCN-NEXT: ds_read_b128 a[84:87], v3 offset:24656
|
||||
; GCN-NEXT: ds_read_b128 a[80:83], v3 offset:24640
|
||||
; GCN-NEXT: ds_read_b128 a[76:79], v3 offset:24624
|
||||
; GCN-NEXT: ds_read_b128 a[72:75], v3 offset:24608
|
||||
; GCN-NEXT: ds_read_b128 a[68:71], v3 offset:24592
|
||||
; GCN-NEXT: ds_read_b128 a[64:67], v3 offset:24576
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v2, v1, a[64:95]
|
||||
; GCN-NEXT: ds_read_b128 a[60:63], v4 offset:57456
|
||||
; GCN-NEXT: ds_read_b128 a[56:59], v4 offset:57440
|
||||
; GCN-NEXT: ds_read_b128 a[52:55], v4 offset:57424
|
||||
; GCN-NEXT: ds_read_b128 a[48:51], v4 offset:57408
|
||||
; GCN-NEXT: ds_read_b128 a[32:35], v4 offset:57344
|
||||
; GCN-NEXT: ds_read_b128 a[36:39], v4 offset:57360
|
||||
; GCN-NEXT: ds_read_b128 a[40:43], v4 offset:57376
|
||||
; GCN-NEXT: ds_read_b128 a[44:47], v4 offset:57392
|
||||
; GCN-NEXT: ds_read_b128 a[60:63], v3 offset:49264
|
||||
; GCN-NEXT: ds_read_b128 a[56:59], v3 offset:49248
|
||||
; GCN-NEXT: ds_read_b128 a[52:55], v3 offset:49232
|
||||
; GCN-NEXT: ds_read_b128 a[48:51], v3 offset:49216
|
||||
; GCN-NEXT: ds_read_b128 a[44:47], v3 offset:49200
|
||||
; GCN-NEXT: ds_read_b128 a[40:43], v3 offset:49184
|
||||
; GCN-NEXT: ds_read_b128 a[36:39], v3 offset:49168
|
||||
; GCN-NEXT: ds_read_b128 a[32:35], v3 offset:49152
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v2, v1, a[32:63]
|
||||
; GCN-NEXT: ds_write_b128 v0, a[28:31] offset:112
|
||||
; GCN-NEXT: ds_write_b128 v0, a[24:27] offset:96
|
||||
; GCN-NEXT: ds_write_b128 v0, a[20:23] offset:80
|
||||
; GCN-NEXT: ds_write_b128 v0, a[16:19] offset:64
|
||||
; GCN-NEXT: ds_write_b128 v0, a[12:15] offset:48
|
||||
; GCN-NEXT: ds_write_b128 v0, a[8:11] offset:32
|
||||
; GCN-NEXT: ds_write_b128 v0, a[4:7] offset:16
|
||||
; GCN-NEXT: ds_write_b128 v0, a[0:3]
|
||||
; GCN-NEXT: ds_read_b128 a[28:31], v4 offset:57456
|
||||
; GCN-NEXT: ds_read_b128 a[24:27], v4 offset:57440
|
||||
; GCN-NEXT: ds_read_b128 a[20:23], v4 offset:57424
|
||||
; GCN-NEXT: ds_read_b128 a[16:19], v4 offset:57408
|
||||
; GCN-NEXT: ds_read_b128 a[0:3], v4 offset:57344
|
||||
; GCN-NEXT: ds_read_b128 a[4:7], v4 offset:57360
|
||||
; GCN-NEXT: ds_read_b128 a[8:11], v4 offset:57376
|
||||
; GCN-NEXT: ds_read_b128 a[12:15], v4 offset:57392
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v1, a[0:31]
|
||||
; GCN-NEXT: ds_write_b128 v0, a[156:159] offset:112
|
||||
; GCN-NEXT: ds_write_b128 v0, a[152:155] offset:96
|
||||
; GCN-NEXT: ds_write_b128 v0, a[148:151] offset:80
|
||||
; GCN-NEXT: ds_write_b128 v0, a[144:147] offset:64
|
||||
; GCN-NEXT: ds_write_b128 v0, a[140:143] offset:48
|
||||
; GCN-NEXT: ds_write_b128 v0, a[136:139] offset:32
|
||||
; GCN-NEXT: ds_write_b128 v0, a[132:135] offset:16
|
||||
; GCN-NEXT: ds_write_b128 v0, a[128:131]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GCN-NEXT: ds_write_b128 v0, a[152:155] offset:8288
|
||||
; GCN-NEXT: ds_write_b128 v0, a[156:159] offset:8304
|
||||
; GCN-NEXT: ds_write_b128 v0, a[144:147] offset:8256
|
||||
; GCN-NEXT: ds_write_b128 v0, a[148:151] offset:8272
|
||||
; GCN-NEXT: ds_write_b128 v0, a[136:139] offset:8224
|
||||
; GCN-NEXT: ds_write_b128 v0, a[140:143] offset:8240
|
||||
; GCN-NEXT: ds_write_b128 v0, a[128:131] offset:8192
|
||||
; GCN-NEXT: ds_write_b128 v0, a[132:135] offset:8208
|
||||
; GCN-NEXT: ds_write_b128 v0, a[120:123] offset:16480
|
||||
; GCN-NEXT: ds_write_b128 v0, a[124:127] offset:16496
|
||||
; GCN-NEXT: ds_write_b128 v0, a[112:115] offset:16448
|
||||
; GCN-NEXT: ds_write_b128 v0, a[116:119] offset:16464
|
||||
; GCN-NEXT: ds_write_b128 v0, a[104:107] offset:16416
|
||||
; GCN-NEXT: ds_write_b128 v0, a[108:111] offset:16432
|
||||
; GCN-NEXT: ds_write_b128 v0, a[96:99] offset:16384
|
||||
; GCN-NEXT: ds_write_b128 v0, a[100:103] offset:16400
|
||||
; GCN-NEXT: ds_write_b128 v0, a[88:91] offset:24672
|
||||
; GCN-NEXT: ds_write_b128 v0, a[92:95] offset:24688
|
||||
; GCN-NEXT: ds_write_b128 v0, a[80:83] offset:24640
|
||||
; GCN-NEXT: ds_write_b128 v0, a[84:87] offset:24656
|
||||
; GCN-NEXT: ds_write_b128 v0, a[72:75] offset:24608
|
||||
; GCN-NEXT: ds_write_b128 v0, a[76:79] offset:24624
|
||||
; GCN-NEXT: ds_write_b128 v0, a[64:67] offset:24576
|
||||
; GCN-NEXT: ds_write_b128 v0, a[68:71] offset:24592
|
||||
; GCN-NEXT: ds_write_b128 v0, a[56:59] offset:32864
|
||||
; GCN-NEXT: ds_write_b128 v0, a[60:63] offset:32880
|
||||
; GCN-NEXT: ds_write_b128 v0, a[48:51] offset:32832
|
||||
; GCN-NEXT: ds_write_b128 v0, a[52:55] offset:32848
|
||||
; GCN-NEXT: ds_write_b128 v0, a[40:43] offset:32800
|
||||
; GCN-NEXT: ds_write_b128 v0, a[44:47] offset:32816
|
||||
; GCN-NEXT: ds_write_b128 v0, a[32:35] offset:32768
|
||||
; GCN-NEXT: ds_write_b128 v0, a[36:39] offset:32784
|
||||
; GCN-NEXT: ds_write_b128 v0, a[120:123] offset:8288
|
||||
; GCN-NEXT: ds_write_b128 v0, a[124:127] offset:8304
|
||||
; GCN-NEXT: ds_write_b128 v0, a[112:115] offset:8256
|
||||
; GCN-NEXT: ds_write_b128 v0, a[116:119] offset:8272
|
||||
; GCN-NEXT: ds_write_b128 v0, a[104:107] offset:8224
|
||||
; GCN-NEXT: ds_write_b128 v0, a[108:111] offset:8240
|
||||
; GCN-NEXT: ds_write_b128 v0, a[96:99] offset:8192
|
||||
; GCN-NEXT: ds_write_b128 v0, a[100:103] offset:8208
|
||||
; GCN-NEXT: ds_write_b128 v0, a[88:91] offset:16480
|
||||
; GCN-NEXT: ds_write_b128 v0, a[92:95] offset:16496
|
||||
; GCN-NEXT: ds_write_b128 v0, a[80:83] offset:16448
|
||||
; GCN-NEXT: ds_write_b128 v0, a[84:87] offset:16464
|
||||
; GCN-NEXT: ds_write_b128 v0, a[72:75] offset:16416
|
||||
; GCN-NEXT: ds_write_b128 v0, a[76:79] offset:16432
|
||||
; GCN-NEXT: ds_write_b128 v0, a[64:67] offset:16384
|
||||
; GCN-NEXT: ds_write_b128 v0, a[68:71] offset:16400
|
||||
; GCN-NEXT: ds_write_b128 v0, a[56:59] offset:24672
|
||||
; GCN-NEXT: ds_write_b128 v0, a[60:63] offset:24688
|
||||
; GCN-NEXT: ds_write_b128 v0, a[48:51] offset:24640
|
||||
; GCN-NEXT: ds_write_b128 v0, a[52:55] offset:24656
|
||||
; GCN-NEXT: ds_write_b128 v0, a[40:43] offset:24608
|
||||
; GCN-NEXT: ds_write_b128 v0, a[44:47] offset:24624
|
||||
; GCN-NEXT: ds_write_b128 v0, a[32:35] offset:24576
|
||||
; GCN-NEXT: ds_write_b128 v0, a[36:39] offset:24592
|
||||
; GCN-NEXT: ds_write_b128 v0, a[24:27] offset:32864
|
||||
; GCN-NEXT: ds_write_b128 v0, a[28:31] offset:32880
|
||||
; GCN-NEXT: ds_write_b128 v0, a[16:19] offset:32832
|
||||
; GCN-NEXT: ds_write_b128 v0, a[20:23] offset:32848
|
||||
; GCN-NEXT: ds_write_b128 v0, a[8:11] offset:32800
|
||||
; GCN-NEXT: ds_write_b128 v0, a[12:15] offset:32816
|
||||
; GCN-NEXT: ds_write_b128 v0, a[0:3] offset:32768
|
||||
; GCN-NEXT: ds_write_b128 v0, a[4:7] offset:32784
|
||||
; GCN-NEXT: s_endpgm
|
||||
entry:
|
||||
call void @llvm.amdgcn.iglp.opt(i32 1)
|
||||
@ -294,17 +294,17 @@ define amdgpu_kernel void @test_iglp_opt_asm_sideeffect(ptr addrspace(3) noalias
|
||||
; GCN-NEXT: ; iglp_opt mask(0x00000000)
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_u32_e32 v1, s0, v0
|
||||
; GCN-NEXT: ds_read_b32 v1, v1
|
||||
; GCN-NEXT: ds_read_b32 v2, v1
|
||||
; GCN-NEXT: v_add_u32_e32 v0, s1, v0
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b32 v0, v1
|
||||
; GCN-NEXT: ds_write_b32 v0, v2
|
||||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: ds_read_b32 v0, v2 offset:256
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GCN-NEXT: ds_read_b32 v1, v1 offset:256
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b32 v1, v0 offset:256
|
||||
; GCN-NEXT: ds_write_b32 v0, v1 offset:256
|
||||
; GCN-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
||||
@ -26,7 +26,6 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; GCN-MINREG-NEXT: v_add_u32_e32 v5, s1, v0
|
||||
; GCN-MINREG-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GCN-MINREG-NEXT: v_add_u32_e32 v3, 0x6000, v4
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_nop 15
|
||||
; GCN-MINREG-NEXT: ds_write_b128 v5, a[28:31] offset:112
|
||||
@ -48,7 +47,6 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; GCN-MINREG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-MINREG-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v1, a[0:31]
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000200) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_nop 15
|
||||
; GCN-MINREG-NEXT: s_nop 2
|
||||
@ -71,7 +69,6 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; GCN-MINREG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-MINREG-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v1, a[0:31]
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000200) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_nop 15
|
||||
; GCN-MINREG-NEXT: s_nop 2
|
||||
@ -94,7 +91,6 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; GCN-MINREG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-MINREG-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v1, a[0:31]
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000200) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_nop 15
|
||||
; GCN-MINREG-NEXT: s_nop 2
|
||||
@ -117,7 +113,6 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; GCN-MINREG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-MINREG-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v1, a[0:31]
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000200) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_nop 15
|
||||
; GCN-MINREG-NEXT: s_nop 2
|
||||
@ -130,6 +125,11 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; GCN-MINREG-NEXT: ds_write_b128 v0, a[0:3] offset:32768
|
||||
; GCN-MINREG-NEXT: ds_write_b128 v0, a[4:7] offset:32784
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000200) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_endpgm
|
||||
;
|
||||
; GCN-MAXOCC-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave:
|
||||
@ -495,8 +495,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave_spl
|
||||
; GCN-MINREG-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
|
||||
; GCN-MINREG-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000200) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_nop 15
|
||||
; GCN-MINREG-NEXT: s_nop 1
|
||||
; GCN-MINREG-NEXT: ds_write_b128 v2, a[24:27] offset:8288
|
||||
@ -520,7 +520,6 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave_spl
|
||||
; GCN-MINREG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-MINREG-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
|
||||
; GCN-MINREG-NEXT: v_add_u32_e32 v4, 0x6000, v3
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_nop 15
|
||||
; GCN-MINREG-NEXT: s_nop 1
|
||||
@ -543,7 +542,6 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave_spl
|
||||
; GCN-MINREG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-MINREG-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000200) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_nop 15
|
||||
; GCN-MINREG-NEXT: s_nop 2
|
||||
@ -566,7 +564,6 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave_spl
|
||||
; GCN-MINREG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-MINREG-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000200) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_nop 15
|
||||
; GCN-MINREG-NEXT: s_nop 2
|
||||
@ -579,6 +576,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave_spl
|
||||
; GCN-MINREG-NEXT: ds_write_b128 v2, a[4:7] offset:32784
|
||||
; GCN-MINREG-NEXT: ds_write_b128 v2, a[0:3] offset:32768
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000200) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: ; sched_group_barrier mask(0x00000100) size(8) SyncID(0)
|
||||
; GCN-MINREG-NEXT: s_endpgm
|
||||
;
|
||||
; GCN-MAXOCC-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave_split_region:
|
||||
|
||||
@ -636,48 +636,48 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(ptr ad
|
||||
; GCN-NEXT: ds_read_b128 a[136:139], v3 offset:32
|
||||
; GCN-NEXT: ds_read_b128 a[140:143], v3 offset:48
|
||||
; GCN-NEXT: v_add_u32_e32 v4, 0x6000, v3
|
||||
; GCN-NEXT: ds_read_b128 a[28:31], v3 offset:8304
|
||||
; GCN-NEXT: ds_read_b128 a[24:27], v3 offset:8288
|
||||
; GCN-NEXT: ds_read_b128 a[20:23], v3 offset:8272
|
||||
; GCN-NEXT: ds_read_b128 a[16:19], v3 offset:8256
|
||||
; GCN-NEXT: ds_read_b128 a[12:15], v3 offset:8240
|
||||
; GCN-NEXT: ds_read_b128 a[8:11], v3 offset:8224
|
||||
; GCN-NEXT: ds_read_b128 a[4:7], v3 offset:8208
|
||||
; GCN-NEXT: ds_read_b128 a[0:3], v3 offset:8192
|
||||
; GCN-NEXT: ds_read_b128 a[124:127], v3 offset:24688
|
||||
; GCN-NEXT: ds_read_b128 a[120:123], v3 offset:24672
|
||||
; GCN-NEXT: ds_read_b128 a[116:119], v3 offset:24656
|
||||
; GCN-NEXT: ds_read_b128 a[112:115], v3 offset:24640
|
||||
; GCN-NEXT: ds_read_b128 a[108:111], v3 offset:24624
|
||||
; GCN-NEXT: ds_read_b128 a[104:107], v3 offset:24608
|
||||
; GCN-NEXT: ds_read_b128 a[100:103], v3 offset:24592
|
||||
; GCN-NEXT: ds_read_b128 a[96:99], v3 offset:24576
|
||||
; GCN-NEXT: ds_read_b128 a[92:95], v3 offset:49264
|
||||
; GCN-NEXT: ds_read_b128 a[88:91], v3 offset:49248
|
||||
; GCN-NEXT: ds_read_b128 a[84:87], v3 offset:49232
|
||||
; GCN-NEXT: ds_read_b128 a[80:83], v3 offset:49216
|
||||
; GCN-NEXT: ds_read_b128 a[76:79], v3 offset:49200
|
||||
; GCN-NEXT: ds_read_b128 a[72:75], v3 offset:49184
|
||||
; GCN-NEXT: ds_read_b128 a[68:71], v3 offset:49168
|
||||
; GCN-NEXT: ds_read_b128 a[64:67], v3 offset:49152
|
||||
; GCN-NEXT: ds_read_b128 a[60:63], v4 offset:57456
|
||||
; GCN-NEXT: ds_read_b128 a[56:59], v4 offset:57440
|
||||
; GCN-NEXT: ds_read_b128 a[52:55], v4 offset:57424
|
||||
; GCN-NEXT: ds_read_b128 a[48:51], v4 offset:57408
|
||||
; GCN-NEXT: ds_read_b128 a[32:35], v4 offset:57344
|
||||
; GCN-NEXT: ds_read_b128 a[36:39], v4 offset:57360
|
||||
; GCN-NEXT: ds_read_b128 a[40:43], v4 offset:57376
|
||||
; GCN-NEXT: ds_read_b128 a[44:47], v4 offset:57392
|
||||
; GCN-NEXT: ds_read_b128 a[124:127], v3 offset:8304
|
||||
; GCN-NEXT: ds_read_b128 a[120:123], v3 offset:8288
|
||||
; GCN-NEXT: ds_read_b128 a[116:119], v3 offset:8272
|
||||
; GCN-NEXT: ds_read_b128 a[112:115], v3 offset:8256
|
||||
; GCN-NEXT: ds_read_b128 a[108:111], v3 offset:8240
|
||||
; GCN-NEXT: ds_read_b128 a[104:107], v3 offset:8224
|
||||
; GCN-NEXT: ds_read_b128 a[100:103], v3 offset:8208
|
||||
; GCN-NEXT: ds_read_b128 a[96:99], v3 offset:8192
|
||||
; GCN-NEXT: ds_read_b128 a[92:95], v3 offset:24688
|
||||
; GCN-NEXT: ds_read_b128 a[88:91], v3 offset:24672
|
||||
; GCN-NEXT: ds_read_b128 a[84:87], v3 offset:24656
|
||||
; GCN-NEXT: ds_read_b128 a[80:83], v3 offset:24640
|
||||
; GCN-NEXT: ds_read_b128 a[76:79], v3 offset:24624
|
||||
; GCN-NEXT: ds_read_b128 a[72:75], v3 offset:24608
|
||||
; GCN-NEXT: ds_read_b128 a[68:71], v3 offset:24592
|
||||
; GCN-NEXT: ds_read_b128 a[64:67], v3 offset:24576
|
||||
; GCN-NEXT: ds_read_b128 a[60:63], v3 offset:49264
|
||||
; GCN-NEXT: ds_read_b128 a[56:59], v3 offset:49248
|
||||
; GCN-NEXT: ds_read_b128 a[52:55], v3 offset:49232
|
||||
; GCN-NEXT: ds_read_b128 a[48:51], v3 offset:49216
|
||||
; GCN-NEXT: ds_read_b128 a[44:47], v3 offset:49200
|
||||
; GCN-NEXT: ds_read_b128 a[40:43], v3 offset:49184
|
||||
; GCN-NEXT: ds_read_b128 a[36:39], v3 offset:49168
|
||||
; GCN-NEXT: ds_read_b128 a[32:35], v3 offset:49152
|
||||
; GCN-NEXT: ds_read_b128 a[28:31], v4 offset:57456
|
||||
; GCN-NEXT: ds_read_b128 a[24:27], v4 offset:57440
|
||||
; GCN-NEXT: ds_read_b128 a[20:23], v4 offset:57424
|
||||
; GCN-NEXT: ds_read_b128 a[16:19], v4 offset:57408
|
||||
; GCN-NEXT: ds_read_b128 a[0:3], v4 offset:57344
|
||||
; GCN-NEXT: ds_read_b128 a[4:7], v4 offset:57360
|
||||
; GCN-NEXT: ds_read_b128 a[8:11], v4 offset:57376
|
||||
; GCN-NEXT: ds_read_b128 a[12:15], v4 offset:57392
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(14)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[128:159], v2, v1, a[128:159]
|
||||
; GCN-NEXT: v_add_u32_e32 v0, s1, v0
|
||||
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(40) SyncID(0)
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(8)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v2, v1, a[64:95]
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[96:127], v2, v1, a[96:127]
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v1, a[0:31]
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v2, v1, a[64:95]
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(8)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v2, v1, a[32:63]
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v1, a[0:31]
|
||||
; GCN-NEXT: s_nop 11
|
||||
; GCN-NEXT: ds_write_b128 v0, a[156:159] offset:112
|
||||
; GCN-NEXT: ds_write_b128 v0, a[152:155] offset:96
|
||||
@ -688,38 +688,38 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(ptr ad
|
||||
; GCN-NEXT: ds_write_b128 v0, a[132:135] offset:16
|
||||
; GCN-NEXT: ds_write_b128 v0, a[128:131]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GCN-NEXT: ds_write_b128 v0, a[24:27] offset:8288
|
||||
; GCN-NEXT: ds_write_b128 v0, a[28:31] offset:8304
|
||||
; GCN-NEXT: ds_write_b128 v0, a[16:19] offset:8256
|
||||
; GCN-NEXT: ds_write_b128 v0, a[20:23] offset:8272
|
||||
; GCN-NEXT: ds_write_b128 v0, a[8:11] offset:8224
|
||||
; GCN-NEXT: ds_write_b128 v0, a[12:15] offset:8240
|
||||
; GCN-NEXT: ds_write_b128 v0, a[0:3] offset:8192
|
||||
; GCN-NEXT: ds_write_b128 v0, a[4:7] offset:8208
|
||||
; GCN-NEXT: ds_write_b128 v0, a[120:123] offset:16480
|
||||
; GCN-NEXT: ds_write_b128 v0, a[124:127] offset:16496
|
||||
; GCN-NEXT: ds_write_b128 v0, a[112:115] offset:16448
|
||||
; GCN-NEXT: ds_write_b128 v0, a[116:119] offset:16464
|
||||
; GCN-NEXT: ds_write_b128 v0, a[104:107] offset:16416
|
||||
; GCN-NEXT: ds_write_b128 v0, a[108:111] offset:16432
|
||||
; GCN-NEXT: ds_write_b128 v0, a[96:99] offset:16384
|
||||
; GCN-NEXT: ds_write_b128 v0, a[100:103] offset:16400
|
||||
; GCN-NEXT: ds_write_b128 v0, a[88:91] offset:24672
|
||||
; GCN-NEXT: ds_write_b128 v0, a[92:95] offset:24688
|
||||
; GCN-NEXT: ds_write_b128 v0, a[80:83] offset:24640
|
||||
; GCN-NEXT: ds_write_b128 v0, a[84:87] offset:24656
|
||||
; GCN-NEXT: ds_write_b128 v0, a[72:75] offset:24608
|
||||
; GCN-NEXT: ds_write_b128 v0, a[76:79] offset:24624
|
||||
; GCN-NEXT: ds_write_b128 v0, a[64:67] offset:24576
|
||||
; GCN-NEXT: ds_write_b128 v0, a[68:71] offset:24592
|
||||
; GCN-NEXT: ds_write_b128 v0, a[56:59] offset:32864
|
||||
; GCN-NEXT: ds_write_b128 v0, a[60:63] offset:32880
|
||||
; GCN-NEXT: ds_write_b128 v0, a[48:51] offset:32832
|
||||
; GCN-NEXT: ds_write_b128 v0, a[52:55] offset:32848
|
||||
; GCN-NEXT: ds_write_b128 v0, a[40:43] offset:32800
|
||||
; GCN-NEXT: ds_write_b128 v0, a[44:47] offset:32816
|
||||
; GCN-NEXT: ds_write_b128 v0, a[32:35] offset:32768
|
||||
; GCN-NEXT: ds_write_b128 v0, a[36:39] offset:32784
|
||||
; GCN-NEXT: ds_write_b128 v0, a[120:123] offset:8288
|
||||
; GCN-NEXT: ds_write_b128 v0, a[124:127] offset:8304
|
||||
; GCN-NEXT: ds_write_b128 v0, a[112:115] offset:8256
|
||||
; GCN-NEXT: ds_write_b128 v0, a[116:119] offset:8272
|
||||
; GCN-NEXT: ds_write_b128 v0, a[104:107] offset:8224
|
||||
; GCN-NEXT: ds_write_b128 v0, a[108:111] offset:8240
|
||||
; GCN-NEXT: ds_write_b128 v0, a[96:99] offset:8192
|
||||
; GCN-NEXT: ds_write_b128 v0, a[100:103] offset:8208
|
||||
; GCN-NEXT: ds_write_b128 v0, a[88:91] offset:16480
|
||||
; GCN-NEXT: ds_write_b128 v0, a[92:95] offset:16496
|
||||
; GCN-NEXT: ds_write_b128 v0, a[80:83] offset:16448
|
||||
; GCN-NEXT: ds_write_b128 v0, a[84:87] offset:16464
|
||||
; GCN-NEXT: ds_write_b128 v0, a[72:75] offset:16416
|
||||
; GCN-NEXT: ds_write_b128 v0, a[76:79] offset:16432
|
||||
; GCN-NEXT: ds_write_b128 v0, a[64:67] offset:16384
|
||||
; GCN-NEXT: ds_write_b128 v0, a[68:71] offset:16400
|
||||
; GCN-NEXT: ds_write_b128 v0, a[56:59] offset:24672
|
||||
; GCN-NEXT: ds_write_b128 v0, a[60:63] offset:24688
|
||||
; GCN-NEXT: ds_write_b128 v0, a[48:51] offset:24640
|
||||
; GCN-NEXT: ds_write_b128 v0, a[52:55] offset:24656
|
||||
; GCN-NEXT: ds_write_b128 v0, a[40:43] offset:24608
|
||||
; GCN-NEXT: ds_write_b128 v0, a[44:47] offset:24624
|
||||
; GCN-NEXT: ds_write_b128 v0, a[32:35] offset:24576
|
||||
; GCN-NEXT: ds_write_b128 v0, a[36:39] offset:24592
|
||||
; GCN-NEXT: ds_write_b128 v0, a[24:27] offset:32864
|
||||
; GCN-NEXT: ds_write_b128 v0, a[28:31] offset:32880
|
||||
; GCN-NEXT: ds_write_b128 v0, a[16:19] offset:32832
|
||||
; GCN-NEXT: ds_write_b128 v0, a[20:23] offset:32848
|
||||
; GCN-NEXT: ds_write_b128 v0, a[8:11] offset:32800
|
||||
; GCN-NEXT: ds_write_b128 v0, a[12:15] offset:32816
|
||||
; GCN-NEXT: ds_write_b128 v0, a[0:3] offset:32768
|
||||
; GCN-NEXT: ds_write_b128 v0, a[4:7] offset:32784
|
||||
; GCN-NEXT: ; sched_group_barrier mask(0x00000008) size(5) SyncID(0)
|
||||
; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(40) SyncID(0)
|
||||
; GCN-NEXT: s_endpgm
|
||||
@ -742,48 +742,48 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(ptr ad
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[136:139], v3 offset:32
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[140:143], v3 offset:48
|
||||
; EXACTCUTOFF-NEXT: v_add_u32_e32 v4, 0x6000, v3
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[28:31], v3 offset:8304
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[24:27], v3 offset:8288
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[20:23], v3 offset:8272
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[16:19], v3 offset:8256
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[12:15], v3 offset:8240
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[8:11], v3 offset:8224
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[4:7], v3 offset:8208
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[0:3], v3 offset:8192
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[124:127], v3 offset:24688
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[120:123], v3 offset:24672
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[116:119], v3 offset:24656
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[112:115], v3 offset:24640
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[108:111], v3 offset:24624
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[104:107], v3 offset:24608
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[100:103], v3 offset:24592
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[96:99], v3 offset:24576
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[92:95], v3 offset:49264
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[88:91], v3 offset:49248
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[84:87], v3 offset:49232
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[80:83], v3 offset:49216
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[76:79], v3 offset:49200
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[72:75], v3 offset:49184
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[68:71], v3 offset:49168
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[64:67], v3 offset:49152
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[60:63], v4 offset:57456
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[56:59], v4 offset:57440
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[52:55], v4 offset:57424
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[48:51], v4 offset:57408
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[32:35], v4 offset:57344
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[36:39], v4 offset:57360
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[40:43], v4 offset:57376
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[44:47], v4 offset:57392
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[124:127], v3 offset:8304
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[120:123], v3 offset:8288
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[116:119], v3 offset:8272
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[112:115], v3 offset:8256
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[108:111], v3 offset:8240
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[104:107], v3 offset:8224
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[100:103], v3 offset:8208
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[96:99], v3 offset:8192
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[92:95], v3 offset:24688
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[88:91], v3 offset:24672
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[84:87], v3 offset:24656
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[80:83], v3 offset:24640
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[76:79], v3 offset:24624
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[72:75], v3 offset:24608
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[68:71], v3 offset:24592
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[64:67], v3 offset:24576
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[60:63], v3 offset:49264
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[56:59], v3 offset:49248
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[52:55], v3 offset:49232
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[48:51], v3 offset:49216
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[44:47], v3 offset:49200
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[40:43], v3 offset:49184
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[36:39], v3 offset:49168
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[32:35], v3 offset:49152
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[28:31], v4 offset:57456
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[24:27], v4 offset:57440
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[20:23], v4 offset:57424
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[16:19], v4 offset:57408
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[0:3], v4 offset:57344
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[4:7], v4 offset:57360
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[8:11], v4 offset:57376
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[12:15], v4 offset:57392
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(14)
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[128:159], v2, v1, a[128:159]
|
||||
; EXACTCUTOFF-NEXT: v_add_u32_e32 v0, s1, v0
|
||||
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000100) size(40) SyncID(0)
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(8)
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v2, v1, a[64:95]
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[96:127], v2, v1, a[96:127]
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v1, a[0:31]
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v2, v1, a[64:95]
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(8)
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v2, v1, a[32:63]
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v1, a[0:31]
|
||||
; EXACTCUTOFF-NEXT: s_nop 11
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[156:159] offset:112
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[152:155] offset:96
|
||||
@ -794,38 +794,38 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(ptr ad
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[132:135] offset:16
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[128:131]
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v0, s1
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[24:27] offset:8288
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[28:31] offset:8304
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[16:19] offset:8256
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[20:23] offset:8272
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[8:11] offset:8224
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[12:15] offset:8240
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[0:3] offset:8192
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[4:7] offset:8208
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[120:123] offset:16480
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[124:127] offset:16496
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[112:115] offset:16448
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[116:119] offset:16464
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[104:107] offset:16416
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[108:111] offset:16432
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[96:99] offset:16384
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[100:103] offset:16400
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[88:91] offset:24672
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[92:95] offset:24688
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[80:83] offset:24640
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[84:87] offset:24656
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[72:75] offset:24608
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[76:79] offset:24624
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[64:67] offset:24576
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[68:71] offset:24592
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[56:59] offset:32864
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[60:63] offset:32880
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[48:51] offset:32832
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[52:55] offset:32848
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[40:43] offset:32800
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[44:47] offset:32816
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[32:35] offset:32768
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[36:39] offset:32784
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[120:123] offset:8288
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[124:127] offset:8304
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[112:115] offset:8256
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[116:119] offset:8272
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[104:107] offset:8224
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[108:111] offset:8240
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[96:99] offset:8192
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[100:103] offset:8208
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[88:91] offset:16480
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[92:95] offset:16496
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[80:83] offset:16448
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[84:87] offset:16464
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[72:75] offset:16416
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[76:79] offset:16432
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[64:67] offset:16384
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[68:71] offset:16400
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[56:59] offset:24672
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[60:63] offset:24688
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[48:51] offset:24640
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[52:55] offset:24656
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[40:43] offset:24608
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[44:47] offset:24624
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[32:35] offset:24576
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[36:39] offset:24592
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[24:27] offset:32864
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[28:31] offset:32880
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[16:19] offset:32832
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[20:23] offset:32848
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[8:11] offset:32800
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[12:15] offset:32816
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[0:3] offset:32768
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[4:7] offset:32784
|
||||
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000008) size(5) SyncID(0)
|
||||
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000200) size(40) SyncID(0)
|
||||
; EXACTCUTOFF-NEXT: s_endpgm
|
||||
@ -1202,57 +1202,57 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; GCN-NEXT: v_cvt_i32_f32_e32 v5, v5
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; GCN-NEXT: v_add_u32_e32 v1, s6, v0
|
||||
; GCN-NEXT: ds_read_b128 a[124:127], v1 offset:112
|
||||
; GCN-NEXT: ds_read_b128 a[120:123], v1 offset:96
|
||||
; GCN-NEXT: ds_read_b128 a[116:119], v1 offset:80
|
||||
; GCN-NEXT: ds_read_b128 a[112:115], v1 offset:64
|
||||
; GCN-NEXT: ds_read_b128 a[96:99], v1
|
||||
; GCN-NEXT: ds_read_b128 a[100:103], v1 offset:16
|
||||
; GCN-NEXT: ds_read_b128 a[104:107], v1 offset:32
|
||||
; GCN-NEXT: ds_read_b128 a[108:111], v1 offset:48
|
||||
; GCN-NEXT: v_mov_b32_e32 v9, 1.0
|
||||
; GCN-NEXT: ds_read_b128 a[156:159], v1 offset:112
|
||||
; GCN-NEXT: ds_read_b128 a[152:155], v1 offset:96
|
||||
; GCN-NEXT: ds_read_b128 a[148:151], v1 offset:80
|
||||
; GCN-NEXT: ds_read_b128 a[144:147], v1 offset:64
|
||||
; GCN-NEXT: ds_read_b128 a[128:131], v1
|
||||
; GCN-NEXT: ds_read_b128 a[132:135], v1 offset:16
|
||||
; GCN-NEXT: ds_read_b128 a[136:139], v1 offset:32
|
||||
; GCN-NEXT: ds_read_b128 a[140:143], v1 offset:48
|
||||
; GCN-NEXT: v_mul_f32_e32 v9, s1, v3
|
||||
; GCN-NEXT: v_mov_b32_e32 v12, 1.0
|
||||
; GCN-NEXT: v_ldexp_f32 v4, v4, v5
|
||||
; GCN-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
|
||||
; GCN-NEXT: v_mul_f32_e32 v10, s1, v3
|
||||
; GCN-NEXT: v_rndne_f32_e32 v10, v9
|
||||
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v5
|
||||
; GCN-NEXT: v_mov_b32_e32 v6, 0x42b17218
|
||||
; GCN-NEXT: v_rndne_f32_e32 v11, v10
|
||||
; GCN-NEXT: v_sub_f32_e32 v11, v9, v10
|
||||
; GCN-NEXT: v_fma_f32 v9, s1, v3, -v9
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
|
||||
; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v6
|
||||
; GCN-NEXT: v_mov_b32_e32 v8, 0x7f800000
|
||||
; GCN-NEXT: v_sub_f32_e32 v12, v10, v11
|
||||
; GCN-NEXT: v_fma_f32 v10, s1, v3, -v10
|
||||
; GCN-NEXT: v_fmac_f32_e32 v9, s1, v7
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
|
||||
; GCN-NEXT: v_fmac_f32_e32 v10, s1, v7
|
||||
; GCN-NEXT: ds_read_b128 a[28:31], v1 offset:8304
|
||||
; GCN-NEXT: v_add_f32_e32 v9, v11, v9
|
||||
; GCN-NEXT: ds_read_b128 a[124:127], v1 offset:8304
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(1)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[96:127], v9, v4, a[96:127]
|
||||
; GCN-NEXT: v_add_f32_e32 v4, v12, v10
|
||||
; GCN-NEXT: v_exp_f32_e32 v4, v4
|
||||
; GCN-NEXT: v_cvt_i32_f32_e32 v10, v11
|
||||
; GCN-NEXT: ds_read_b128 a[24:27], v1 offset:8288
|
||||
; GCN-NEXT: ds_read_b128 a[20:23], v1 offset:8272
|
||||
; GCN-NEXT: ds_read_b128 a[16:19], v1 offset:8256
|
||||
; GCN-NEXT: ds_read_b128 a[12:15], v1 offset:8240
|
||||
; GCN-NEXT: ds_read_b128 a[8:11], v1 offset:8224
|
||||
; GCN-NEXT: ds_read_b128 a[4:7], v1 offset:8208
|
||||
; GCN-NEXT: ds_read_b128 a[0:3], v1 offset:8192
|
||||
; GCN-NEXT: v_ldexp_f32 v4, v4, v10
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[128:159], v12, v4, a[128:159]
|
||||
; GCN-NEXT: v_exp_f32_e32 v4, v9
|
||||
; GCN-NEXT: v_cvt_i32_f32_e32 v9, v10
|
||||
; GCN-NEXT: ds_read_b128 a[120:123], v1 offset:8288
|
||||
; GCN-NEXT: ds_read_b128 a[116:119], v1 offset:8272
|
||||
; GCN-NEXT: ds_read_b128 a[112:115], v1 offset:8256
|
||||
; GCN-NEXT: ds_read_b128 a[108:111], v1 offset:8240
|
||||
; GCN-NEXT: ds_read_b128 a[104:107], v1 offset:8224
|
||||
; GCN-NEXT: ds_read_b128 a[100:103], v1 offset:8208
|
||||
; GCN-NEXT: ds_read_b128 a[96:99], v1 offset:8192
|
||||
; GCN-NEXT: v_ldexp_f32 v4, v4, v9
|
||||
; GCN-NEXT: v_mul_f32_e32 v9, s2, v3
|
||||
; GCN-NEXT: v_rndne_f32_e32 v10, v9
|
||||
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v5
|
||||
; GCN-NEXT: v_sub_f32_e32 v11, v9, v10
|
||||
; GCN-NEXT: v_fma_f32 v9, s2, v3, -v9
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
|
||||
; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v6
|
||||
; GCN-NEXT: v_fmac_f32_e32 v9, s2, v7
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
|
||||
; GCN-NEXT: v_mul_f32_e32 v10, s2, v3
|
||||
; GCN-NEXT: v_rndne_f32_e32 v11, v10
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v9, v4, a[0:31]
|
||||
; GCN-NEXT: v_fma_f32 v4, s2, v3, -v10
|
||||
; GCN-NEXT: v_sub_f32_e32 v12, v10, v11
|
||||
; GCN-NEXT: v_fmac_f32_e32 v4, s2, v7
|
||||
; GCN-NEXT: v_add_f32_e32 v4, v12, v4
|
||||
; GCN-NEXT: v_exp_f32_e32 v4, v4
|
||||
; GCN-NEXT: v_cvt_i32_f32_e32 v10, v11
|
||||
; GCN-NEXT: v_add_f32_e32 v9, v11, v9
|
||||
; GCN-NEXT: ds_read_b128 a[92:95], v1 offset:24688
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(1)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[96:127], v12, v4, a[96:127]
|
||||
; GCN-NEXT: v_exp_f32_e32 v4, v9
|
||||
; GCN-NEXT: v_cvt_i32_f32_e32 v9, v10
|
||||
; GCN-NEXT: ds_read_b128 a[88:91], v1 offset:24672
|
||||
; GCN-NEXT: ds_read_b128 a[84:87], v1 offset:24656
|
||||
; GCN-NEXT: ds_read_b128 a[80:83], v1 offset:24640
|
||||
@ -1269,60 +1269,60 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; GCN-NEXT: ds_read_b128 a[40:43], v1 offset:49184
|
||||
; GCN-NEXT: ds_read_b128 a[36:39], v1 offset:49168
|
||||
; GCN-NEXT: ds_read_b128 a[32:35], v1 offset:49152
|
||||
; GCN-NEXT: v_ldexp_f32 v1, v4, v10
|
||||
; GCN-NEXT: v_ldexp_f32 v1, v4, v9
|
||||
; GCN-NEXT: v_mul_f32_e32 v4, s3, v3
|
||||
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5
|
||||
; GCN-NEXT: v_rndne_f32_e32 v9, v4
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
|
||||
; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v6
|
||||
; GCN-NEXT: v_mul_f32_e32 v4, s3, v3
|
||||
; GCN-NEXT: v_sub_f32_e32 v10, v4, v9
|
||||
; GCN-NEXT: v_fma_f32 v4, s3, v3, -v4
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
||||
; GCN-NEXT: v_rndne_f32_e32 v10, v4
|
||||
; GCN-NEXT: v_fmac_f32_e32 v4, s3, v7
|
||||
; GCN-NEXT: s_load_dword s8, s[4:5], 0x54
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v9, v1, a[64:95]
|
||||
; GCN-NEXT: v_sub_f32_e32 v1, v4, v10
|
||||
; GCN-NEXT: v_fma_f32 v4, s3, v3, -v4
|
||||
; GCN-NEXT: v_fmac_f32_e32 v4, s3, v7
|
||||
; GCN-NEXT: v_add_f32_e32 v1, v1, v4
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v12, v1, a[64:95]
|
||||
; GCN-NEXT: v_add_f32_e32 v1, v10, v4
|
||||
; GCN-NEXT: v_exp_f32_e32 v1, v1
|
||||
; GCN-NEXT: v_cvt_i32_f32_e32 v4, v10
|
||||
; GCN-NEXT: v_cvt_i32_f32_e32 v4, v9
|
||||
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5
|
||||
; GCN-NEXT: ds_read_b128 a[156:159], v2 offset:57456
|
||||
; GCN-NEXT: ds_read_b128 a[152:155], v2 offset:57440
|
||||
; GCN-NEXT: ds_read_b128 a[28:31], v2 offset:57456
|
||||
; GCN-NEXT: ds_read_b128 a[24:27], v2 offset:57440
|
||||
; GCN-NEXT: v_ldexp_f32 v1, v1, v4
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
|
||||
; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v6
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
||||
; GCN-NEXT: v_mul_f32_e32 v4, s8, v3
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
||||
; GCN-NEXT: v_rndne_f32_e32 v9, v4
|
||||
; GCN-NEXT: v_fma_f32 v3, s8, v3, -v4
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v9, v1, a[32:63]
|
||||
; GCN-NEXT: v_rndne_f32_e32 v1, v4
|
||||
; GCN-NEXT: v_sub_f32_e32 v10, v4, v1
|
||||
; GCN-NEXT: v_fmac_f32_e32 v3, s8, v7
|
||||
; GCN-NEXT: v_add_f32_e32 v3, v10, v3
|
||||
; GCN-NEXT: v_exp_f32_e32 v3, v3
|
||||
; GCN-NEXT: v_cvt_i32_f32_e32 v1, v1
|
||||
; GCN-NEXT: ds_read_b128 a[148:151], v2 offset:57424
|
||||
; GCN-NEXT: ds_read_b128 a[144:147], v2 offset:57408
|
||||
; GCN-NEXT: ds_read_b128 a[128:131], v2 offset:57344
|
||||
; GCN-NEXT: ds_read_b128 a[132:135], v2 offset:57360
|
||||
; GCN-NEXT: ds_read_b128 a[136:139], v2 offset:57376
|
||||
; GCN-NEXT: ds_read_b128 a[140:143], v2 offset:57392
|
||||
; GCN-NEXT: v_ldexp_f32 v1, v3, v1
|
||||
; GCN-NEXT: ds_read_b128 a[20:23], v2 offset:57424
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v12, v1, a[32:63]
|
||||
; GCN-NEXT: v_sub_f32_e32 v1, v4, v9
|
||||
; GCN-NEXT: v_add_f32_e32 v1, v1, v3
|
||||
; GCN-NEXT: v_exp_f32_e32 v1, v1
|
||||
; GCN-NEXT: v_cvt_i32_f32_e32 v3, v9
|
||||
; GCN-NEXT: ds_read_b128 a[16:19], v2 offset:57408
|
||||
; GCN-NEXT: ds_read_b128 a[0:3], v2 offset:57344
|
||||
; GCN-NEXT: ds_read_b128 a[4:7], v2 offset:57360
|
||||
; GCN-NEXT: ds_read_b128 a[8:11], v2 offset:57376
|
||||
; GCN-NEXT: ds_read_b128 a[12:15], v2 offset:57392
|
||||
; GCN-NEXT: v_ldexp_f32 v1, v1, v3
|
||||
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s8, v5
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
|
||||
; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, s8, v6
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
||||
; GCN-NEXT: v_add_u32_e32 v0, s7, v0
|
||||
; GCN-NEXT: ds_write_b128 v0, a[124:127] offset:112
|
||||
; GCN-NEXT: ds_write_b128 v0, a[156:159] offset:112
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(1)
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[128:159], v9, v1, a[128:159]
|
||||
; GCN-NEXT: ds_write_b128 v0, a[120:123] offset:96
|
||||
; GCN-NEXT: ds_write_b128 v0, a[116:119] offset:80
|
||||
; GCN-NEXT: ds_write_b128 v0, a[112:115] offset:64
|
||||
; GCN-NEXT: ds_write_b128 v0, a[108:111] offset:48
|
||||
; GCN-NEXT: ds_write_b128 v0, a[104:107] offset:32
|
||||
; GCN-NEXT: ds_write_b128 v0, a[100:103] offset:16
|
||||
; GCN-NEXT: ds_write_b128 v0, a[96:99]
|
||||
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v12, v1, a[0:31]
|
||||
; GCN-NEXT: ds_write_b128 v0, a[152:155] offset:96
|
||||
; GCN-NEXT: ds_write_b128 v0, a[148:151] offset:80
|
||||
; GCN-NEXT: ds_write_b128 v0, a[144:147] offset:64
|
||||
; GCN-NEXT: ds_write_b128 v0, a[140:143] offset:48
|
||||
; GCN-NEXT: ds_write_b128 v0, a[136:139] offset:32
|
||||
; GCN-NEXT: ds_write_b128 v0, a[132:135] offset:16
|
||||
; GCN-NEXT: ds_write_b128 v0, a[128:131]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s7
|
||||
; GCN-NEXT: ; kill: killed $sgpr4_sgpr5
|
||||
; GCN-NEXT: ; sched_group_barrier mask(0x00000400) size(1) SyncID(0)
|
||||
@ -1335,14 +1335,14 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; GCN-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-NEXT: ; sched_group_barrier mask(0x00000400) size(1) SyncID(0)
|
||||
; GCN-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; GCN-NEXT: ds_write_b128 v0, a[24:27] offset:8288
|
||||
; GCN-NEXT: ds_write_b128 v0, a[28:31] offset:8304
|
||||
; GCN-NEXT: ds_write_b128 v0, a[16:19] offset:8256
|
||||
; GCN-NEXT: ds_write_b128 v0, a[20:23] offset:8272
|
||||
; GCN-NEXT: ds_write_b128 v0, a[8:11] offset:8224
|
||||
; GCN-NEXT: ds_write_b128 v0, a[12:15] offset:8240
|
||||
; GCN-NEXT: ds_write_b128 v0, a[0:3] offset:8192
|
||||
; GCN-NEXT: ds_write_b128 v0, a[4:7] offset:8208
|
||||
; GCN-NEXT: ds_write_b128 v0, a[120:123] offset:8288
|
||||
; GCN-NEXT: ds_write_b128 v0, a[124:127] offset:8304
|
||||
; GCN-NEXT: ds_write_b128 v0, a[112:115] offset:8256
|
||||
; GCN-NEXT: ds_write_b128 v0, a[116:119] offset:8272
|
||||
; GCN-NEXT: ds_write_b128 v0, a[104:107] offset:8224
|
||||
; GCN-NEXT: ds_write_b128 v0, a[108:111] offset:8240
|
||||
; GCN-NEXT: ds_write_b128 v0, a[96:99] offset:8192
|
||||
; GCN-NEXT: ds_write_b128 v0, a[100:103] offset:8208
|
||||
; GCN-NEXT: ds_write_b128 v0, a[88:91] offset:16480
|
||||
; GCN-NEXT: ds_write_b128 v0, a[92:95] offset:16496
|
||||
; GCN-NEXT: ds_write_b128 v0, a[80:83] offset:16448
|
||||
@ -1359,14 +1359,14 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; GCN-NEXT: ds_write_b128 v0, a[44:47] offset:24624
|
||||
; GCN-NEXT: ds_write_b128 v0, a[32:35] offset:24576
|
||||
; GCN-NEXT: ds_write_b128 v0, a[36:39] offset:24592
|
||||
; GCN-NEXT: ds_write_b128 v0, a[152:155] offset:32864
|
||||
; GCN-NEXT: ds_write_b128 v0, a[156:159] offset:32880
|
||||
; GCN-NEXT: ds_write_b128 v0, a[144:147] offset:32832
|
||||
; GCN-NEXT: ds_write_b128 v0, a[148:151] offset:32848
|
||||
; GCN-NEXT: ds_write_b128 v0, a[136:139] offset:32800
|
||||
; GCN-NEXT: ds_write_b128 v0, a[140:143] offset:32816
|
||||
; GCN-NEXT: ds_write_b128 v0, a[128:131] offset:32768
|
||||
; GCN-NEXT: ds_write_b128 v0, a[132:135] offset:32784
|
||||
; GCN-NEXT: ds_write_b128 v0, a[24:27] offset:32864
|
||||
; GCN-NEXT: ds_write_b128 v0, a[28:31] offset:32880
|
||||
; GCN-NEXT: ds_write_b128 v0, a[16:19] offset:32832
|
||||
; GCN-NEXT: ds_write_b128 v0, a[20:23] offset:32848
|
||||
; GCN-NEXT: ds_write_b128 v0, a[8:11] offset:32800
|
||||
; GCN-NEXT: ds_write_b128 v0, a[12:15] offset:32816
|
||||
; GCN-NEXT: ds_write_b128 v0, a[0:3] offset:32768
|
||||
; GCN-NEXT: ds_write_b128 v0, a[4:7] offset:32784
|
||||
; GCN-NEXT: s_endpgm
|
||||
;
|
||||
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_interleave_EXP_MFMA:
|
||||
@ -1387,57 +1387,57 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; EXACTCUTOFF-NEXT: v_cvt_i32_f32_e32 v5, v5
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; EXACTCUTOFF-NEXT: v_add_u32_e32 v1, s6, v0
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[124:127], v1 offset:112
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[120:123], v1 offset:96
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[116:119], v1 offset:80
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[112:115], v1 offset:64
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[96:99], v1
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[100:103], v1 offset:16
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[104:107], v1 offset:32
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[108:111], v1 offset:48
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v9, 1.0
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[156:159], v1 offset:112
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[152:155], v1 offset:96
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[148:151], v1 offset:80
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[144:147], v1 offset:64
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[128:131], v1
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[132:135], v1 offset:16
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[136:139], v1 offset:32
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[140:143], v1 offset:48
|
||||
; EXACTCUTOFF-NEXT: v_mul_f32_e32 v9, s1, v3
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v12, 1.0
|
||||
; EXACTCUTOFF-NEXT: v_ldexp_f32 v4, v4, v5
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
|
||||
; EXACTCUTOFF-NEXT: v_mul_f32_e32 v10, s1, v3
|
||||
; EXACTCUTOFF-NEXT: v_rndne_f32_e32 v10, v9
|
||||
; EXACTCUTOFF-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v5
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v6, 0x42b17218
|
||||
; EXACTCUTOFF-NEXT: v_rndne_f32_e32 v11, v10
|
||||
; EXACTCUTOFF-NEXT: v_sub_f32_e32 v11, v9, v10
|
||||
; EXACTCUTOFF-NEXT: v_fma_f32 v9, s1, v3, -v9
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
|
||||
; EXACTCUTOFF-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v6
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v8, 0x7f800000
|
||||
; EXACTCUTOFF-NEXT: v_sub_f32_e32 v12, v10, v11
|
||||
; EXACTCUTOFF-NEXT: v_fma_f32 v10, s1, v3, -v10
|
||||
; EXACTCUTOFF-NEXT: v_fmac_f32_e32 v9, s1, v7
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
|
||||
; EXACTCUTOFF-NEXT: v_fmac_f32_e32 v10, s1, v7
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[28:31], v1 offset:8304
|
||||
; EXACTCUTOFF-NEXT: v_add_f32_e32 v9, v11, v9
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[124:127], v1 offset:8304
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(1)
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[96:127], v9, v4, a[96:127]
|
||||
; EXACTCUTOFF-NEXT: v_add_f32_e32 v4, v12, v10
|
||||
; EXACTCUTOFF-NEXT: v_exp_f32_e32 v4, v4
|
||||
; EXACTCUTOFF-NEXT: v_cvt_i32_f32_e32 v10, v11
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[24:27], v1 offset:8288
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[20:23], v1 offset:8272
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[16:19], v1 offset:8256
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[12:15], v1 offset:8240
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[8:11], v1 offset:8224
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[4:7], v1 offset:8208
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[0:3], v1 offset:8192
|
||||
; EXACTCUTOFF-NEXT: v_ldexp_f32 v4, v4, v10
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[128:159], v12, v4, a[128:159]
|
||||
; EXACTCUTOFF-NEXT: v_exp_f32_e32 v4, v9
|
||||
; EXACTCUTOFF-NEXT: v_cvt_i32_f32_e32 v9, v10
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[120:123], v1 offset:8288
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[116:119], v1 offset:8272
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[112:115], v1 offset:8256
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[108:111], v1 offset:8240
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[104:107], v1 offset:8224
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[100:103], v1 offset:8208
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[96:99], v1 offset:8192
|
||||
; EXACTCUTOFF-NEXT: v_ldexp_f32 v4, v4, v9
|
||||
; EXACTCUTOFF-NEXT: v_mul_f32_e32 v9, s2, v3
|
||||
; EXACTCUTOFF-NEXT: v_rndne_f32_e32 v10, v9
|
||||
; EXACTCUTOFF-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v5
|
||||
; EXACTCUTOFF-NEXT: v_sub_f32_e32 v11, v9, v10
|
||||
; EXACTCUTOFF-NEXT: v_fma_f32 v9, s2, v3, -v9
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
|
||||
; EXACTCUTOFF-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v6
|
||||
; EXACTCUTOFF-NEXT: v_fmac_f32_e32 v9, s2, v7
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
|
||||
; EXACTCUTOFF-NEXT: v_mul_f32_e32 v10, s2, v3
|
||||
; EXACTCUTOFF-NEXT: v_rndne_f32_e32 v11, v10
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v9, v4, a[0:31]
|
||||
; EXACTCUTOFF-NEXT: v_fma_f32 v4, s2, v3, -v10
|
||||
; EXACTCUTOFF-NEXT: v_sub_f32_e32 v12, v10, v11
|
||||
; EXACTCUTOFF-NEXT: v_fmac_f32_e32 v4, s2, v7
|
||||
; EXACTCUTOFF-NEXT: v_add_f32_e32 v4, v12, v4
|
||||
; EXACTCUTOFF-NEXT: v_exp_f32_e32 v4, v4
|
||||
; EXACTCUTOFF-NEXT: v_cvt_i32_f32_e32 v10, v11
|
||||
; EXACTCUTOFF-NEXT: v_add_f32_e32 v9, v11, v9
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[92:95], v1 offset:24688
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(1)
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[96:127], v12, v4, a[96:127]
|
||||
; EXACTCUTOFF-NEXT: v_exp_f32_e32 v4, v9
|
||||
; EXACTCUTOFF-NEXT: v_cvt_i32_f32_e32 v9, v10
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[88:91], v1 offset:24672
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[84:87], v1 offset:24656
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[80:83], v1 offset:24640
|
||||
@ -1454,60 +1454,60 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[40:43], v1 offset:49184
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[36:39], v1 offset:49168
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[32:35], v1 offset:49152
|
||||
; EXACTCUTOFF-NEXT: v_ldexp_f32 v1, v4, v10
|
||||
; EXACTCUTOFF-NEXT: v_ldexp_f32 v1, v4, v9
|
||||
; EXACTCUTOFF-NEXT: v_mul_f32_e32 v4, s3, v3
|
||||
; EXACTCUTOFF-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5
|
||||
; EXACTCUTOFF-NEXT: v_rndne_f32_e32 v9, v4
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
|
||||
; EXACTCUTOFF-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v6
|
||||
; EXACTCUTOFF-NEXT: v_mul_f32_e32 v4, s3, v3
|
||||
; EXACTCUTOFF-NEXT: v_sub_f32_e32 v10, v4, v9
|
||||
; EXACTCUTOFF-NEXT: v_fma_f32 v4, s3, v3, -v4
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
||||
; EXACTCUTOFF-NEXT: v_rndne_f32_e32 v10, v4
|
||||
; EXACTCUTOFF-NEXT: v_fmac_f32_e32 v4, s3, v7
|
||||
; EXACTCUTOFF-NEXT: s_load_dword s8, s[4:5], 0x54
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v9, v1, a[64:95]
|
||||
; EXACTCUTOFF-NEXT: v_sub_f32_e32 v1, v4, v10
|
||||
; EXACTCUTOFF-NEXT: v_fma_f32 v4, s3, v3, -v4
|
||||
; EXACTCUTOFF-NEXT: v_fmac_f32_e32 v4, s3, v7
|
||||
; EXACTCUTOFF-NEXT: v_add_f32_e32 v1, v1, v4
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v12, v1, a[64:95]
|
||||
; EXACTCUTOFF-NEXT: v_add_f32_e32 v1, v10, v4
|
||||
; EXACTCUTOFF-NEXT: v_exp_f32_e32 v1, v1
|
||||
; EXACTCUTOFF-NEXT: v_cvt_i32_f32_e32 v4, v10
|
||||
; EXACTCUTOFF-NEXT: v_cvt_i32_f32_e32 v4, v9
|
||||
; EXACTCUTOFF-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[156:159], v2 offset:57456
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[152:155], v2 offset:57440
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[28:31], v2 offset:57456
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[24:27], v2 offset:57440
|
||||
; EXACTCUTOFF-NEXT: v_ldexp_f32 v1, v1, v4
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
|
||||
; EXACTCUTOFF-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v6
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
||||
; EXACTCUTOFF-NEXT: v_mul_f32_e32 v4, s8, v3
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
||||
; EXACTCUTOFF-NEXT: v_rndne_f32_e32 v9, v4
|
||||
; EXACTCUTOFF-NEXT: v_fma_f32 v3, s8, v3, -v4
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v9, v1, a[32:63]
|
||||
; EXACTCUTOFF-NEXT: v_rndne_f32_e32 v1, v4
|
||||
; EXACTCUTOFF-NEXT: v_sub_f32_e32 v10, v4, v1
|
||||
; EXACTCUTOFF-NEXT: v_fmac_f32_e32 v3, s8, v7
|
||||
; EXACTCUTOFF-NEXT: v_add_f32_e32 v3, v10, v3
|
||||
; EXACTCUTOFF-NEXT: v_exp_f32_e32 v3, v3
|
||||
; EXACTCUTOFF-NEXT: v_cvt_i32_f32_e32 v1, v1
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[148:151], v2 offset:57424
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[144:147], v2 offset:57408
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[128:131], v2 offset:57344
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[132:135], v2 offset:57360
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[136:139], v2 offset:57376
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[140:143], v2 offset:57392
|
||||
; EXACTCUTOFF-NEXT: v_ldexp_f32 v1, v3, v1
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[20:23], v2 offset:57424
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v12, v1, a[32:63]
|
||||
; EXACTCUTOFF-NEXT: v_sub_f32_e32 v1, v4, v9
|
||||
; EXACTCUTOFF-NEXT: v_add_f32_e32 v1, v1, v3
|
||||
; EXACTCUTOFF-NEXT: v_exp_f32_e32 v1, v1
|
||||
; EXACTCUTOFF-NEXT: v_cvt_i32_f32_e32 v3, v9
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[16:19], v2 offset:57408
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[0:3], v2 offset:57344
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[4:7], v2 offset:57360
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[8:11], v2 offset:57376
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[12:15], v2 offset:57392
|
||||
; EXACTCUTOFF-NEXT: v_ldexp_f32 v1, v1, v3
|
||||
; EXACTCUTOFF-NEXT: v_cmp_nlt_f32_e32 vcc, s8, v5
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
|
||||
; EXACTCUTOFF-NEXT: v_cmp_ngt_f32_e32 vcc, s8, v6
|
||||
; EXACTCUTOFF-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
||||
; EXACTCUTOFF-NEXT: v_add_u32_e32 v0, s7, v0
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[124:127] offset:112
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[156:159] offset:112
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(1)
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[128:159], v9, v1, a[128:159]
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[120:123] offset:96
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[116:119] offset:80
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[112:115] offset:64
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[108:111] offset:48
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[104:107] offset:32
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[100:103] offset:16
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[96:99]
|
||||
; EXACTCUTOFF-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v12, v1, a[0:31]
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[152:155] offset:96
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[148:151] offset:80
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[144:147] offset:64
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[140:143] offset:48
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[136:139] offset:32
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[132:135] offset:16
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[128:131]
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v0, s7
|
||||
; EXACTCUTOFF-NEXT: ; kill: killed $sgpr4_sgpr5
|
||||
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000400) size(1) SyncID(0)
|
||||
@ -1520,14 +1520,14 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000400) size(1) SyncID(0)
|
||||
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000008) size(1) SyncID(0)
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[24:27] offset:8288
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[28:31] offset:8304
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[16:19] offset:8256
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[20:23] offset:8272
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[8:11] offset:8224
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[12:15] offset:8240
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[0:3] offset:8192
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[4:7] offset:8208
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[120:123] offset:8288
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[124:127] offset:8304
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[112:115] offset:8256
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[116:119] offset:8272
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[104:107] offset:8224
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[108:111] offset:8240
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[96:99] offset:8192
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[100:103] offset:8208
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[88:91] offset:16480
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[92:95] offset:16496
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[80:83] offset:16448
|
||||
@ -1544,14 +1544,14 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[44:47] offset:24624
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[32:35] offset:24576
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[36:39] offset:24592
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[152:155] offset:32864
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[156:159] offset:32880
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[144:147] offset:32832
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[148:151] offset:32848
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[136:139] offset:32800
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[140:143] offset:32816
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[128:131] offset:32768
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[132:135] offset:32784
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[24:27] offset:32864
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[28:31] offset:32880
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[16:19] offset:32832
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[20:23] offset:32848
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[8:11] offset:32800
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[12:15] offset:32816
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[0:3] offset:32768
|
||||
; EXACTCUTOFF-NEXT: ds_write_b128 v0, a[4:7] offset:32784
|
||||
; EXACTCUTOFF-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
||||
@ -20,19 +20,19 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_f16__vgpr(ptr addrspace(1) %
|
||||
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
|
||||
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
|
||||
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[6:7]
|
||||
; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[2:3]
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[0:1]
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[8:9]
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[10:11]
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[12:13]
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[14:15]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v5, s16
|
||||
; SDAG-NEXT: v_mov_b32_e32 v4, 0
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[8:9]
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[10:11]
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[12:13]
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[14:15]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v1, s16
|
||||
; SDAG-NEXT: v_mov_b32_e32 v0, 0
|
||||
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[0:3], v[14:17], v[6:13], v5 cbsz:1 abid:2
|
||||
; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
|
||||
; SDAG-NEXT: s_nop 7
|
||||
; SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
|
||||
; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
|
||||
; SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: test_smfmac_f32_16x16x64_f16__vgpr:
|
||||
@ -518,19 +518,19 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_bf16__vgpr(ptr addrspace(1)
|
||||
; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
|
||||
; GCN-NEXT: s_load_dword s16, s[4:5], 0x64
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: global_load_dwordx4 v[0:3], v0, s[6:7]
|
||||
; GCN-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
|
||||
; GCN-NEXT: v_mov_b64_e32 v[16:17], s[2:3]
|
||||
; GCN-NEXT: v_mov_b64_e32 v[14:15], s[0:1]
|
||||
; GCN-NEXT: v_mov_b64_e32 v[6:7], s[8:9]
|
||||
; GCN-NEXT: v_mov_b64_e32 v[8:9], s[10:11]
|
||||
; GCN-NEXT: v_mov_b64_e32 v[10:11], s[12:13]
|
||||
; GCN-NEXT: v_mov_b64_e32 v[12:13], s[14:15]
|
||||
; GCN-NEXT: v_mov_b32_e32 v5, s16
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GCN-NEXT: v_mov_b64_e32 v[2:3], s[8:9]
|
||||
; GCN-NEXT: v_mov_b64_e32 v[4:5], s[10:11]
|
||||
; GCN-NEXT: v_mov_b64_e32 v[6:7], s[12:13]
|
||||
; GCN-NEXT: v_mov_b64_e32 v[8:9], s[14:15]
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[0:3], v[14:17], v[6:13], v5 cbsz:1 abid:2
|
||||
; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
|
||||
; GCN-NEXT: s_nop 7
|
||||
; GCN-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
|
||||
; GCN-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
|
||||
; GCN-NEXT: s_endpgm
|
||||
bb:
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -797,12 +797,12 @@ define amdgpu_kernel void @test_smfmac_i32_16x16x128_i8__vgpr(ptr addrspace(1) %
|
||||
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
|
||||
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
|
||||
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v16, 0
|
||||
; SDAG-NEXT: v_mov_b32_e32 v12, s8
|
||||
; SDAG-NEXT: v_mov_b32_e32 v13, s9
|
||||
; SDAG-NEXT: v_mov_b32_e32 v14, s10
|
||||
; SDAG-NEXT: v_mov_b32_e32 v15, s11
|
||||
; SDAG-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v12, 0
|
||||
; SDAG-NEXT: v_mov_b32_e32 v8, s8
|
||||
; SDAG-NEXT: v_mov_b32_e32 v9, s9
|
||||
; SDAG-NEXT: v_mov_b32_e32 v10, s10
|
||||
; SDAG-NEXT: v_mov_b32_e32 v11, s11
|
||||
; SDAG-NEXT: v_mov_b32_e32 v0, s12
|
||||
; SDAG-NEXT: v_mov_b32_e32 v1, s13
|
||||
; SDAG-NEXT: v_mov_b32_e32 v2, s14
|
||||
@ -811,12 +811,12 @@ define amdgpu_kernel void @test_smfmac_i32_16x16x128_i8__vgpr(ptr addrspace(1) %
|
||||
; SDAG-NEXT: v_mov_b32_e32 v5, s1
|
||||
; SDAG-NEXT: v_mov_b32_e32 v6, s2
|
||||
; SDAG-NEXT: v_mov_b32_e32 v7, s3
|
||||
; SDAG-NEXT: v_mov_b32_e32 v17, s16
|
||||
; SDAG-NEXT: v_mov_b32_e32 v13, s16
|
||||
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; SDAG-NEXT: s_nop 0
|
||||
; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
|
||||
; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2
|
||||
; SDAG-NEXT: s_nop 7
|
||||
; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
|
||||
; SDAG-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7]
|
||||
; SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: test_smfmac_i32_16x16x128_i8__vgpr:
|
||||
@ -1308,12 +1308,12 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_bf8__vgpr(ptr addrspace
|
||||
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
|
||||
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
|
||||
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v16, 0
|
||||
; SDAG-NEXT: v_mov_b32_e32 v12, s8
|
||||
; SDAG-NEXT: v_mov_b32_e32 v13, s9
|
||||
; SDAG-NEXT: v_mov_b32_e32 v14, s10
|
||||
; SDAG-NEXT: v_mov_b32_e32 v15, s11
|
||||
; SDAG-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v12, 0
|
||||
; SDAG-NEXT: v_mov_b32_e32 v8, s8
|
||||
; SDAG-NEXT: v_mov_b32_e32 v9, s9
|
||||
; SDAG-NEXT: v_mov_b32_e32 v10, s10
|
||||
; SDAG-NEXT: v_mov_b32_e32 v11, s11
|
||||
; SDAG-NEXT: v_mov_b32_e32 v0, s12
|
||||
; SDAG-NEXT: v_mov_b32_e32 v1, s13
|
||||
; SDAG-NEXT: v_mov_b32_e32 v2, s14
|
||||
@ -1322,12 +1322,12 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_bf8__vgpr(ptr addrspace
|
||||
; SDAG-NEXT: v_mov_b32_e32 v5, s1
|
||||
; SDAG-NEXT: v_mov_b32_e32 v6, s2
|
||||
; SDAG-NEXT: v_mov_b32_e32 v7, s3
|
||||
; SDAG-NEXT: v_mov_b32_e32 v17, s16
|
||||
; SDAG-NEXT: v_mov_b32_e32 v13, s16
|
||||
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; SDAG-NEXT: s_nop 0
|
||||
; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
|
||||
; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2
|
||||
; SDAG-NEXT: s_nop 7
|
||||
; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
|
||||
; SDAG-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7]
|
||||
; SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_bf8__vgpr:
|
||||
@ -1470,12 +1470,12 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_fp8__vgpr(ptr addrspace
|
||||
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
|
||||
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
|
||||
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v16, 0
|
||||
; SDAG-NEXT: v_mov_b32_e32 v12, s8
|
||||
; SDAG-NEXT: v_mov_b32_e32 v13, s9
|
||||
; SDAG-NEXT: v_mov_b32_e32 v14, s10
|
||||
; SDAG-NEXT: v_mov_b32_e32 v15, s11
|
||||
; SDAG-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v12, 0
|
||||
; SDAG-NEXT: v_mov_b32_e32 v8, s8
|
||||
; SDAG-NEXT: v_mov_b32_e32 v9, s9
|
||||
; SDAG-NEXT: v_mov_b32_e32 v10, s10
|
||||
; SDAG-NEXT: v_mov_b32_e32 v11, s11
|
||||
; SDAG-NEXT: v_mov_b32_e32 v0, s12
|
||||
; SDAG-NEXT: v_mov_b32_e32 v1, s13
|
||||
; SDAG-NEXT: v_mov_b32_e32 v2, s14
|
||||
@ -1484,12 +1484,12 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_fp8__vgpr(ptr addrspace
|
||||
; SDAG-NEXT: v_mov_b32_e32 v5, s1
|
||||
; SDAG-NEXT: v_mov_b32_e32 v6, s2
|
||||
; SDAG-NEXT: v_mov_b32_e32 v7, s3
|
||||
; SDAG-NEXT: v_mov_b32_e32 v17, s16
|
||||
; SDAG-NEXT: v_mov_b32_e32 v13, s16
|
||||
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; SDAG-NEXT: s_nop 0
|
||||
; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
|
||||
; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2
|
||||
; SDAG-NEXT: s_nop 7
|
||||
; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
|
||||
; SDAG-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7]
|
||||
; SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_fp8__vgpr:
|
||||
@ -1632,12 +1632,12 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_bf8__vgpr(ptr addrspace
|
||||
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
|
||||
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
|
||||
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v16, 0
|
||||
; SDAG-NEXT: v_mov_b32_e32 v12, s8
|
||||
; SDAG-NEXT: v_mov_b32_e32 v13, s9
|
||||
; SDAG-NEXT: v_mov_b32_e32 v14, s10
|
||||
; SDAG-NEXT: v_mov_b32_e32 v15, s11
|
||||
; SDAG-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v12, 0
|
||||
; SDAG-NEXT: v_mov_b32_e32 v8, s8
|
||||
; SDAG-NEXT: v_mov_b32_e32 v9, s9
|
||||
; SDAG-NEXT: v_mov_b32_e32 v10, s10
|
||||
; SDAG-NEXT: v_mov_b32_e32 v11, s11
|
||||
; SDAG-NEXT: v_mov_b32_e32 v0, s12
|
||||
; SDAG-NEXT: v_mov_b32_e32 v1, s13
|
||||
; SDAG-NEXT: v_mov_b32_e32 v2, s14
|
||||
@ -1646,12 +1646,12 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_bf8__vgpr(ptr addrspace
|
||||
; SDAG-NEXT: v_mov_b32_e32 v5, s1
|
||||
; SDAG-NEXT: v_mov_b32_e32 v6, s2
|
||||
; SDAG-NEXT: v_mov_b32_e32 v7, s3
|
||||
; SDAG-NEXT: v_mov_b32_e32 v17, s16
|
||||
; SDAG-NEXT: v_mov_b32_e32 v13, s16
|
||||
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; SDAG-NEXT: s_nop 0
|
||||
; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
|
||||
; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2
|
||||
; SDAG-NEXT: s_nop 7
|
||||
; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
|
||||
; SDAG-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7]
|
||||
; SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_bf8__vgpr:
|
||||
@ -1794,12 +1794,12 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_fp8__vgpr(ptr addrspace
|
||||
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
|
||||
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
|
||||
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v16, 0
|
||||
; SDAG-NEXT: v_mov_b32_e32 v12, s8
|
||||
; SDAG-NEXT: v_mov_b32_e32 v13, s9
|
||||
; SDAG-NEXT: v_mov_b32_e32 v14, s10
|
||||
; SDAG-NEXT: v_mov_b32_e32 v15, s11
|
||||
; SDAG-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v12, 0
|
||||
; SDAG-NEXT: v_mov_b32_e32 v8, s8
|
||||
; SDAG-NEXT: v_mov_b32_e32 v9, s9
|
||||
; SDAG-NEXT: v_mov_b32_e32 v10, s10
|
||||
; SDAG-NEXT: v_mov_b32_e32 v11, s11
|
||||
; SDAG-NEXT: v_mov_b32_e32 v0, s12
|
||||
; SDAG-NEXT: v_mov_b32_e32 v1, s13
|
||||
; SDAG-NEXT: v_mov_b32_e32 v2, s14
|
||||
@ -1808,12 +1808,12 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_fp8__vgpr(ptr addrspace
|
||||
; SDAG-NEXT: v_mov_b32_e32 v5, s1
|
||||
; SDAG-NEXT: v_mov_b32_e32 v6, s2
|
||||
; SDAG-NEXT: v_mov_b32_e32 v7, s3
|
||||
; SDAG-NEXT: v_mov_b32_e32 v17, s16
|
||||
; SDAG-NEXT: v_mov_b32_e32 v13, s16
|
||||
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; SDAG-NEXT: s_nop 0
|
||||
; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
|
||||
; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2
|
||||
; SDAG-NEXT: s_nop 7
|
||||
; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
|
||||
; SDAG-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7]
|
||||
; SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_fp8__vgpr:
|
||||
|
||||
@ -76,7 +76,7 @@ define amdgpu_kernel void @buffer_nontemporal_load_store(ptr addrspace(7) %in, p
|
||||
; GFX942-SDAG-NEXT: s_mov_b32 s3, s12
|
||||
; GFX942-SDAG-NEXT: s_or_b64 s[8:9], s[2:3], s[12:13]
|
||||
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX942-SDAG-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen nt
|
||||
; GFX942-SDAG-NEXT: buffer_load_dword v1, v0, s[8:11], 0 offen nt
|
||||
; GFX942-SDAG-NEXT: s_load_dword s13, s[4:5], 0x30
|
||||
; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20
|
||||
; GFX942-SDAG-NEXT: s_mov_b32 s5, s12
|
||||
@ -87,9 +87,9 @@ define amdgpu_kernel void @buffer_nontemporal_load_store(ptr addrspace(7) %in, p
|
||||
; GFX942-SDAG-NEXT: s_mov_b32 s2, s1
|
||||
; GFX942-SDAG-NEXT: s_mov_b32 s3, s12
|
||||
; GFX942-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13]
|
||||
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-SDAG-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen nt
|
||||
; GFX942-SDAG-NEXT: buffer_store_dword v1, v0, s[4:7], 0 offen nt
|
||||
; GFX942-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX942-GISEL-LABEL: buffer_nontemporal_load_store:
|
||||
@ -357,7 +357,7 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp
|
||||
; GFX942-SDAG-NEXT: s_mov_b32 s3, s12
|
||||
; GFX942-SDAG-NEXT: s_or_b64 s[8:9], s[2:3], s[12:13]
|
||||
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX942-SDAG-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen sc0 sc1
|
||||
; GFX942-SDAG-NEXT: buffer_load_dword v1, v0, s[8:11], 0 offen sc0 sc1
|
||||
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-SDAG-NEXT: s_load_dword s13, s[4:5], 0x30
|
||||
; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20
|
||||
@ -369,8 +369,8 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp
|
||||
; GFX942-SDAG-NEXT: s_mov_b32 s2, s1
|
||||
; GFX942-SDAG-NEXT: s_mov_b32 s3, s12
|
||||
; GFX942-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13]
|
||||
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX942-SDAG-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen sc0 sc1
|
||||
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX942-SDAG-NEXT: buffer_store_dword v1, v0, s[4:7], 0 offen sc0 sc1
|
||||
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
|
||||
@ -236,8 +236,7 @@ define <16 x i8> @uniform_masked_load_ptr1_mask_v16i8(ptr addrspace(1) inreg noc
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB8_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %cond.load
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: global_load_dwordx4 v[16:19], v0, s[0:1]
|
||||
; GFX942-NEXT: global_load_dwordx4 v[16:19], v16, s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v15, 24, v19
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v14, 16, v19
|
||||
|
||||
@ -12,19 +12,19 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) %in, ptr addrspace(1)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[2:3]
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[0:1]
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v12, s4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v13, s5
|
||||
; GFX942-NEXT: v_mov_b32_e32 v8, s4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v9, s5
|
||||
; GFX942-NEXT: v_mov_b32_e32 v4, s6
|
||||
; GFX942-NEXT: v_mov_b32_e32 v5, s7
|
||||
; GFX942-NEXT: v_mov_b32_e32 v6, s7
|
||||
; GFX942-NEXT: v_mov_b32_e32 v7, s7
|
||||
; GFX942-NEXT: s_nop 1
|
||||
; GFX942-NEXT: v_smfmac_i32_16x16x64_i8 v[8:11], v[12:13], v[4:7], v13
|
||||
; GFX942-NEXT: v_smfmac_i32_16x16x64_i8 v[10:13], v[8:9], v[4:7], v9
|
||||
; GFX942-NEXT: s_nop 6
|
||||
; GFX942-NEXT: global_store_dword v0, v11, s[2:3] offset:12
|
||||
; GFX942-NEXT: global_store_dword v0, v13, s[2:3] offset:12
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 0
|
||||
|
||||
@ -8,34 +8,33 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
|
||||
; CHECK: ; %bb.0: ; %entry
|
||||
; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[46:47], 0, 0
|
||||
; CHECK-NEXT: flat_load_dword v42, v[46:47]
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[44:45], 0, 0
|
||||
; CHECK-NEXT: flat_load_dword v42, v[44:45]
|
||||
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
|
||||
; CHECK-NEXT: s_load_dwordx4 s[64:67], s[34:35], 0x8
|
||||
; CHECK-NEXT: s_load_dword s68, s[34:35], 0x0
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s17
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], src_private_base
|
||||
; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
|
||||
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s68, -1
|
||||
; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0
|
||||
; CHECK-NEXT: s_cselect_b32 s5, s9, 0
|
||||
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
|
||||
; CHECK-NEXT: s_cselect_b32 s6, s68, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v57, s5
|
||||
; CHECK-NEXT: s_mov_b32 s5, s4
|
||||
; CHECK-NEXT: s_add_u32 s50, s34, 48
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a33, s5
|
||||
; CHECK-NEXT: v_mov_b32_e32 v47, s5
|
||||
; CHECK-NEXT: s_mov_b32 s5, s4
|
||||
; CHECK-NEXT: s_addc_u32 s51, s35, 0
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a32, s4
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[62:63], s[4:5], s[4:5] op_sel:[0,1]
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, G@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, G@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
|
||||
; CHECK-NEXT: s_mov_b32 s53, s14
|
||||
; CHECK-NEXT: v_mov_b32_e32 v56, s6
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[58:59], s[64:65], s[64:65] op_sel:[0,1]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v46, s6
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[56:57], s[64:65], s[64:65] op_sel:[0,1]
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[50:51]
|
||||
@ -48,15 +47,15 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
|
||||
; CHECK-NEXT: s_mov_b32 s52, s15
|
||||
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v40, v0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v62, s66
|
||||
; CHECK-NEXT: v_mov_b32_e32 v63, s67
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[58:59], a[32:33]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v60, s66
|
||||
; CHECK-NEXT: v_mov_b32_e32 v61, s67
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[56:57], v[62:63]
|
||||
; CHECK-NEXT: ; kill: def $sgpr15 killed $sgpr15
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55]
|
||||
; CHECK-NEXT: flat_load_dwordx2 v[60:61], v[58:59]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v44, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v45, 0x3ff00000
|
||||
; CHECK-NEXT: flat_load_dwordx2 a[32:33], v[56:57]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v58, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v59, 0x3ff00000
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[50:51]
|
||||
@ -65,29 +64,31 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
|
||||
; CHECK-NEXT: s_mov_b32 s13, s52
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v40
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[46:47], v[44:45]
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[58:59], a[32:33]
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[44:45], v[58:59]
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[56:57], v[62:63]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: ; kill: def $sgpr15 killed $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55]
|
||||
; CHECK-NEXT: flat_load_dwordx2 v[0:1], v[56:57] glc
|
||||
; CHECK-NEXT: flat_load_dwordx2 v[0:1], v[46:47] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s67
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s68
|
||||
; CHECK-NEXT: v_cmp_lt_i32_e32 vcc, 0, v42
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[58:59], v[60:61]
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[56:57], a[32:33]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[58:59], v[62:63]
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[56:57], v[60:61]
|
||||
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
|
||||
; CHECK-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen
|
||||
; CHECK-NEXT: buffer_store_dword v58, v0, s[0:3], 0 offen
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr4
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_4
|
||||
; CHECK-NEXT: ; %bb.1: ; %LeafBlock5
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v42
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; CHECK-NEXT: ; %bb.2: ; %sw.bb17.i.i.i.i
|
||||
; CHECK-NEXT: v_mov_b32_e32 v44, 1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 1
|
||||
; CHECK-NEXT: ; %bb.3: ; %Flow
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: .LBB0_4: ; %Flow8
|
||||
@ -105,10 +106,10 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: ; %bb.7: ; %Flow7
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v44, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CHECK-NEXT: .LBB0_8: ; %bb.1
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v44
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB0_10
|
||||
; CHECK-NEXT: ; %bb.9: ; %sw.bb.i.i.i.i.i
|
||||
|
||||
@ -6,34 +6,35 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
||||
; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v4
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 8
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dword v3, v1, s[0:1]
|
||||
; GFX942-NEXT: s_mov_b32 s4, 0xff0000
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v2, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v2, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_or_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
||||
; GFX942-NEXT: v_and_or_b32 v3, v3, s4, v5
|
||||
; GFX942-NEXT: v_or_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v3, v3, s4, v4
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dword v1, v1, s[2:3]
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX942-NEXT: global_load_dword v0, v0, s[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v2, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_or_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v3, v1, s4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v3, v0, s4, v2
|
||||
; GFX942-NEXT: .LBB0_2: ; %bb.2
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: global_store_byte_d16_hi v0, v3, s[6:7] offset:2
|
||||
; GFX942-NEXT: global_store_short v0, v3, s[6:7]
|
||||
; GFX942-NEXT: global_store_byte_d16_hi v1, v3, s[6:7] offset:2
|
||||
; GFX942-NEXT: global_store_short v1, v3, s[6:7]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -57,20 +58,21 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
||||
; GFX942-NEXT: v_and_b32_e32 v3, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v3
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dword v2, v1, s[0:1]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v3
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dword v2, v1, s[2:3]
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX942-NEXT: global_load_dword v2, v0, s[2:3]
|
||||
; GFX942-NEXT: .LBB1_2: ; %bb.2
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_store_dword v0, v2, s[6:7]
|
||||
; GFX942-NEXT: global_store_dword v1, v2, s[6:7]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -94,24 +96,25 @@ define amdgpu_kernel void @v5i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
||||
; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v2
|
||||
; GFX942-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[0:1]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB2_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[2:3]
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v2
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||
; GFX942-NEXT: .LBB2_2: ; %bb.2
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: global_store_byte v2, v1, s[6:7] offset:4
|
||||
; GFX942-NEXT: global_store_dword v2, v0, s[6:7]
|
||||
; GFX942-NEXT: global_store_byte v3, v1, s[6:7] offset:4
|
||||
; GFX942-NEXT: global_store_dword v3, v0, s[6:7]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -135,20 +138,21 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
||||
; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[0:1]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB3_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[2:3]
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[2:3]
|
||||
; GFX942-NEXT: .LBB3_2: ; %bb.2
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[6:7]
|
||||
; GFX942-NEXT: global_store_dwordx2 v1, v[2:3], s[6:7]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -172,20 +176,21 @@ define amdgpu_kernel void @v16i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
||||
; GFX942-NEXT: v_and_b32_e32 v6, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 4, v6
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 4, v0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dwordx4 v[2:5], v1, s[0:1]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v6
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB4_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dwordx4 v[2:5], v1, s[2:3]
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 4, v0
|
||||
; GFX942-NEXT: global_load_dwordx4 v[2:5], v0, s[2:3]
|
||||
; GFX942-NEXT: .LBB4_2: ; %bb.2
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[6:7]
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[2:5], s[6:7]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -209,24 +214,25 @@ define amdgpu_kernel void @v32i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
||||
; GFX942-NEXT: v_and_b32_e32 v10, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 5, v10
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 5, v0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dwordx4 v[6:9], v1, s[0:1] offset:16
|
||||
; GFX942-NEXT: global_load_dwordx4 v[2:5], v1, s[0:1]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v10
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB5_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dwordx4 v[6:9], v1, s[2:3] offset:16
|
||||
; GFX942-NEXT: global_load_dwordx4 v[2:5], v1, s[2:3]
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 5, v0
|
||||
; GFX942-NEXT: global_load_dwordx4 v[6:9], v0, s[2:3] offset:16
|
||||
; GFX942-NEXT: global_load_dwordx4 v[2:5], v0, s[2:3]
|
||||
; GFX942-NEXT: .LBB5_2: ; %bb.2
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(1)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[6:9], s[6:7] offset:16
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[6:9], s[6:7] offset:16
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(1)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[6:7]
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[2:5], s[6:7]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -250,9 +256,9 @@ define amdgpu_kernel void @v256i8_liveout(ptr addrspace(1) %src1, ptr addrspace(
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
||||
; GFX942-NEXT: v_and_b32_e32 v62, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v62
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dwordx4 v[30:33], v1, s[0:1] offset:240
|
||||
; GFX942-NEXT: global_load_dwordx4 v[26:29], v1, s[0:1] offset:224
|
||||
@ -270,52 +276,53 @@ define amdgpu_kernel void @v256i8_liveout(ptr addrspace(1) %src1, ptr addrspace(
|
||||
; GFX942-NEXT: global_load_dwordx4 v[42:45], v1, s[0:1] offset:32
|
||||
; GFX942-NEXT: global_load_dwordx4 v[38:41], v1, s[0:1] offset:16
|
||||
; GFX942-NEXT: global_load_dwordx4 v[34:37], v1, s[0:1]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v62
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB6_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dwordx4 v[30:33], v1, s[2:3] offset:240
|
||||
; GFX942-NEXT: global_load_dwordx4 v[26:29], v1, s[2:3] offset:224
|
||||
; GFX942-NEXT: global_load_dwordx4 v[22:25], v1, s[2:3] offset:208
|
||||
; GFX942-NEXT: global_load_dwordx4 v[18:21], v1, s[2:3] offset:192
|
||||
; GFX942-NEXT: global_load_dwordx4 v[14:17], v1, s[2:3] offset:176
|
||||
; GFX942-NEXT: global_load_dwordx4 v[10:13], v1, s[2:3] offset:160
|
||||
; GFX942-NEXT: global_load_dwordx4 v[6:9], v1, s[2:3] offset:144
|
||||
; GFX942-NEXT: global_load_dwordx4 v[2:5], v1, s[2:3] offset:128
|
||||
; GFX942-NEXT: global_load_dwordx4 a[0:3], v1, s[2:3] offset:112
|
||||
; GFX942-NEXT: global_load_dwordx4 v[58:61], v1, s[2:3] offset:96
|
||||
; GFX942-NEXT: global_load_dwordx4 v[54:57], v1, s[2:3] offset:80
|
||||
; GFX942-NEXT: global_load_dwordx4 v[50:53], v1, s[2:3] offset:64
|
||||
; GFX942-NEXT: global_load_dwordx4 v[46:49], v1, s[2:3] offset:48
|
||||
; GFX942-NEXT: global_load_dwordx4 v[42:45], v1, s[2:3] offset:32
|
||||
; GFX942-NEXT: global_load_dwordx4 v[38:41], v1, s[2:3] offset:16
|
||||
; GFX942-NEXT: global_load_dwordx4 v[34:37], v1, s[2:3]
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0
|
||||
; GFX942-NEXT: global_load_dwordx4 v[30:33], v0, s[2:3] offset:240
|
||||
; GFX942-NEXT: global_load_dwordx4 v[26:29], v0, s[2:3] offset:224
|
||||
; GFX942-NEXT: global_load_dwordx4 v[22:25], v0, s[2:3] offset:208
|
||||
; GFX942-NEXT: global_load_dwordx4 v[18:21], v0, s[2:3] offset:192
|
||||
; GFX942-NEXT: global_load_dwordx4 v[14:17], v0, s[2:3] offset:176
|
||||
; GFX942-NEXT: global_load_dwordx4 v[10:13], v0, s[2:3] offset:160
|
||||
; GFX942-NEXT: global_load_dwordx4 v[6:9], v0, s[2:3] offset:144
|
||||
; GFX942-NEXT: global_load_dwordx4 v[2:5], v0, s[2:3] offset:128
|
||||
; GFX942-NEXT: global_load_dwordx4 a[0:3], v0, s[2:3] offset:112
|
||||
; GFX942-NEXT: global_load_dwordx4 v[58:61], v0, s[2:3] offset:96
|
||||
; GFX942-NEXT: global_load_dwordx4 v[54:57], v0, s[2:3] offset:80
|
||||
; GFX942-NEXT: global_load_dwordx4 v[50:53], v0, s[2:3] offset:64
|
||||
; GFX942-NEXT: global_load_dwordx4 v[46:49], v0, s[2:3] offset:48
|
||||
; GFX942-NEXT: global_load_dwordx4 v[42:45], v0, s[2:3] offset:32
|
||||
; GFX942-NEXT: global_load_dwordx4 v[38:41], v0, s[2:3] offset:16
|
||||
; GFX942-NEXT: global_load_dwordx4 v[34:37], v0, s[2:3]
|
||||
; GFX942-NEXT: .LBB6_2: ; %bb.2
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(7)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] offset:112
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[6:7] offset:112
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(7)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[58:61], s[6:7] offset:96
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[58:61], s[6:7] offset:96
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(7)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[54:57], s[6:7] offset:80
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[54:57], s[6:7] offset:80
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(7)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[50:53], s[6:7] offset:64
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[50:53], s[6:7] offset:64
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(7)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[46:49], s[6:7] offset:48
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[46:49], s[6:7] offset:48
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(7)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[42:45], s[6:7] offset:32
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[42:45], s[6:7] offset:32
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(7)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[38:41], s[6:7] offset:16
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[38:41], s[6:7] offset:16
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(7)
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[34:37], s[6:7]
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[30:33], s[6:7] offset:240
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[26:29], s[6:7] offset:224
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[22:25], s[6:7] offset:208
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[18:21], s[6:7] offset:192
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[14:17], s[6:7] offset:176
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7] offset:160
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[6:9], s[6:7] offset:144
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[6:7] offset:128
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[34:37], s[6:7]
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[30:33], s[6:7] offset:240
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[26:29], s[6:7] offset:224
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[22:25], s[6:7] offset:208
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[18:21], s[6:7] offset:192
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[14:17], s[6:7] offset:176
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[10:13], s[6:7] offset:160
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[6:9], s[6:7] offset:144
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, v[2:5], s[6:7] offset:128
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -400,6 +407,7 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB8_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v0
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[10:11]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
|
||||
; GFX942-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
@ -457,6 +465,7 @@ define amdgpu_kernel void @v8i8_phi_zeroinit(ptr addrspace(1) %src1, ptr addrspa
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB9_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v0
|
||||
; GFX942-NEXT: global_load_dwordx2 v[4:5], v1, s[10:11]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(1)
|
||||
@ -507,85 +516,86 @@ define amdgpu_kernel void @v8i8_phi_const(ptr addrspace(1) %src1, ptr addrspace(
|
||||
; GFX942-LABEL: v8i8_phi_const:
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
|
||||
; GFX942-NEXT: v_and_b32_e32 v16, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v16
|
||||
; GFX942-NEXT: v_cmp_lt_u32_e64 s[0:1], 14, v16
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v16
|
||||
; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v4
|
||||
; GFX942-NEXT: v_cmp_lt_u32_e64 s[0:1], 14, v4
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[8:9]
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v0, s[8:9]
|
||||
; GFX942-NEXT: ; implicit-def: $vgpr2
|
||||
; GFX942-NEXT: ; implicit-def: $vgpr12
|
||||
; GFX942-NEXT: ; implicit-def: $vgpr10
|
||||
; GFX942-NEXT: ; implicit-def: $vgpr13
|
||||
; GFX942-NEXT: ; implicit-def: $vgpr14
|
||||
; GFX942-NEXT: ; implicit-def: $vgpr11
|
||||
; GFX942-NEXT: ; implicit-def: $vgpr14
|
||||
; GFX942-NEXT: ; implicit-def: $vgpr15
|
||||
; GFX942-NEXT: ; implicit-def: $vgpr12
|
||||
; GFX942-NEXT: ; implicit-def: $vgpr16
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v4, 24, v1
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v5, 16, v1
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v6, 8, v1
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v7, 24, v0
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v0
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v9, 8, v0
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v5, 24, v1
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v1
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v7, 8, v1
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v8, 24, v0
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v0
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v10, 8, v0
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB10_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v3, s[10:11]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v16
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v4
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[10:11]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v4
|
||||
; GFX942-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX942-NEXT: s_and_b64 s[4:5], vcc, exec
|
||||
; GFX942-NEXT: v_mov_b32_e32 v4, 8
|
||||
; GFX942-NEXT: v_mov_b32_e32 v5, 7
|
||||
; GFX942-NEXT: v_mov_b32_e32 v6, 6
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 5
|
||||
; GFX942-NEXT: v_mov_b32_e32 v7, 4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v8, 3
|
||||
; GFX942-NEXT: v_mov_b32_e32 v9, 2
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 1
|
||||
; GFX942-NEXT: v_mov_b32_e32 v10, 2
|
||||
; GFX942-NEXT: v_mov_b32_e32 v9, 3
|
||||
; GFX942-NEXT: v_mov_b32_e32 v8, 4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 5
|
||||
; GFX942-NEXT: v_mov_b32_e32 v7, 6
|
||||
; GFX942-NEXT: v_mov_b32_e32 v6, 7
|
||||
; GFX942-NEXT: v_mov_b32_e32 v5, 8
|
||||
; GFX942-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v15, 24, v3
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v11, 16, v3
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v14, 8, v3
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v13, 24, v2
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v10, 16, v2
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v12, 8, v2
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v16, 24, v3
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v12, 16, v3
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v15, 8, v3
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v14, 24, v2
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v11, 16, v2
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v13, 8, v2
|
||||
; GFX942-NEXT: .LBB10_2: ; %Flow
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB10_4
|
||||
; GFX942-NEXT: ; %bb.3: ; %bb.2
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v2, 8, v9
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v3, 8, v7
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v2, 8, v10
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v3, 8, v8
|
||||
; GFX942-NEXT: v_or_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_or_b32_sdwa v3, v8, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v11, 8, v4
|
||||
; GFX942-NEXT: v_or_b32_sdwa v3, v9, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v11, 8, v5
|
||||
; GFX942-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v3, 8, v6
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v3, 8, v7
|
||||
; GFX942-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_or_b32_sdwa v11, v5, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_mov_b32_e32 v10, 0
|
||||
; GFX942-NEXT: v_or_b32_sdwa v11, v6, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX942-NEXT: v_or_b32_sdwa v3, v3, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[12:13]
|
||||
; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[12:13]
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v12, v9
|
||||
; GFX942-NEXT: v_mov_b32_e32 v10, v8
|
||||
; GFX942-NEXT: v_mov_b32_e32 v13, v7
|
||||
; GFX942-NEXT: v_mov_b32_e32 v13, v10
|
||||
; GFX942-NEXT: v_mov_b32_e32 v11, v9
|
||||
; GFX942-NEXT: v_mov_b32_e32 v14, v8
|
||||
; GFX942-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX942-NEXT: v_mov_b32_e32 v14, v6
|
||||
; GFX942-NEXT: v_mov_b32_e32 v11, v5
|
||||
; GFX942-NEXT: v_mov_b32_e32 v15, v4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v15, v7
|
||||
; GFX942-NEXT: v_mov_b32_e32 v12, v6
|
||||
; GFX942-NEXT: v_mov_b32_e32 v16, v5
|
||||
; GFX942-NEXT: .LBB10_4: ; %bb.3
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v0, 8, v12
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v1, 8, v13
|
||||
; GFX942-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_or_b32_sdwa v1, v10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v2, 8, v15
|
||||
; GFX942-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v0, 8, v13
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v1, 8, v14
|
||||
; GFX942-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_or_b32_sdwa v1, v11, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v2, 8, v16
|
||||
; GFX942-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_lshlrev_b16_e32 v1, 8, v15
|
||||
; GFX942-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_or_b32_sdwa v2, v11, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_or_b32_sdwa v2, v12, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX942-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[14:15]
|
||||
@ -617,30 +627,31 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
|
||||
; GFX942-LABEL: v8i8_multi_block:
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
|
||||
; GFX942-NEXT: v_and_b32_e32 v3, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v3
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v3
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v4, s[8:9]
|
||||
; GFX942-NEXT: global_load_dwordx2 v[4:5], v1, s[8:9]
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB11_4
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dwordx2 v[6:7], v4, s[10:11]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v3
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v2, s[10:11]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB11_3
|
||||
; GFX942-NEXT: ; %bb.2: ; %bb.2
|
||||
; GFX942-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[12:13]
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: global_store_dwordx2 v0, v[4:5], s[12:13]
|
||||
; GFX942-NEXT: .LBB11_3: ; %Flow
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX942-NEXT: .LBB11_4: ; %bb.3
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_store_dwordx2 v2, v[6:7], s[14:15]
|
||||
; GFX942-NEXT: global_store_dwordx2 v1, v[2:3], s[14:15]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -858,16 +869,17 @@ define amdgpu_kernel void @v8i8_mfma_i8(ptr addrspace(1) %src1, ptr addrspace(1)
|
||||
; GFX942-LABEL: v8i8_mfma_i8:
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
|
||||
; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[8:9]
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB14_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[10:11]
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[10:11]
|
||||
; GFX942-NEXT: .LBB14_2: ; %bb.2
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[14:15], 0x0
|
||||
@ -880,7 +892,7 @@ define amdgpu_kernel void @v8i8_mfma_i8(ptr addrspace(1) %src1, ptr addrspace(1)
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
|
||||
; GFX942-NEXT: s_nop 6
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, a[0:3], s[12:13]
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[12:13]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -908,16 +920,17 @@ define amdgpu_kernel void @v8i8_mfma_half(ptr addrspace(1) %src1, ptr addrspace(
|
||||
; GFX942-LABEL: v8i8_mfma_half:
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx8 s[36:43], s[4:5], 0x24
|
||||
; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v0
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[36:37]
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB15_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v1, s[38:39]
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0
|
||||
; GFX942-NEXT: global_load_dwordx2 v[2:3], v0, s[38:39]
|
||||
; GFX942-NEXT: .LBB15_2: ; %bb.2
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: s_load_dwordx16 s[16:31], s[42:43], 0x0
|
||||
@ -960,14 +973,14 @@ define amdgpu_kernel void @v8i8_mfma_half(ptr addrspace(1) %src1, ptr addrspace(
|
||||
; GFX942-NEXT: v_mfma_f32_32x32x4_2b_f16 a[0:31], v[2:3], v[2:3], a[0:31] cbsz:1 abid:2 blgp:3
|
||||
; GFX942-NEXT: s_nop 15
|
||||
; GFX942-NEXT: s_nop 2
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, a[28:31], s[40:41] offset:112
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, a[24:27], s[40:41] offset:96
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, a[20:23], s[40:41] offset:80
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, a[16:19], s[40:41] offset:64
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, a[12:15], s[40:41] offset:48
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, a[8:11], s[40:41] offset:32
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, a[4:7], s[40:41] offset:16
|
||||
; GFX942-NEXT: global_store_dwordx4 v0, a[0:3], s[40:41]
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, a[28:31], s[40:41] offset:112
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, a[24:27], s[40:41] offset:96
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, a[20:23], s[40:41] offset:80
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, a[16:19], s[40:41] offset:64
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, a[12:15], s[40:41] offset:48
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, a[8:11], s[40:41] offset:32
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, a[4:7], s[40:41] offset:16
|
||||
; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[40:41]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -995,21 +1008,23 @@ define amdgpu_kernel void @v8i8_intrinsic(ptr addrspace(1) %src1, ptr addrspace(
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
||||
; GFX942-NEXT: v_and_b32_e32 v4, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v4
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: v_and_b32_e32 v2, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v2
|
||||
; GFX942-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[0:1]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v4
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1]
|
||||
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 15, v2
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB16_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %bb.1
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v3, s[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v2
|
||||
; GFX942-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3]
|
||||
; GFX942-NEXT: .LBB16_2: ; %bb.2
|
||||
; GFX942-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_fma_f32 v[0:1], v[0:1], v[0:1], v[0:1]
|
||||
; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
|
||||
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[6:7]
|
||||
; GFX942-NEXT: s_endpgm
|
||||
entry:
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user