
[AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942). The only notable difference in the test output is that gfx942 does not force the use of sc0 and sc1 on stores while gfx940 and gfx941 do (cf. https://reviews.llvm.org/D149986). For SWDEV-512631
123 lines
8.0 KiB
LLVM
123 lines
8.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
|
|
|
|
|
|
define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
|
|
; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn
|
|
; GFX908: bb.0 (%ir-block.0):
|
|
; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
|
|
; GFX908-NEXT: {{ $}}
|
|
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
|
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
|
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
|
; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
|
|
; GFX908-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_no_rtn
|
|
; GFX90A_GFX942: bb.0 (%ir-block.0):
|
|
; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
|
|
; GFX90A_GFX942-NEXT: {{ $}}
|
|
; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
|
; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
|
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
|
|
; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
|
|
; GFX90A_GFX942-NEXT: S_ENDPGM 0
|
|
%ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) inreg %ptr, <2 x half> %data) {
|
|
; GFX908-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn
|
|
; GFX908: bb.0 (%ir-block.0):
|
|
; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0
|
|
; GFX908-NEXT: {{ $}}
|
|
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
|
|
; GFX908-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn
|
|
; GFX90A_GFX942: bb.0 (%ir-block.0):
|
|
; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0
|
|
; GFX90A_GFX942-NEXT: {{ $}}
|
|
; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
|
|
; GFX90A_GFX942-NEXT: S_ENDPGM 0
|
|
%ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat(ptr addrspace(1) %ptr, <2 x half> %data) {
|
|
; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat
|
|
; GFX908: bb.0 (%ir-block.0):
|
|
; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
|
|
; GFX908-NEXT: {{ $}}
|
|
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
|
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
|
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
|
|
; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
|
|
; GFX908-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat
|
|
; GFX90A_GFX942: bb.0 (%ir-block.0):
|
|
; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
|
|
; GFX90A_GFX942-NEXT: {{ $}}
|
|
; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
|
; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
|
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
|
|
; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
|
|
; GFX90A_GFX942-NEXT: S_ENDPGM 0
|
|
%ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat(ptr addrspace(1) inreg %ptr, <2 x half> %data) {
|
|
; GFX908-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat
|
|
; GFX908: bb.0 (%ir-block.0):
|
|
; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0
|
|
; GFX908-NEXT: {{ $}}
|
|
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
|
|
; GFX908-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat
|
|
; GFX90A_GFX942: bb.0 (%ir-block.0):
|
|
; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0
|
|
; GFX90A_GFX942-NEXT: {{ $}}
|
|
; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
|
|
; GFX90A_GFX942-NEXT: S_ENDPGM 0
|
|
%ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
|
ret void
|
|
}
|
|
|
|
!0 = !{}
|