The main improvement is to the mfma tests. There are some mild regressions scattered around, and a few major ones. The worst regressions are in some of the bitcast tests; these are cases where the SGPR argument list runs out and uses VGPRs, and the copies-from-VGPR are misidentified as divergent. Most of the shufflevector tests are also regressions. These end up with cleaner MIR, but then get poor regalloc decisions.
155 lines
5.9 KiB
LLVM
155 lines
5.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s

; Returning 1D image atomic swap (i32) whose data operand is defined by inline
; asm under the "=a" constraint. The expected output has image_atomic_swap
; operate on a0 directly: the consecutive -NEXT assertions leave no room for a
; copy between the asm block and the atomic.
; NOTE(review): the "; use $0" asm below is not marked sideeffect and no
; "; use" text appears in the expected output; presumably the call is dropped
; as dead. Confirm whether the returned value was meant to be observed.
define amdgpu_ps void @atomic_swap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
; GFX90A-LABEL: atomic_swap_1d_agpr:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a0
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_endpgm
  %data = call i32 asm "; def $0", "=a"()
  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  call void asm "; use $0", "a"(i32 %v)
  ret void
}

; Returning 2D image atomic add (i32) whose data operand is defined by inline
; asm under the "=a" constraint. The two coordinates %s and %t show up as the
; address pair v[0:1]; the data operand is expected to be a0 with no copy
; between the asm block and the atomic.
; NOTE(review): the "; use $0" asm below is not marked sideeffect and no
; "; use" text appears in the expected output; presumably the call is dropped
; as dead. Confirm whether the returned value was meant to be observed.
define amdgpu_ps void @atomic_add_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; GFX90A-LABEL: atomic_add_2d_agpr:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a0
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_endpgm
  %data = call i32 asm "; def $0", "=a"()
  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  call void asm "; use $0", "a"(i32 %v)
  ret void
}

; FIXME: This should directly use the AGPRs
; NOTE(review): the expected output below already shows image_atomic_cmpswap
; taking a[0:1]; the FIXME may be stale, or may refer to something these
; assertions do not capture. Confirm before removing it.
;
; Returning 1D image atomic cmpswap (i32). Both data operands are defined by
; separate inline asm blocks under the "=a" constraint (expected in a0 and a1)
; and are consumed by the atomic as the register pair a[0:1] with dmask 0x3.
; NOTE(review): the "; use $0" asm below is not marked sideeffect and no
; "; use" text appears in the expected output; presumably the call is dropped
; as dead. Confirm whether the returned value was meant to be observed.
define amdgpu_ps void @atomic_cmpswap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
; GFX90A-LABEL: atomic_cmpswap_1d_agpr:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a0
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a1
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: image_atomic_cmpswap a[0:1], v0, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT: s_endpgm
  %cmp = call i32 asm "; def $0", "=a"()
  %swap = call i32 asm "; def $0", "=a"()
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  call void asm "; use $0", "a"(i32 %v)
  ret void
}

; Returning 1D image atomic swap on an i64 value. The 64-bit data operand is
; defined by inline asm under the "=a" constraint and is expected in the pair
; a[0:1], which the atomic uses directly with dmask 0x3.
; NOTE(review): the "; use $0" asm below is not marked sideeffect and no
; "; use" text appears in the expected output; presumably the call is dropped
; as dead. Confirm whether the returned value was meant to be observed.
define amdgpu_ps void @atomic_swap_1d_i64_agpr(<8 x i32> inreg %rsrc, i32 %s) {
; GFX90A-LABEL: atomic_swap_1d_i64_agpr:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT: s_endpgm
  %data = call i64 asm "; def $0", "=a"()
  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  call void asm "; use $0", "a"(i64 %v)
  ret void
}

; Returning 1D image atomic cmpswap on i64 values. The two 64-bit data
; operands are defined by separate inline asm blocks under the "=a" constraint
; (expected in a[0:1] and a[2:3]) and are consumed by the atomic as the
; four-register tuple a[0:3] with dmask 0xf.
; NOTE(review): the "; use $0" asm below is not marked sideeffect and no
; "; use" text appears in the expected output; presumably the call is dropped
; as dead. Confirm whether the returned value was meant to be observed.
define amdgpu_ps void @atomic_cmpswap_1d_64_agpr(<8 x i32> inreg %rsrc, i32 %s) {
; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: image_atomic_cmpswap a[0:3], v0, s[0:7] dmask:0xf unorm glc
; GFX90A-NEXT: s_endpgm
  %cmp = call i64 asm "; def $0", "=a"()
  %swap = call i64 asm "; def $0", "=a"()
  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  call void asm "; use $0", "a"(i64 %v)
  ret void
}

; 1D image atomic swap (i32) whose result (%unused) is never read. The data
; operand is defined by inline asm under the "=a" constraint and is expected
; to reach the atomic as a0 with no intervening copy. The expected
; instruction carries no glc modifier.
define amdgpu_ps void @atomic_swap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
; GFX90A-LABEL: atomic_swap_1d_agpr_noret:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a0
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm
; GFX90A-NEXT: s_endpgm
  %data = call i32 asm "; def $0", "=a"()
  %unused = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; 2D image atomic add (i32) whose result (%unused) is never read. The data
; operand is defined by inline asm under the "=a" constraint and is expected
; to reach the atomic as a0; the coordinates %s and %t form the address pair
; v[0:1]. The expected instruction carries no glc modifier.
define amdgpu_ps void @atomic_add_2d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; GFX90A-LABEL: atomic_add_2d_agpr_noret:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a0
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm
; GFX90A-NEXT: s_endpgm
  %data = call i32 asm "; def $0", "=a"()
  %unused = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; 1D image atomic cmpswap (i32) whose result (%unused) is never read. Both
; data operands are defined by separate inline asm blocks under the "=a"
; constraint (expected in a0 and a1) and are consumed as the pair a[0:1] with
; dmask 0x3. The expected instruction carries no glc modifier.
define amdgpu_ps void @atomic_cmpswap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
; GFX90A-LABEL: atomic_cmpswap_1d_agpr_noret:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a0
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a1
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: image_atomic_cmpswap a[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX90A-NEXT: s_endpgm
  %cmp = call i32 asm "; def $0", "=a"()
  %swap = call i32 asm "; def $0", "=a"()
  %unused = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; 1D image atomic swap on an i64 value whose result (%unused) is never read.
; The 64-bit data operand is defined by inline asm under the "=a" constraint
; and is expected to reach the atomic as the pair a[0:1] with dmask 0x3. The
; expected instruction carries no glc modifier.
define amdgpu_ps void @atomic_swap_1d_i64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
; GFX90A-LABEL: atomic_swap_1d_i64_agpr_noret:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX90A-NEXT: s_endpgm
  %data = call i64 asm "; def $0", "=a"()
  %unused = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; 1D image atomic cmpswap on i64 values whose result (%unused) is never read.
; The two 64-bit data operands are defined by separate inline asm blocks under
; the "=a" constraint (expected in a[0:1] and a[2:3]) and are consumed as the
; four-register tuple a[0:3] with dmask 0xf. The expected instruction carries
; no glc modifier.
define amdgpu_ps void @atomic_cmpswap_1d_64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr_noret:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: image_atomic_cmpswap a[0:3], v0, s[0:7] dmask:0xf unorm
; GFX90A-NEXT: s_endpgm
  %cmp = call i64 asm "; def $0", "=a"()
  %swap = call i64 asm "; def $0", "=a"()
  %unused = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}