AMDGPU/GlobalISel: Use B32 for readfirstlane (#187809)

Using B32 also adds the missing pointer support to the readfirstlane
intrinsic rule.
This commit is contained in:
vangthao95 2026-03-23 09:04:49 -07:00 committed by GitHub
parent 44df4116c8
commit 0e0dc535d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 126 additions and 68 deletions

View File

@ -1534,7 +1534,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
addRulesForIOpcs({amdgcn_readfirstlane})
.Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
.Any({{UniB32, _, DivB32}, {{}, {SgprB32, None, VgprB32}}})
// This case should not exist in the first place; it comes from call lowering
// readfirstlaning just in case the register is not in an SGPR.
.Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

View File

@ -1613,10 +1613,10 @@ define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) {
; CHECK-GISEL-LABEL: test_readfirstlane_v8i16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:7]
; CHECK-GISEL-NEXT: ;;#ASMEND
@ -1646,14 +1646,14 @@ define void @test_readfirstlane_v16i16(ptr addrspace(1) %out, <16 x i16> %src) {
; CHECK-GISEL-LABEL: test_readfirstlane_v16i16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:11]
; CHECK-GISEL-NEXT: ;;#ASMEND
@ -1691,22 +1691,22 @@ define void @test_readfirstlane_v32i16(ptr addrspace(1) %out, <32 x i16> %src) {
; CHECK-GISEL-LABEL: test_readfirstlane_v32i16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s19, v17
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s18, v16
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s17, v15
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s16, v14
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s15, v13
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s14, v12
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s13, v11
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s12, v10
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s12, v10
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s13, v11
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s14, v12
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s15, v13
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s16, v14
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s17, v15
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s18, v16
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s19, v17
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:19]
; CHECK-GISEL-NEXT: ;;#ASMEND
@ -1745,22 +1745,22 @@ define void @test_readfirstlane_v32f16(ptr addrspace(1) %out, <32 x half> %src)
; CHECK-GISEL-LABEL: test_readfirstlane_v32f16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s19, v17
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s18, v16
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s17, v15
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s16, v14
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s15, v13
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s14, v12
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s13, v11
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s12, v10
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s12, v10
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s13, v11
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s14, v12
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s15, v13
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s16, v14
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s17, v15
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s18, v16
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s19, v17
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:19]
; CHECK-GISEL-NEXT: ;;#ASMEND

View File

@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1030 -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1030 -global-isel -new-reg-bank-select -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -global-isel -new-reg-bank-select -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX11 %s
; Test codegen with readfirstlane used by M0.
;
; M0 can only be written to by SALU instructions so we can't emit

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=CHECK-SDAG -enable-var-scope %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=CHECK,CHECK-SDAG -enable-var-scope %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=CHECK,CHECK-GISEL -enable-var-scope %s
define void @test_readfirstlane_p0(ptr addrspace(1) %out, ptr %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_p0:
@ -11,6 +12,16 @@ define void @test_readfirstlane_p0(ptr addrspace(1) %out, ptr %src) {
; CHECK-SDAG-NEXT: ; use s[4:5]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_p0:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:5]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call ptr @llvm.amdgcn.readfirstlane.p0(ptr %src)
call void asm sideeffect "; use $0", "s"(ptr %x)
ret void
@ -30,20 +41,34 @@ define void @test_readfirstlane_v3p0(ptr addrspace(1) %out, <3 x ptr> %src) {
; CHECK-SDAG-NEXT: ; use s[4:9]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v3p0:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:9]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call <3 x ptr> @llvm.amdgcn.readfirstlane.v3p0(<3 x ptr> %src)
call void asm sideeffect "; use $0", "s"(<3 x ptr> %x)
ret void
}
define void @test_readfirstlane_p3(ptr addrspace(1) %out, ptr addrspace(3) %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_p3:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: test_readfirstlane_p3:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s4
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_setpc_b64 s[30:31]
%x = call ptr addrspace(3) @llvm.amdgcn.readfirstlane.p3(ptr addrspace(3) %src)
call void asm sideeffect "; use $0", "s"(ptr addrspace(3) %x)
ret void
@ -60,20 +85,31 @@ define void @test_readfirstlane_v3p3(ptr addrspace(1) %out, <3 x ptr addrspace(3
; CHECK-SDAG-NEXT: ; use s[4:6]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v3p3:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:6]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call <3 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v3p3(<3 x ptr addrspace(3)> %src)
call void asm sideeffect "; use $0", "s"(<3 x ptr addrspace(3)> %x)
ret void
}
define void @test_readfirstlane_p5(ptr addrspace(1) %out, ptr addrspace(5) %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_p5:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: test_readfirstlane_p5:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s4
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_setpc_b64 s[30:31]
%x = call ptr addrspace(5) @llvm.amdgcn.readfirstlane.p5(ptr addrspace(5) %src)
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %x)
ret void
@ -90,20 +126,31 @@ define void @test_readfirstlane_v3p5(ptr addrspace(1) %out, <3 x ptr addrspace(5
; CHECK-SDAG-NEXT: ; use s[4:6]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v3p5:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:6]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call <3 x ptr addrspace(5)> @llvm.amdgcn.readfirstlane.v3p5(<3 x ptr addrspace(5)> %src)
call void asm sideeffect "; use $0", "s"(<3 x ptr addrspace(5)> %x)
ret void
}
define void @test_readfirstlane_p6(ptr addrspace(1) %out, ptr addrspace(6) %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_p6:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: test_readfirstlane_p6:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s4
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_setpc_b64 s[30:31]
%x = call ptr addrspace(6) @llvm.amdgcn.readfirstlane.p6(ptr addrspace(6) %src)
call void asm sideeffect "; use $0", "s"(ptr addrspace(6) %x)
ret void
@ -120,6 +167,17 @@ define void @test_readfirstlane_v3p6(ptr addrspace(1) %out, <3 x ptr addrspace(6
; CHECK-SDAG-NEXT: ; use s[4:6]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v3p6:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:6]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call <3 x ptr addrspace(6)> @llvm.amdgcn.readfirstlane.v3p6(<3 x ptr addrspace(6)> %src)
call void asm sideeffect "; use $0", "s"(<3 x ptr addrspace(6)> %x)
ret void