From 2ff08ce0e784f771e01b6f9c9277781e3a5eb211 Mon Sep 17 00:00:00 2001 From: vangthao95 Date: Fri, 13 Mar 2026 10:42:38 -0700 Subject: [PATCH] AMDGPU/GlobalISel: RegBankLegalize rules for amdgcn_endpgm (#186217) --- .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 1 + llvm/test/CodeGen/AMDGPU/amd.endpgm.ll | 153 ++++++++++++------ 2 files changed, 104 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 1459ee5e7681..0c7c823b7875 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -1424,6 +1424,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}}); addRulesForIOpcs({amdgcn_groupstaticsize}).Any({{S32}, {{Sgpr32}, {IntrId}}}); + addRulesForIOpcs({amdgcn_endpgm}).Any({{}, {{}, {}}}); // This is "intrinsic lane mask" it was set to i32/i64 in llvm-ir. addRulesForIOpcs({amdgcn_end_cf}) diff --git a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll index 3b9682ec7b10..306942b9997e 100644 --- a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll +++ b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll @@ -1,7 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s --check-prefix=GFX9 -; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s --check-prefix=GFX10 -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s --check-prefix=GFX11 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=GFX9,GFX9-SDAG +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=GFX9,GFX9-GISEL +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s --check-prefixes=GFX10,GFX10-SDAG +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s --check-prefixes=GFX10,GFX10-GISEL +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s --check-prefixes=GFX11,GFX11-SDAG +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s --check-prefixes=GFX11,GFX11-GISEL define amdgpu_kernel void @test0() { ; GFX9-LABEL: test0: @@ -39,55 +42,105 @@ define void @test1() { } define amdgpu_kernel void @test2(ptr %p, i32 %x) { -; GFX9-LABEL: test2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s0, s[4:5], 0x2c -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_cmp_lt_i32 s0, 1 -; GFX9-NEXT: s_cbranch_scc0 .LBB2_2 -; GFX9-NEXT: ; %bb.1: ; %else -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: flat_store_dword v[0:1], v2 -; GFX9-NEXT: s_endpgm -; GFX9-NEXT: .LBB2_2: ; %then -; GFX9-NEXT: s_endpgm +; GFX9-SDAG-LABEL: test2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: s_cmp_lt_i32 s0, 1 +; GFX9-SDAG-NEXT: s_cbranch_scc0 .LBB2_2 +; GFX9-SDAG-NEXT: ; %bb.1: ; %else +; GFX9-SDAG-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-SDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX9-SDAG-NEXT: s_endpgm +; GFX9-SDAG-NEXT: .LBB2_2: ; %then +; GFX9-SDAG-NEXT: s_endpgm ; -; GFX10-LABEL: test2: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dword s0, s[4:5], 0x2c -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_cmp_lt_i32 s0, 1 -; GFX10-NEXT: s_cbranch_scc0 .LBB2_2 -; GFX10-NEXT: ; %bb.1: ; %else -; GFX10-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: flat_store_dword v[0:1], v2 -; GFX10-NEXT: s_endpgm -; GFX10-NEXT: .LBB2_2: ; %then -; GFX10-NEXT: s_endpgm +; GFX9-GISEL-LABEL: test2: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_cmp_le_i32 s0, 0 +; GFX9-GISEL-NEXT: s_cbranch_scc0 .LBB2_2 +; GFX9-GISEL-NEXT: ; %bb.1: ; %else +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-GISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX9-GISEL-NEXT: s_endpgm +; GFX9-GISEL-NEXT: .LBB2_2: ; %then +; GFX9-GISEL-NEXT: s_endpgm ; -; GFX11-LABEL: test2: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_cmp_lt_i32 s0, 1 -; GFX11-NEXT: s_cbranch_scc0 .LBB2_2 -; GFX11-NEXT: ; %bb.1: ; %else -; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 -; GFX11-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_endpgm -; GFX11-NEXT: .LBB2_2: ; %then -; GFX11-NEXT: s_endpgm +; GFX10-SDAG-LABEL: test2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: s_cmp_lt_i32 s0, 1 +; GFX10-SDAG-NEXT: s_cbranch_scc0 .LBB2_2 +; GFX10-SDAG-NEXT: ; %bb.1: ; %else +; GFX10-SDAG-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, s3 +; GFX10-SDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX10-SDAG-NEXT: s_endpgm +; GFX10-SDAG-NEXT: .LBB2_2: ; %then +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: test2: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: s_cmp_le_i32 s0, 0 +; GFX10-GISEL-NEXT: s_cbranch_scc0 .LBB2_2 +; GFX10-GISEL-NEXT: ; %bb.1: ; %else +; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX10-GISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX10-GISEL-NEXT: s_endpgm +; GFX10-GISEL-NEXT: .LBB2_2: ; %then +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: test2: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: s_cmp_lt_i32 s0, 1 +; GFX11-SDAG-NEXT: s_cbranch_scc0 .LBB2_2 +; GFX11-SDAG-NEXT: ; %bb.1: ; %else +; GFX11-SDAG-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 +; GFX11-SDAG-NEXT: flat_store_b32 v[0:1], v2 +; GFX11-SDAG-NEXT: s_endpgm +; GFX11-SDAG-NEXT: .LBB2_2: ; %then +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: test2: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: s_cmp_le_i32 s0, 0 +; GFX11-GISEL-NEXT: s_cbranch_scc0 .LBB2_2 +; GFX11-GISEL-NEXT: ; %bb.1: ; %else +; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 +; GFX11-GISEL-NEXT: flat_store_b32 v[0:1], v2 +; GFX11-GISEL-NEXT: s_endpgm +; GFX11-GISEL-NEXT: .LBB2_2: ; %then +; GFX11-GISEL-NEXT: s_endpgm %cond = icmp sgt i32 %x, 0 br i1 %cond, label %then, label %else