# Patch summary (commentary placed ahead of the first `diff --git` header).
#
# 1. llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
#    Inside the RegBankLegalizeRules constructor, registers one rule set shared
#    by the intrinsics amdgcn_ds_add_gs_reg_rtn and amdgcn_ds_sub_gs_reg_rtn
#    (addRulesForIOpcs(..., Standard)):
#      - .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
#      - .Div(S64, {{Vgpr64}, {IntrId, Vgpr32}})
#    Only .Div entries are added; there is no .Uni entry for these intrinsics.
#    NOTE(review): presumably the result of these intrinsics is always treated
#    as divergent, which would make a .Uni rule unnecessary - confirm.
#
# 2. llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll
#    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll
#      - The `-global-isel=1` RUN line gains `-new-reg-bank-select`; the
#        `-global-isel=0` RUN line is unchanged and both keep sharing the
#        same CHECK prefix.
#      - New functions test_{add,sub}_32_s and test_{add,sub}_64_s pass the
#        operand as `i32 inreg %arg`. Their CHECK lines expect
#        `v_mov_b32_e32 v2, s0` before the ds_*_gs_reg_rtn instruction, and a
#        b32 store of v2 (i32 variants) or a b64 store of v[2:3] (i64 variants).
#
# NOTE(review): the line breaks of the patch appear to have been collapsed in
# this copy (whole hunks sit on a single line); restore the original line
# structure before trying to apply it.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index bca1eb85ca19..4510740ca11a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -1620,6 +1620,11 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}}) .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}); + addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn}, + Standard) + .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}}) + .Div(S64, {{Vgpr64}, {IntrId, Vgpr32}}); + addRulesForIOpcs( {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn}, Standard) .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV4S32}}); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll index e0416ed4041b..1a982a6e28e6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s declare i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32, i32 immarg) declare i64 @llvm.amdgcn.ds.add.gs.reg.rtn.i64(i32, i32 immarg) @@ -28,6 +28,19 @@ define amdgpu_gs void @test_add_32_use(i32 %arg, ptr addrspace(1) %out) { ret void } +define amdgpu_gs void @test_add_32_s(i32 inreg %arg, ptr addrspace(1) %out) { +; CHECK-LABEL: test_add_32_s: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: ds_add_gs_reg_rtn v[2:3], v2 offset:16 gds +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_b32 v[0:1], v2, off +; CHECK-NEXT: s_endpgm 
+ %res = call i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 %arg, i32 16) + store i32 %res, ptr addrspace(1) %out, align 4 + ret void +} + define amdgpu_gs void @test_add_64(i32 %arg) { ; CHECK-LABEL: test_add_64: ; CHECK: ; %bb.0: @@ -50,3 +63,16 @@ define amdgpu_gs void @test_add_64_use(i32 %arg, ptr addrspace(1) %out) { store i64 %res, ptr addrspace(1) %out, align 4 ret void } + +define amdgpu_gs void @test_add_64_s(i32 inreg %arg, ptr addrspace(1) %out) { +; CHECK-LABEL: test_add_64_s: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: ds_add_gs_reg_rtn v[2:3], v2 offset:32 gds +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_b64 v[0:1], v[2:3], off +; CHECK-NEXT: s_endpgm + %res = call i64 @llvm.amdgcn.ds.add.gs.reg.rtn.i64(i32 %arg, i32 32) + store i64 %res, ptr addrspace(1) %out, align 8 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll index d5ea159d2bfd..4f91fbb2cb47 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s declare i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32, i32 immarg) declare i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32, i32 immarg) @@ -28,6 +28,19 @@ define amdgpu_gs void @test_sub_32_use(i32 %arg, ptr addrspace(1) %out) { ret void } +define amdgpu_gs void @test_sub_32_s(i32 inreg %arg, ptr addrspace(1) %out) { +; CHECK-LABEL: test_sub_32_s: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: ds_sub_gs_reg_rtn v[2:3], v2 offset:16 gds +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; 
CHECK-NEXT: global_store_b32 v[0:1], v2, off +; CHECK-NEXT: s_endpgm + %res = call i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32 %arg, i32 16) + store i32 %res, ptr addrspace(1) %out, align 4 + ret void +} + define amdgpu_gs void @test_sub_64(i32 %arg) { ; CHECK-LABEL: test_sub_64: ; CHECK: ; %bb.0: @@ -51,3 +64,16 @@ define amdgpu_gs void @test_sub_64_use(i32 %arg, ptr addrspace(1) %out) { ret void } +define amdgpu_gs void @test_sub_64_s(i32 inreg %arg, ptr addrspace(1) %out) { +; CHECK-LABEL: test_sub_64_s: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: ds_sub_gs_reg_rtn v[2:3], v2 offset:32 gds +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_b64 v[0:1], v[2:3], off +; CHECK-NEXT: s_endpgm + %res = call i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32 %arg, i32 32) + store i64 %res, ptr addrspace(1) %out, align 8 + ret void +} +