[NVPTX] Support intrinsics for reserved shared memory special registers (#182354)
Added reserved_smem_offset_{begin|end|cap|0|1} intrinsics to expose shared
memory special registers and NVPTX TableGen support for these
intrinsics.
This commit is contained in:
parent
fb81e59ccb
commit
dbaa4d117a
@ -300,6 +300,42 @@ including the portion reserved for system software use.
|
||||
The '``dynamic_smem_size``' variant returns the amount of dynamic shared
|
||||
memory allocated per CTA for the kernel at launch time.
|
||||
|
||||
'``llvm.nvvm.read.ptx.sreg.reserved_smem_offset_*``'
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_begin()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_end()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_cap()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_0()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_1()
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``@llvm.nvvm.read.ptx.sreg.reserved_smem_offset_*``' intrinsics provide
|
||||
access to PTX special registers that hold information about reserved shared
|
||||
memory offsets.
|
||||
|
||||
The '``reserved_smem_offset_begin``' intrinsic reads the start offset of
|
||||
the reserved shared memory region.
|
||||
|
||||
The '``reserved_smem_offset_end``' intrinsic reads the end offset of the
|
||||
reserved shared memory region.
|
||||
|
||||
The '``reserved_smem_offset_cap``' intrinsic reads the total size of
|
||||
the reserved shared memory region.
|
||||
|
||||
The '``reserved_smem_offset_0``' and '``reserved_smem_offset_1``' intrinsics
|
||||
read additional offsets in the reserved shared memory region.
|
||||
|
||||
For more information, refer to the `PTX ISA
|
||||
<https://docs.nvidia.com/cuda/parallel-thread-execution/#special-registers-reserved-smem>`__.
|
||||
|
||||
Barriers
|
||||
--------
|
||||
|
||||
|
||||
@ -2352,7 +2352,8 @@ foreach vec = [TV_I8, TV_I16, TV_I32,
|
||||
// Accessing special registers.
|
||||
//
|
||||
class PTXReadSRegIntrinsicNB_r32<list<IntrinsicProperty> properties = [], string name = "">
|
||||
: NVVMPureIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>] # properties, name>;
|
||||
: NVVMPureIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>] # properties,
|
||||
!if(!eq(name, ""), "", !strconcat("llvm.nvvm.read.ptx.sreg.", name))>;
|
||||
|
||||
class PTXReadSRegIntrinsic_r32<list<IntrinsicProperty> properties = []>
|
||||
: PTXReadSRegIntrinsicNB_r32<properties>, NVVMBuiltin;
|
||||
@ -2452,12 +2453,26 @@ defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32<MAX_GR
|
||||
def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
|
||||
def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;
|
||||
|
||||
//
|
||||
// Reserved Shared Memory Intrinsics
|
||||
//
|
||||
def int_nvvm_read_ptx_sreg_reserved_smem_offset_begin
|
||||
: PTXReadSRegIntrinsicNB_r32<name = "reserved_smem_offset_begin">;
|
||||
def int_nvvm_read_ptx_sreg_reserved_smem_offset_end
|
||||
: PTXReadSRegIntrinsicNB_r32<name = "reserved_smem_offset_end">;
|
||||
def int_nvvm_read_ptx_sreg_reserved_smem_offset_cap
|
||||
: PTXReadSRegIntrinsicNB_r32<name = "reserved_smem_offset_cap">;
|
||||
def int_nvvm_read_ptx_sreg_reserved_smem_offset_0
|
||||
: PTXReadSRegIntrinsicNB_r32<name = "reserved_smem_offset_0">;
|
||||
def int_nvvm_read_ptx_sreg_reserved_smem_offset_1
|
||||
: PTXReadSRegIntrinsicNB_r32<name = "reserved_smem_offset_1">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_total_smem_size :
|
||||
PTXReadSRegIntrinsicNB_r32<name = "llvm.nvvm.read.ptx.sreg.total_smem_size">;
|
||||
PTXReadSRegIntrinsicNB_r32<name = "total_smem_size">;
|
||||
def int_nvvm_read_ptx_sreg_aggr_smem_size :
|
||||
PTXReadSRegIntrinsicNB_r32<name = "llvm.nvvm.read.ptx.sreg.aggr_smem_size">;
|
||||
PTXReadSRegIntrinsicNB_r32<name = "aggr_smem_size">;
|
||||
def int_nvvm_read_ptx_sreg_dynamic_smem_size :
|
||||
PTXReadSRegIntrinsicNB_r32<name = "llvm.nvvm.read.ptx.sreg.dynamic_smem_size">;
|
||||
PTXReadSRegIntrinsicNB_r32<name = "dynamic_smem_size">;
|
||||
|
||||
//
|
||||
// SHUFFLE
|
||||
|
||||
@ -4926,6 +4926,14 @@ def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
|
||||
def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
|
||||
def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
|
||||
|
||||
// Reserved shared memory special register reads
|
||||
foreach suffix = ["begin", "end", "cap", "0", "1"] in {
|
||||
defvar regname = "reserved_smem_offset_" # suffix;
|
||||
defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname);
|
||||
def "INT_PTX_SREG_RESERVED_SMEM_OFFSET_" # !toupper(suffix) :
|
||||
PTX_READ_SREG_R32<regname, intr, [hasPTX<76>, hasSM<80>]>;
|
||||
}
|
||||
|
||||
// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
|
||||
// handle the constant.
|
||||
def INT_PTX_SREG_WARPSIZE :
|
||||
|
||||
40
llvm/test/CodeGen/NVPTX/reserved-smem-offset.ll
Normal file
40
llvm/test/CodeGen/NVPTX/reserved-smem-offset.ll
Normal file
@ -0,0 +1,40 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_80 -mattr=+ptx76 | FileCheck %s
|
||||
; RUN: %if ptxas-sm_80 && ptxas-isa-7.6 %{ llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_80 -mattr=+ptx76 | %ptxas-verify -arch=sm_80 %}
|
||||
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_begin()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_end()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_cap()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_0()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_1()
|
||||
|
||||
define i32 @test() {
|
||||
; CHECK-LABEL: test(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<10>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: mov.u32 %r1, %reserved_smem_offset_begin;
|
||||
; CHECK-NEXT: mov.u32 %r2, %reserved_smem_offset_end;
|
||||
; CHECK-NEXT: mov.u32 %r3, %reserved_smem_offset_cap;
|
||||
; CHECK-NEXT: mov.u32 %r4, %reserved_smem_offset_0;
|
||||
; CHECK-NEXT: mov.u32 %r5, %reserved_smem_offset_1;
|
||||
; CHECK-NEXT: add.s32 %r6, %r1, %r2;
|
||||
; CHECK-NEXT: add.s32 %r7, %r6, %r3;
|
||||
; CHECK-NEXT: add.s32 %r8, %r7, %r4;
|
||||
; CHECK-NEXT: add.s32 %r9, %r8, %r5;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r9;
|
||||
; CHECK-NEXT: ret;
|
||||
%begin = call i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_begin()
|
||||
%end = call i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_end()
|
||||
%cap = call i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_cap()
|
||||
%offset0 = call i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_0()
|
||||
%offset1 = call i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_1()
|
||||
|
||||
%ret0 = add i32 %begin, %end
|
||||
%ret1 = add i32 %ret0, %cap
|
||||
%ret2 = add i32 %ret1, %offset0
|
||||
%ret3 = add i32 %ret2, %offset1
|
||||
|
||||
ret i32 %ret3
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user