[cuda][HIP] __constant__ should imply constant (#110182)

Currently, `__constant__` variables do not get unconditionally marked as
`constant` in IR, which seems a bit odd given their definition. This is
generally inconsequential for NVPTX/AMDGPU, since said variables get
emitted in the constant address space for those BEs. However, it is
potentially significant for e.g. HIP-on-SPIR-V cases, as SPIR-V does not
allow casts to/from the constant AS (`UniformConstant`), which forces
`__constant__` variables to be emitted in the global AS, thus making IR
constness meaningful.
This commit is contained in:
Alex Voicu 2024-09-29 01:22:52 +01:00 committed by GitHub
parent 6558e5615a
commit e203a67f4c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 44 additions and 30 deletions

View File

@ -5622,8 +5622,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
emitter->finalize(GV);
// If it is safe to mark the global 'constant', do so now.
GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor &&
D->getType().isConstantStorage(getContext(), true, true));
GV->setConstant((D->hasAttr<CUDAConstantAttr>() && LangOpts.CUDAIsDevice) ||
(!NeedsGlobalCtor && !NeedsGlobalDtor &&
D->getType().isConstantStorage(getContext(), true, true)));
// If it is in a read-only section, mark it 'constant'.
if (const SectionAttr *SA = D->getAttr<SectionAttr>()) {

View File

@ -9,7 +9,7 @@
// CHECK: @i ={{.*}} addrspace(1) externally_initialized global
__device__ int i;
// CHECK: @j ={{.*}} addrspace(4) externally_initialized global
// CHECK: @j ={{.*}} addrspace(4) externally_initialized constant
__constant__ int j;
// CHECK: @k ={{.*}} addrspace(3) global

View File

@ -4,11 +4,11 @@
#include "Inputs/cuda.h"
// CHECK-DEFAULT: @c ={{.*}} addrspace(4) externally_initialized global
// CHECK-DEFAULT: @c ={{.*}} addrspace(4) externally_initialized constant
// CHECK-DEFAULT: @g ={{.*}} addrspace(1) externally_initialized global
// CHECK-PROTECTED: @c = protected addrspace(4) externally_initialized global
// CHECK-PROTECTED: @c = protected addrspace(4) externally_initialized constant
// CHECK-PROTECTED: @g = protected addrspace(1) externally_initialized global
// CHECK-HIDDEN: @c = protected addrspace(4) externally_initialized global
// CHECK-HIDDEN: @c = protected addrspace(4) externally_initialized constant
// CHECK-HIDDEN: @g = protected addrspace(1) externally_initialized global
__constant__ int c;
__device__ int g;

View File

@ -28,13 +28,13 @@
// HIP-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
// HIP-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
// HIP-DAG: @[[VM:_ZN12_GLOBAL__N_12vmE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized global
// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized constant
// HIP-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
// CUDA-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
// CUDA-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
// CUDA-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized global
// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized constant
// CUDA-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global
// COMMON-DAG: @_ZN12_GLOBAL__N_12vdE = internal addrspace(1) global

View File

@ -26,7 +26,7 @@ __shared__ int s_v;
// DEVICE: @s_v ={{.*}} addrspace(3) global i32 undef,
// HOST: @s_v = internal global i32 undef,
__constant__ int c_v;
// DEVICE: addrspace(4) externally_initialized global i32 0,
// DEVICE: addrspace(4) externally_initialized constant i32 0,
// HOST: @c_v = internal global i32 undef,
__device__ int d_v_i = 1;
@ -51,14 +51,14 @@ __shared__ T s_t;
// DEVICE: @s_t ={{.*}} addrspace(3) global %struct.T undef,
// HOST: @s_t = internal global %struct.T undef,
__constant__ T c_t;
// DEVICE: @c_t ={{.*}} addrspace(4) externally_initialized global %struct.T zeroinitializer,
// DEVICE: @c_t ={{.*}} addrspace(4) externally_initialized constant %struct.T zeroinitializer,
// HOST: @c_t = internal global %struct.T undef,
__device__ T d_t_i = {2};
// DEVICE: @d_t_i ={{.*}} addrspace(1) externally_initialized global %struct.T { i32 2 },
// HOST: @d_t_i = internal global %struct.T undef,
__constant__ T c_t_i = {2};
// DEVICE: @c_t_i ={{.*}} addrspace(4) externally_initialized global %struct.T { i32 2 },
// DEVICE: @c_t_i ={{.*}} addrspace(4) externally_initialized constant %struct.T { i32 2 },
// HOST: @c_t_i = internal global %struct.T undef,
// empty constructor
@ -69,7 +69,7 @@ __shared__ EC s_ec;
// DEVICE: @s_ec ={{.*}} addrspace(3) global %struct.EC undef,
// HOST: @s_ec = internal global %struct.EC undef,
__constant__ EC c_ec;
// DEVICE: @c_ec ={{.*}} addrspace(4) externally_initialized global %struct.EC zeroinitializer,
// DEVICE: @c_ec ={{.*}} addrspace(4) externally_initialized constant %struct.EC zeroinitializer,
// HOST: @c_ec = internal global %struct.EC undef
// empty destructor
@ -80,7 +80,7 @@ __shared__ ED s_ed;
// DEVICE: @s_ed ={{.*}} addrspace(3) global %struct.ED undef,
// HOST: @s_ed = internal global %struct.ED undef,
__constant__ ED c_ed;
// DEVICE: @c_ed ={{.*}} addrspace(4) externally_initialized global %struct.ED zeroinitializer,
// DEVICE: @c_ed ={{.*}} addrspace(4) externally_initialized constant %struct.ED zeroinitializer,
// HOST: @c_ed = internal global %struct.ED undef,
__device__ ECD d_ecd;
@ -90,7 +90,7 @@ __shared__ ECD s_ecd;
// DEVICE: @s_ecd ={{.*}} addrspace(3) global %struct.ECD undef,
// HOST: @s_ecd = internal global %struct.ECD undef,
__constant__ ECD c_ecd;
// DEVICE: @c_ecd ={{.*}} addrspace(4) externally_initialized global %struct.ECD zeroinitializer,
// DEVICE: @c_ecd ={{.*}} addrspace(4) externally_initialized constant %struct.ECD zeroinitializer,
// HOST: @c_ecd = internal global %struct.ECD undef,
// empty templated constructor -- allowed with no arguments
@ -101,14 +101,14 @@ __shared__ ETC s_etc;
// DEVICE: @s_etc ={{.*}} addrspace(3) global %struct.ETC undef,
// HOST: @s_etc = internal global %struct.ETC undef,
__constant__ ETC c_etc;
// DEVICE: @c_etc ={{.*}} addrspace(4) externally_initialized global %struct.ETC zeroinitializer,
// DEVICE: @c_etc ={{.*}} addrspace(4) externally_initialized constant %struct.ETC zeroinitializer,
// HOST: @c_etc = internal global %struct.ETC undef,
__device__ NCFS d_ncfs;
// DEVICE: @d_ncfs ={{.*}} addrspace(1) externally_initialized global %struct.NCFS { i32 3 }
// HOST: @d_ncfs = internal global %struct.NCFS undef,
__constant__ NCFS c_ncfs;
// DEVICE: @c_ncfs ={{.*}} addrspace(4) externally_initialized global %struct.NCFS { i32 3 }
// DEVICE: @c_ncfs ={{.*}} addrspace(4) externally_initialized constant %struct.NCFS { i32 3 }
// HOST: @c_ncfs = internal global %struct.NCFS undef,
// Regular base class -- allowed
@ -119,7 +119,7 @@ __shared__ T_B_T s_t_b_t;
// DEVICE: @s_t_b_t ={{.*}} addrspace(3) global %struct.T_B_T undef,
// HOST: @s_t_b_t = internal global %struct.T_B_T undef,
__constant__ T_B_T c_t_b_t;
// DEVICE: @c_t_b_t ={{.*}} addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer,
// DEVICE: @c_t_b_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_B_T zeroinitializer,
// HOST: @c_t_b_t = internal global %struct.T_B_T undef,
// Incapsulated object of allowed class -- allowed
@ -130,7 +130,7 @@ __shared__ T_F_T s_t_f_t;
// DEVICE: @s_t_f_t ={{.*}} addrspace(3) global %struct.T_F_T undef,
// HOST: @s_t_f_t = internal global %struct.T_F_T undef,
__constant__ T_F_T c_t_f_t;
// DEVICE: @c_t_f_t ={{.*}} addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer,
// DEVICE: @c_t_f_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_F_T zeroinitializer,
// HOST: @c_t_f_t = internal global %struct.T_F_T undef,
// array of allowed objects -- allowed
@ -141,7 +141,7 @@ __shared__ T_FA_T s_t_fa_t;
// DEVICE: @s_t_fa_t ={{.*}} addrspace(3) global %struct.T_FA_T undef,
// HOST: @s_t_fa_t = internal global %struct.T_FA_T undef,
__constant__ T_FA_T c_t_fa_t;
// DEVICE: @c_t_fa_t ={{.*}} addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer,
// DEVICE: @c_t_fa_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_FA_T zeroinitializer,
// HOST: @c_t_fa_t = internal global %struct.T_FA_T undef,
@ -153,7 +153,7 @@ __shared__ EC_I_EC s_ec_i_ec;
// DEVICE: @s_ec_i_ec ={{.*}} addrspace(3) global %struct.EC_I_EC undef,
// HOST: @s_ec_i_ec = internal global %struct.EC_I_EC undef,
__constant__ EC_I_EC c_ec_i_ec;
// DEVICE: @c_ec_i_ec ={{.*}} addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
// DEVICE: @c_ec_i_ec ={{.*}} addrspace(4) externally_initialized constant %struct.EC_I_EC zeroinitializer,
// HOST: @c_ec_i_ec = internal global %struct.EC_I_EC undef,
// DEVICE: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef

View File

@ -20,7 +20,7 @@
// NORDC-H-DAG: @v1 = internal global i32 undef
// RDC-H-DAG: @v1 = global i32 undef
__device__ int v1;
// DEV-DAG: @v2 = addrspace(4) externally_initialized global i32 0
// DEV-DAG: @v2 = addrspace(4) externally_initialized constant i32 0
// NORDC-H-DAG: @v2 = internal global i32 undef
// RDC-H-DAG: @v2 = global i32 undef
__constant__ int v2;
@ -48,10 +48,10 @@ extern __managed__ int ev3;
// HOST-DAG: @_ZL3sv1 = internal global i32 undef
// CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
static __device__ int sv1;
// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0
// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized constant i32 0
// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0
// HOST-DAG: @_ZL3sv2 = internal global i32 undef
// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized constant i32 0
static __constant__ int sv2;
// NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global ptr addrspace(1) null
// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global ptr addrspace(1) null

View File

@ -10,7 +10,7 @@
__asm__("file scope asm is host only");
// CHECK-HOST: constantdata = internal global
// CHECK-DEVICE: constantdata = {{(dso_local )?}}externally_initialized global
// CHECK-DEVICE: constantdata = {{(dso_local )?}}externally_initialized constant
__constant__ char constantdata[256];
// CHECK-HOST: devicedata = internal global

View File

@ -50,7 +50,7 @@ static __device__ int x5;
}
// Check a static constant variable referenced by host is externalized.
// DEV-DAG: @_ZL1y ={{.*}} addrspace(4) externally_initialized global i32 0
// DEV-DAG: @_ZL1y ={{.*}} addrspace(4) externally_initialized constant i32 0
// HOST-DAG: @_ZL1y = internal global i32 undef
// HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"

View File

@ -81,11 +81,11 @@ static __device__ int x;
static __device__ int x2;
// Test normal static device variables
// INT-DEV-DAG: @_ZL1y[[FILEID:.*]] = addrspace(4) externally_initialized global i32 0
// INT-DEV-DAG: @_ZL1y[[FILEID:.*]] = addrspace(4) externally_initialized constant i32 0
// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y[[FILEID:.*]]\00"
// Test externalized static device variables
// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0
// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00"
static __constant__ int y;

View File

@ -38,7 +38,7 @@ const int A<T>::const_member;
template class A<int>;
//DEV-DAG: @_ZN1AIiE8d_memberE = internal addrspace(1) global i32 0, comdat, align 4
//DEV-DAG: @_ZN1AIiE8c_memberE = internal addrspace(4) global i32 0, comdat, align 4
//DEV-DAG: @_ZN1AIiE8c_memberE = internal addrspace(4) constant i32 0, comdat, align 4
//DEV-DAG: @_ZN1AIiE8m_memberE = internal addrspace(1) externally_initialized global ptr addrspace(1) null
//DEV-DAG: @_ZN1AIiE12const_memberE = internal addrspace(4) constant i32 0, comdat, align 4
//DEV-NEG-NOT: @_ZN1AIiE8h_memberE

View File

@ -12,7 +12,7 @@
// CHECK: @d ={{.*}} addrspace(1) externally_initialized global
__device__ int d;
// CHECK: @c ={{.*}} addrspace(1) externally_initialized global
// CHECK: @c ={{.*}} addrspace(1) externally_initialized constant
__constant__ int c;
// CHECK: @s ={{.*}} addrspace(3) global

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -S -passes=globalopt | FileCheck %s
; RUN: opt < %s -passes=early-cse | opt -S -passes=globalopt | FileCheck %s --check-prefix=CHECK-CONSTANT
; This global is externally_initialized, which may modify the value between
; it's static initializer and any code in this module being run, so the only
@ -12,6 +13,10 @@
; CHECK: @b = internal unnamed_addr externally_initialized global i32 undef
@b = internal externally_initialized global i32 undef
; This constant global is externally_initialized, which may modify the value
; between its static const initializer and any code in this module being run, so
; the read from it cannot be const propagated
@c = internal externally_initialized constant i32 42
define void @foo() {
; CHECK-LABEL: foo
@ -35,3 +40,11 @@ entry:
%val = load i32, ptr @b
ret i32 %val
}
define i32 @bam() {
; CHECK-CONSTANT-LABEL: bam
entry:
; CHECK-CONSTANT: %val = load i32, ptr @c
%val = load i32, ptr @c
ret i32 %val
}