
Add compiler options and a statement attribute for controlling the emission of target-specific metadata on atomicrmw instructions in IR. The RFC for this attribute and these options is https://discourse.llvm.org/t/rfc-add-clang-atomic-control-options-and-pragmas/80641. A pragma was originally proposed; it was later changed to a Clang attribute.

The attribute accepts one, two, or all three options and must be applied to a compound statement. Attributes can be nested, with inner attributes overriding the options specified by outer attributes or the target's default options. These options then determine the target-specific metadata added to atomic instructions in the IR.

In addition to the attribute, three new compiler options are introduced: `-f[no-]atomic-remote-memory`, `-f[no-]atomic-fine-grained-memory`, and `-f[no-]atomic-ignore-denormal-mode`. They let users override the target's default options through the Clang driver and front end. `-m[no-]unsafe-fp-atomics` is kept as an alias of `-f[no-]atomic-ignore-denormal-mode`.

On the implementation side, the atomic attribute is represented in the AST by the existing AttributedStmt, requiring only minimal changes to the AST and Sema. During code generation in Clang, the CodeGenModule maintains the current atomic options, which are used to emit the relevant metadata for atomic instructions. An RAII object saves and restores the atomic options when entering and exiting nested AttributedStmts (a self-contained sketch of this pattern follows the test header below).
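To illustrate, here is a sketch of how the attribute could be used, assuming the option spellings proposed in the RFC (`remote_memory`, `fine_grained_memory`, `ignore_denormal_mode`, each with a `no_` negation); check the final implementation for the exact names:

// Sketch only: the [[clang::atomic(...)]] option spellings are assumed
// from the RFC, and __atomic_fetch_add on float relies on Clang's
// floating-point atomic builtin extension.
float add_both(float *p, float *q, float v) {
  float r = 0.0f;
  // Outer attribute: atomics in this compound statement carry metadata
  // asserting they never touch remote or fine-grained memory (for AMDGPU,
  // e.g. !amdgpu.no.remote.memory on the atomicrmw), so the backend may
  // pick a faster lowering.
  [[clang::atomic(no_remote_memory, no_fine_grained_memory)]] {
    r += __atomic_fetch_add(p, v, __ATOMIC_RELAXED);
    // Inner attribute overrides only the remote-memory option; the outer
    // no_fine_grained_memory setting still applies in this block.
    [[clang::atomic(remote_memory)]] {
      r += __atomic_fetch_add(q, v, __ATOMIC_RELAXED);
    }
  }
  return r;
}

The same defaults can be set translation-unit-wide from the driver, e.g. `-fno-atomic-remote-memory -fno-atomic-fine-grained-memory`, with the attribute overriding them per block.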
/* Minimal declarations for CUDA support. Testing purposes only. */

#include <stddef.h>

// Make this file work with nvcc, for testing compatibility.

#ifndef __NVCC__
#define __constant__ __attribute__((constant))
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __host__ __attribute__((host))
#define __shared__ __attribute__((shared))
#define __managed__ __attribute__((managed))
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))

struct dim3 {
  unsigned x, y, z;
  __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
};

#ifdef __HIP__
typedef struct hipStream *hipStream_t;
typedef enum hipError {} hipError_t;
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
                     hipStream_t stream = 0);
extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
                                                 size_t sharedSize = 0,
                                                 hipStream_t stream = 0);
extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
                                      dim3 blockDim, void **args,
                                      size_t sharedMem,
                                      hipStream_t stream);
#else
typedef struct cudaStream *cudaStream_t;
typedef enum cudaError {} cudaError_t;

extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
                                 size_t sharedSize = 0,
                                 cudaStream_t stream = 0);
extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
                                           size_t sharedSize = 0,
                                           cudaStream_t stream = 0);
extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
                                        dim3 blockDim, void **args,
                                        size_t sharedMem, cudaStream_t stream);
#endif

// Host- and device-side placement new overloads.
void *operator new(__SIZE_TYPE__, void *p) { return p; }
void *operator new[](__SIZE_TYPE__, void *p) { return p; }
__device__ void *operator new(__SIZE_TYPE__, void *p) { return p; }
__device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; }

#endif // !__NVCC__
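As mentioned above, code generation saves and restores the current atomic options with an RAII object around nested AttributedStmts. A minimal self-contained sketch of that pattern, using illustrative names rather than Clang's actual classes:

// Toy model of the save/restore pattern; AtomicOptions and the RAII class
// are illustrative stand-ins, not Clang's real CodeGenModule interface.
struct AtomicOptions {
  bool NoRemoteMemory = false;
  bool NoFineGrainedMemory = false;
  bool IgnoreDenormalMode = false;
};

class AtomicOptionsOverride {
  AtomicOptions &Current; // e.g. the options held by the module's codegen state
  AtomicOptions Saved;

public:
  AtomicOptionsOverride(AtomicOptions &Cur, AtomicOptions New)
      : Current(Cur), Saved(Cur) {
    Current = New; // apply the attribute's options on entry
  }
  ~AtomicOptionsOverride() { Current = Saved; } // restore on scope exit
};

On entering an AttributedStmt carrying the atomic attribute, codegen would construct one of these with the merged options; the destructor restores the outer options automatically, which is what makes arbitrary nesting work.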