diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h index ed3fc5ccd7bd..253a42673079 100644 --- a/clang/lib/Headers/gpuintrin.h +++ b/clang/lib/Headers/gpuintrin.h @@ -147,11 +147,10 @@ __gpu_is_first_in_lane(uint64_t __lane_mask) { // Copies the value from the first active thread to the rest. _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) { - uint32_t __hi = (uint32_t)(__x >> 32ull); - uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFFull); - return ((uint64_t)__gpu_read_first_lane_u32(__lane_mask, __hi) << 32ull) | - ((uint64_t)__gpu_read_first_lane_u32(__lane_mask, __lo) & - 0xFFFFFFFFull); + uint32_t __hi = (uint32_t)(__x >> 32); + uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF); + return ((uint64_t)__gpu_read_first_lane_u32(__lane_mask, __hi) << 32) | + ((uint64_t)__gpu_read_first_lane_u32(__lane_mask, __lo) & 0xFFFFFFFF); } // Gets the first floating point value from the active lanes. @@ -174,11 +173,10 @@ __gpu_read_first_lane_f64(uint64_t __lane_mask, double __x) { _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, uint32_t __width) { - uint32_t __hi = (uint32_t)(__x >> 32ull); + uint32_t __hi = (uint32_t)(__x >> 32); uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF); uint32_t __mask = (uint32_t)__lane_mask; - return ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __hi, __width) - << 32ull) | + return ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __hi, __width) << 32) | ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __lo, __width)); } @@ -211,7 +209,7 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t __idx, double __x, _DEFAULT_FN_ATTRS static __inline__ __type \ __gpu_suffix_scan_##__prefix##_##__suffix(uint64_t __lane_mask, \ __type __x) { \ - uint64_t __above = __lane_mask & -(2ull << __gpu_lane_id()); \ + uint64_t __above = __lane_mask & -(UINT64_C(2) << __gpu_lane_id()); \ for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { \ uint32_t __src = __builtin_ctzg(__above, (int)sizeof(__above) * 8); \ __type __result = __gpu_shuffle_idx_##__suffix(__lane_mask, __src, __x, \ @@ -226,14 +224,15 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t __idx, double __x, _DEFAULT_FN_ATTRS static __inline__ __type \ __gpu_prefix_scan_##__prefix##_##__suffix(uint64_t __lane_mask, \ __type __x) { \ - uint64_t __below = __lane_mask & ((1ull << __gpu_lane_id()) - 1); \ + uint64_t __below = __lane_mask & ((UINT64_C(1) << __gpu_lane_id()) - 1); \ for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { \ uint32_t __src = 63 - __builtin_clzg(__below, (int)sizeof(__below) * 8); \ __type __result = __gpu_shuffle_idx_##__suffix(__lane_mask, __src, __x, \ __gpu_num_lanes()); \ __x = __op(__x, __below ? __result : (__type)__identity); \ for (uint32_t __i = 0; __i < __step; ++__i) \ - __below ^= (1ull << (63 - __builtin_clzg(__below, 0))) & __below; \ + __below ^= \ + (UINT64_C(1) << (63 - __builtin_clzg(__below, 0))) & __below; \ } \ return __x; \ } \ @@ -337,7 +336,7 @@ __gpu_match_all_u32_impl(uint64_t __lane_mask, uint32_t __x) { uint32_t __first = __gpu_shuffle_idx_u32( __lane_mask, __builtin_ctzg(__lane_mask), __x, __gpu_num_lanes()); uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first); - return __ballot == __lane_mask ? __lane_mask : 0ull; + return __ballot == __lane_mask ? __lane_mask : UINT64_C(0); } // Returns the current lane mask if every lane contains __x. @@ -346,7 +345,7 @@ __gpu_match_all_u64_impl(uint64_t __lane_mask, uint64_t __x) { uint64_t __first = __gpu_shuffle_idx_u64( __lane_mask, __builtin_ctzg(__lane_mask), __x, __gpu_num_lanes()); uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first); - return __ballot == __lane_mask ? __lane_mask : 0ull; + return __ballot == __lane_mask ? __lane_mask : UINT64_C(0); } _Pragma("omp end declare variant"); diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index b2e538580ba1..57a6a2cd0863 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -137,7 +137,7 @@ __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x, uint32_t __width) { // Mask out inactive lanes to match AMDGPU behavior. uint32_t __mask = (uint32_t)__lane_mask; - bool __bitmask = (1ull << __idx) & __lane_mask; + bool __bitmask = (UINT64_C(1) << __idx) & __lane_mask; return -__bitmask & __nvvm_shfl_sync_idx_i32(__mask, __x, __idx, ((__gpu_num_lanes() - __width) << 8u) | 0x1f); diff --git a/clang/test/Headers/Inputs/include/stdint.h b/clang/test/Headers/Inputs/include/stdint.h index c4836441096b..a44ebff2d69f 100644 --- a/clang/test/Headers/Inputs/include/stdint.h +++ b/clang/test/Headers/Inputs/include/stdint.h @@ -39,4 +39,7 @@ typedef unsigned __INTPTR_TYPE__ uintptr_t; #define UINT32_MAX __UINT32_C(4294967295) #define UINT64_MAX __UINT64_C(18446744073709551615) +#define UINT32_C __UINT32_C +#define UINT64_C __UINT64_C + #endif /* STDINT_H */ diff --git a/clang/test/Headers/gpuintrin_lang.c b/clang/test/Headers/gpuintrin_lang.c index 433d24b18d65..80e73b8c5647 100644 --- a/clang/test/Headers/gpuintrin_lang.c +++ b/clang/test/Headers/gpuintrin_lang.c @@ -40,6 +40,7 @@ __device__ int foo() { return __gpu_thread_id_x(); } #elif defined(SYCL) extern "C" [[clang::sycl_external]] int foo() { return __gpu_thread_id_x(); } #else +// // CUDA-LABEL: define dso_local i32 @foo( // CUDA-SAME: ) #[[ATTR0:[0-9]+]] { // CUDA-NEXT: [[ENTRY:.*:]]