llvm-project/clang/test/CodeGenHIP/spirv-amdgcn-ballot.cpp
Jameson Nash 0dd21ad1c6
[clang] remove addrspace cast from CreateIRTemp (#179327)
This just added unnecessary work to the IR, since they are only used for
load and store, which just causes some IR noise. Tests updated by UTC
script to remove the extra lines.
2026-02-04 13:09:32 -05:00

26 lines
1.3 KiB
C++

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
// Unlike OpenCL, HIP depends on the C++ interpretation of "unsigned long", which
// is 64 bits long on Linux and 32 bits long on Windows. The return type of the
// ballot intrinsic needs to be a 64-bit integer on both platforms. This test
// cross-compiles to Windows to confirm that the return type is indeed 64 bits
// on Windows.
// Spell the HIP `__device__` qualifier directly as the underlying attribute;
// this file defines it locally rather than relying on an included header.
#define __device__ __attribute__((device))
// CHECK-LABEL: define spir_func noundef i64 @_Z3fooi(
// CHECK-SAME: i32 noundef [[P:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr [[P_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: store i32 [[P]], ptr addrspace(4) [[P_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[P_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: [[TMP1:%.*]] = call addrspace(4) i64 @llvm.amdgcn.ballot.i64(i1 [[TOBOOL]])
// CHECK-NEXT: ret i64 [[TMP1]]
//
__device__ unsigned long long foo(int p) {
// Test input: pass `p` straight to the wave64 ballot builtin and return the
// result unchanged. The autogenerated assertions preceding this function pin
// the expected lowering: `p` is compared against zero to form an i1, and the
// builtin becomes a call to @llvm.amdgcn.ballot.i64 whose i64 result is
// returned. Keep this body minimal: any extra statement changes the emitted
// IR and requires regenerating those assertions.
return __builtin_amdgcn_ballot_w64(p);
}