Joseph Huber 0a41fb71f1
[Clang] Treat ext_vector_type as a regular type attribute (#130177)
Summary:
This attribute is mostly borrowed from OpenCL, but is useful in general
for accessing the LLVM vector types. Previously the only way to use it
was through typedefs. This patch changes that to allow use as a regular
type attribute, similar to address spaces.
2025-03-07 10:09:06 -06:00

126 lines
6.5 KiB
Plaintext

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
// REQUIRES: x86-registered-target
// RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "amdgcn-amd-amdhsa" \
// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -emit-llvm -o - %s | FileCheck %s
#include "Inputs/cuda.h"
// CHECK-LABEL: @_Z8test_argPDF16bDF16b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[BF16:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
// CHECK-NEXT: [[BF16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BF16]] to ptr
// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: store bfloat [[TMP0]], ptr [[BF16_ASCAST]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[BF16_ASCAST]], align 2
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store bfloat [[TMP1]], ptr [[TMP2]], align 2
// CHECK-NEXT: ret void
//
__device__ void test_arg(__bf16 *out, __bf16 in) {
__bf16 bf16 = in;
*out = bf16;
}
// CHECK-LABEL: @_Z9test_loadPDF16bS_(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[BF16:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
// CHECK-NEXT: [[BF16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BF16]] to ptr
// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store ptr [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[TMP0]], align 2
// CHECK-NEXT: store bfloat [[TMP1]], ptr [[BF16_ASCAST]], align 2
// CHECK-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[BF16_ASCAST]], align 2
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store bfloat [[TMP2]], ptr [[TMP3]], align 2
// CHECK-NEXT: ret void
//
__device__ void test_load(__bf16 *out, __bf16 *in) {
__bf16 bf16 = *in;
*out = bf16;
}
// CHECK-LABEL: @_Z8test_retDF16b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: ret bfloat [[TMP0]]
//
__device__ __bf16 test_ret( __bf16 in) {
return in;
}
// CHECK-LABEL: @_Z9test_callDF16b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[CALL:%.*]] = call contract noundef bfloat @_Z8test_retDF16b(bfloat noundef [[TMP0]]) #[[ATTR1:[0-9]+]]
// CHECK-NEXT: ret bfloat [[CALL]]
//
__device__ __bf16 test_call( __bf16 in) {
return test_ret(in);
}
// CHECK-LABEL: @_Z15test_vec_assignv(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC2_A:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
// CHECK-NEXT: [[VEC2_B:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
// CHECK-NEXT: [[VEC4_A:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
// CHECK-NEXT: [[VEC4_B:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
// CHECK-NEXT: [[VEC8_A:%.*]] = alloca <8 x bfloat>, align 16, addrspace(5)
// CHECK-NEXT: [[VEC8_B:%.*]] = alloca <8 x bfloat>, align 16, addrspace(5)
// CHECK-NEXT: [[VEC16_A:%.*]] = alloca <16 x bfloat>, align 32, addrspace(5)
// CHECK-NEXT: [[VEC16_B:%.*]] = alloca <16 x bfloat>, align 32, addrspace(5)
// CHECK-NEXT: [[VEC2_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC2_A]] to ptr
// CHECK-NEXT: [[VEC2_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC2_B]] to ptr
// CHECK-NEXT: [[VEC4_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4_A]] to ptr
// CHECK-NEXT: [[VEC4_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4_B]] to ptr
// CHECK-NEXT: [[VEC8_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8_A]] to ptr
// CHECK-NEXT: [[VEC8_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8_B]] to ptr
// CHECK-NEXT: [[VEC16_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC16_A]] to ptr
// CHECK-NEXT: [[VEC16_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC16_B]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x bfloat>, ptr [[VEC2_B_ASCAST]], align 4
// CHECK-NEXT: store <2 x bfloat> [[TMP0]], ptr [[VEC2_A_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr [[VEC4_B_ASCAST]], align 8
// CHECK-NEXT: store <4 x bfloat> [[TMP1]], ptr [[VEC4_A_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x bfloat>, ptr [[VEC8_B_ASCAST]], align 16
// CHECK-NEXT: store <8 x bfloat> [[TMP2]], ptr [[VEC8_A_ASCAST]], align 16
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x bfloat>, ptr [[VEC16_B_ASCAST]], align 32
// CHECK-NEXT: store <16 x bfloat> [[TMP3]], ptr [[VEC16_A_ASCAST]], align 32
// CHECK-NEXT: ret void
//
__device__ void test_vec_assign() {
__bf16 [[clang::ext_vector_type(2)]] vec2_a, vec2_b;
vec2_a = vec2_b;
__bf16 __attribute__((ext_vector_type(4))) vec4_a, vec4_b;
vec4_a = vec4_b;
__bf16 [[clang::ext_vector_type(8)]] vec8_a, vec8_b;
vec8_a = vec8_b;
__bf16 __attribute__((ext_vector_type(16))) vec16_a, vec16_b;
vec16_a = vec16_b;
}