This change adds new Clang builtins for the AMDGPU raw/struct buffer format load/store intrinsics. Clang already provides the `__builtin_amdgcn_raw_buffer_load_b*` and `__builtin_amdgcn_raw_buffer_store_b*` builtins, but it has no builtins for the format variants. device-libs currently reaches these format intrinsics through manually written IR wrappers in [buffer-intrinsics.ll](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/ockl/src/buffer-intrinsics.ll).
45 lines
2.1 KiB
Common Lisp
45 lines
2.1 KiB
Common Lisp
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
|
// REQUIRES: amdgpu-registered-target
|
|
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s
|
|
|
|
// Turn on the cl_khr_fp16 extension; the v4f16 typedef below is built on half.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
|
|
|
// Four-element float vector; return type of the v4f32 load tests.
typedef float v4f32 __attribute__((ext_vector_type(4)));
|
|
// Four-element half vector; return type of the v4f16 load test.
typedef half v4f16 __attribute__((ext_vector_type(4)));
|
|
|
|
// CHECK-LABEL: @test_raw_buffer_load_format_v4f32(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP0]]
|
|
//
|
|
// Format load of a four-element float vector through the raw buffer builtin;
// every operand after the resource is the literal 0.
v4f32 test_raw_buffer_load_format_v4f32(__amdgpu_buffer_rsrc_t rsrc) {
  v4f32 loaded = __builtin_amdgcn_raw_buffer_load_format_v4f32(rsrc, 0, 0, 0);
  return loaded;
}
|
|
|
|
// CHECK-LABEL: @test_raw_buffer_load_format_v4f16(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP0]]
|
|
//
|
|
// Format load of a four-element half vector through the raw buffer builtin;
// every operand after the resource is the literal 0.
v4f16 test_raw_buffer_load_format_v4f16(__amdgpu_buffer_rsrc_t rsrc) {
  v4f16 loaded = __builtin_amdgcn_raw_buffer_load_format_v4f16(rsrc, 0, 0, 0);
  return loaded;
}
|
|
|
|
// CHECK-LABEL: @test_raw_buffer_load_format_v4f32_non_const_offset(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP0]]
|
|
//
|
|
// Four-element float format load where the second builtin operand is the
// runtime `offset` parameter rather than a literal; the last two stay 0.
v4f32 test_raw_buffer_load_format_v4f32_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset) {
  v4f32 loaded =
      __builtin_amdgcn_raw_buffer_load_format_v4f32(rsrc, offset, 0, 0);
  return loaded;
}
|
|
|
|
// CHECK-LABEL: @test_raw_buffer_load_format_v4f32_non_const_soffset(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP0]]
|
|
//
|
|
// Four-element float format load where the third builtin operand is the
// runtime `soffset` parameter rather than a literal; the others stay 0.
v4f32 test_raw_buffer_load_format_v4f32_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int soffset) {
  v4f32 loaded =
      __builtin_amdgcn_raw_buffer_load_format_v4f32(rsrc, 0, soffset, 0);
  return loaded;
}
|