Fix the IR lowering for `__amdgpu_texture_t` to generate a single 256-bit load instead of a double indirection through a flat pointer. Previously, `__amdgpu_texture_t` was lowered to `ptr addrspace(0)` (64-bit flat pointer), which caused the double load and indirection. With the same reproducer like #187697. ```c #define TSHARP __constant uint * // Old tsharp handling: // #define LOAD_TSHARP(I) *(__constant uint8 *)I #define LOAD_TSHARP(I) *(__constant __amdgpu_texture_t *)I float4 test_image_load_1D(TSHARP i, int c) { return __builtin_amdgcn_image_load_1d_v4f32_i32(15, c, LOAD_TSHARP(i), 0, 0); } ``` old output: ```llvm define hidden <4 x float> @test_image_load_1D(ptr addrspace(4) noundef readonly captures(none) %i, i32 noundef %c) local_unnamed_addr #0 { entry: %0 = load ptr, ptr addrspace(4) %i, align 32, !tbaa !9 %1 = addrspacecast ptr %0 to ptr addrspace(1) %tex.rsrc.val = load <8 x i32>, ptr addrspace(1) %1, align 32 %2 = tail call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %c, <8 x i32> %tex.rsrc.val, i32 0, i32 0) ret <4 x float> %2 } ``` This matches the old `__constant uint8 *` behavior. With this fix new output is ```llvm define hidden <4 x float> @test_image_load_1D(ptr addrspace(4) noundef readonly captures(none) %0, i32 noundef %1) local_unnamed_addr #0 { %3 = load <8 x i32>, ptr addrspace(4) %0, align 32, !tbaa !10 %4 = tail call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %1, <8 x i32> %3, i32 0, i32 0) ret <4 x float> %4 } ``` Fixes #187697
1078 lines
78 KiB
C
1078 lines
78 KiB
C
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
|
|
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 %s -emit-llvm -o - | FileCheck %s
|
|
|
|
typedef int int4 __attribute__((ext_vector_type(4)));
|
|
typedef float float4 __attribute__((ext_vector_type(4)));
|
|
typedef _Float16 half;
|
|
typedef half half4 __attribute__((ext_vector_type(4)));
|
|
|
|
// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
|
|
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 2, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103)
|
|
// CHECK-NEXT: ret float [[TMP3]]
|
|
//
|
|
float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_2d_f32_i32(2, i32, i32, tex, 106, 103);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP3]]
|
|
//
|
|
float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_2d_v4f32_i32(15, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP3]]
|
|
//
|
|
half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_2d_v4f16_i32(15, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray(
|
|
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 4, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret float [[TMP4]]
|
|
//
|
|
float test_builtin_image_load_2darray(float f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_2darray_f32_i32(4, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP4]]
|
|
//
|
|
float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_2darray_v4f32_i32(15, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP4]]
|
|
//
|
|
half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_2darray_v4f16_i32(15, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP2]]
|
|
//
|
|
float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_1d_v4f32_i32(15, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP2]]
|
|
//
|
|
half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_1d_v4f16_i32(15, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP3]]
|
|
//
|
|
float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_1darray_v4f32_i32(15, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP3]]
|
|
//
|
|
half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_1darray_v4f16_i32(15, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP4]]
|
|
//
|
|
float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_3d_v4f32_i32(15, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP4]]
|
|
//
|
|
half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_3d_v4f16_i32(15, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP4]]
|
|
//
|
|
float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_cube_v4f32_i32(15, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP4]]
|
|
//
|
|
half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_cube_v4f16_i32(15, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP3]]
|
|
//
|
|
float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(15, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP3]]
|
|
//
|
|
half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(15, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP4]]
|
|
//
|
|
float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(15, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP4]]
|
|
//
|
|
half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(15, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d(
|
|
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 8, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret float [[TMP4]]
|
|
//
|
|
float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_2d_f32_i32(8, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP4]]
|
|
//
|
|
float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(15, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP4]]
|
|
//
|
|
half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(15, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray(
|
|
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 8, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret float [[TMP5]]
|
|
//
|
|
float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_2darray_f32_i32(8, i32, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP5]]
|
|
//
|
|
float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(15, i32, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP5]]
|
|
//
|
|
half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(15, i32, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP5]]
|
|
//
|
|
float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(15, i32, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP5]]
|
|
//
|
|
half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(15, i32, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP5]]
|
|
//
|
|
float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(15, i32, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP5]]
|
|
//
|
|
half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
|
|
|
|
return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(15, i32, i32, i32, i32, tex, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1d_v4f32_f32(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP3]]
|
|
//
|
|
float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_1d_v4f32_f32(15, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1d_v4f16_f32(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP3]]
|
|
//
|
|
half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_1d_v4f16_f32(15, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1darray_v4f32_f32(
|
|
// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP4]]
|
|
//
|
|
float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_1darray_v4f32_f32(15, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1darray_v4f16_f32(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP4]]
|
|
//
|
|
half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_1darray_v4f16_f32(15, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2d_f32_f32(
|
|
// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret float [[TMP4]]
|
|
//
|
|
float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2d_v4f32_f32(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP4]]
|
|
//
|
|
float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_2d_v4f32_f32(15, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2d_v4f16_f32(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP4]]
|
|
//
|
|
half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_2d_v4f16_f32(15, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2darray_f32_f32(
|
|
// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 4, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret float [[TMP5]]
|
|
//
|
|
float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_2darray_f32_f32(4, f32, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2darray_v4f32_f32(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP5]]
|
|
//
|
|
float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_2darray_v4f32_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2darray_v4f16_f32(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP5]]
|
|
//
|
|
half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_2darray_v4f16_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_3d_v4f32_f32(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP5]]
|
|
//
|
|
float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_3d_v4f32_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_3d_v4f16_f32(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP5]]
|
|
//
|
|
half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_3d_v4f16_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_cube_v4f32_f32(
|
|
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x float> [[TMP5]]
|
|
//
|
|
float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_cube_v4f32_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|
|
|
|
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_cube_v4f16_f32(
|
|
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
|
|
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
|
|
// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
|
|
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
|
|
// CHECK-NEXT: ret <4 x half> [[TMP5]]
|
|
//
|
|
half4 test_builtin_amdgcn_image_sample_cube_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
|
|
return __builtin_amdgcn_image_sample_cube_v4f16_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
|
|
}
|