The existing implementation has three issues which this patch addresses. 1. The last dimension which represents the bytes in the type, has the wrong stride and count. For example, for a 4 byte int, count=1 and stride=4. The correct representation here is count=4 and stride=1 because there are 4 bytes (count=4) that we need to copy and we do not skip any bytes (stride=1). 2. The size of the data copy was computed using the last dimension. However, this is incorrect in cases where some of the final dimensions get merged into one. In this case we need to take the combined size of the merged dimensions, which is (Count * Stride) of the first merged dimension. 3. The Offset into a dimension was computed as a multiple of its Stride. However, this Stride which is in bytes, already includes the stride multiplier given by the user. This means that when the user specified 1:3:2, i.e. elements 1, 3, 5, the runtime incorrectly copied elements 2, 4, 6. Fix this by precomputing at compile time the Offset to be in bytes by correctly multiplying the offset by the stride of the dimension without the user-specified multiplier.
136 lines
9.0 KiB
C
136 lines
9.0 KiB
C
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --global-value-regex "\.offload_.*" --global-hex-value-regex ".offload_maptypes.*" --version 3
|
|
|
|
// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - \
|
|
// RUN: | FileCheck %s
|
|
|
|
// Check same results after serialization round-trip
|
|
// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
|
|
// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -emit-llvm %s -o - \
|
|
// RUN: | FileCheck %s
|
|
|
|
// expected-no-diagnostics
|
|
#ifndef HEADER
|
|
#define HEADER
|
|
|
|
// Test that variable stride expressions in target update directives correctly
|
|
// set the OMP_MAP_NON_CONTIG flag. For NON_CONTIG entries, offload_sizes contains
|
|
// dimension count (2 for 1D array section). For non-NON_CONTIG entries,
|
|
// offload_sizes contains byte size (5 elements * 4 bytes = 20).
|
|
int data[10];
|
|
int stride;
|
|
|
|
void test_variable_stride_to() {
|
|
#pragma omp target update to(data[0:5:stride])
|
|
}
|
|
|
|
void test_variable_stride_from() {
|
|
#pragma omp target update from(data[0:5:stride])
|
|
}
|
|
|
|
void test_constant_stride_one() {
|
|
#pragma omp target update to(data[0:5:1])
|
|
}
|
|
|
|
#endif // HEADER
|
|
//.
|
|
// CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 2]
|
|
// CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 [[#0x100000000001]]]
|
|
// CHECK: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 2]
|
|
// CHECK: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x100000000002]]]
|
|
// CHECK: @.offload_sizes.3 = private unnamed_addr constant [1 x i64] [i64 20]
|
|
// CHECK: @.offload_maptypes.4 = private unnamed_addr constant [1 x i64] [i64 [[#0x1]]]
|
|
//.
|
|
// CHECK-LABEL: define dso_local void @test_variable_stride_to(
|
|
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: [[DIMS:%.*]] = alloca [2 x [[STRUCT_DESCRIPTOR_DIM:%.*]]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @stride, align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
|
// CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 4, [[TMP1]]
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr @data, ptr [[TMP3]], align 8
|
|
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr @data, ptr [[TMP4]], align 8
|
|
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP5]], align 8
|
|
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x [[STRUCT_DESCRIPTOR_DIM]]], ptr [[DIMS]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP6]], i32 0, i32 0
|
|
// CHECK-NEXT: store i64 0, ptr [[TMP7]], align 8
|
|
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP6]], i32 0, i32 1
|
|
// CHECK-NEXT: store i64 5, ptr [[TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP6]], i32 0, i32 2
|
|
// CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP9]], align 8
|
|
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x [[STRUCT_DESCRIPTOR_DIM]]], ptr [[DIMS]], i64 0, i64 1
|
|
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP10]], i32 0, i32 0
|
|
// CHECK-NEXT: store i64 0, ptr [[TMP11]], align 8
|
|
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP10]], i32 0, i32 1
|
|
// CHECK-NEXT: store i64 4, ptr [[TMP12]], align 8
|
|
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP10]], i32 0, i32 2
|
|
// CHECK-NEXT: store i64 1, ptr [[TMP13]], align 8
|
|
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr [[DIMS]], ptr [[TMP14]], align 8
|
|
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: call void @__tgt_target_data_update_mapper(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, ptr [[TMP15]], ptr [[TMP16]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr null, ptr null)
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define dso_local void @test_variable_stride_from(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: [[DIMS:%.*]] = alloca [2 x [[STRUCT_DESCRIPTOR_DIM_0:%.*]]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @stride, align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
|
// CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 4, [[TMP1]]
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr @data, ptr [[TMP3]], align 8
|
|
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr @data, ptr [[TMP4]], align 8
|
|
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP5]], align 8
|
|
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x [[STRUCT_DESCRIPTOR_DIM_0]]], ptr [[DIMS]], i64 0, i64 0
|
|
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM_0]], ptr [[TMP6]], i32 0, i32 0
|
|
// CHECK-NEXT: store i64 0, ptr [[TMP7]], align 8
|
|
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM_0]], ptr [[TMP6]], i32 0, i32 1
|
|
// CHECK-NEXT: store i64 5, ptr [[TMP8]], align 8
|
|
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM_0]], ptr [[TMP6]], i32 0, i32 2
|
|
// CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP9]], align 8
|
|
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x [[STRUCT_DESCRIPTOR_DIM_0]]], ptr [[DIMS]], i64 0, i64 1
|
|
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM_0]], ptr [[TMP10]], i32 0, i32 0
|
|
// CHECK-NEXT: store i64 0, ptr [[TMP11]], align 8
|
|
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM_0]], ptr [[TMP10]], i32 0, i32 1
|
|
// CHECK-NEXT: store i64 4, ptr [[TMP12]], align 8
|
|
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM_0]], ptr [[TMP10]], i32 0, i32 2
|
|
// CHECK-NEXT: store i64 1, ptr [[TMP13]], align 8
|
|
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr [[DIMS]], ptr [[TMP14]], align 8
|
|
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: call void @__tgt_target_data_update_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP15]], ptr [[TMP16]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null)
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
//
|
|
// CHECK-LABEL: define dso_local void @test_constant_stride_one(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr @data, ptr [[TMP0]], align 8
|
|
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: store ptr @data, ptr [[TMP1]], align 8
|
|
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
|
// CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
|
// CHECK-NEXT: call void @__tgt_target_data_update_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP3]], ptr [[TMP4]], ptr @.offload_sizes.3, ptr @.offload_maptypes.4, ptr null, ptr null)
|
|
// CHECK-NEXT: ret void
|
|
//
|