llvm-project/clang/test/OpenMP/target_update_count_expression_codegen.c
Ivan R. Ivanov 19420c0e77
[OpenMP] Fix non-contiguous array omp target update (#156889)
The existing implementation has three issues which this patch addresses.

1. The last dimension which represents the bytes in the type, has the
wrong stride and count. For example, for a 4 byte int, count=1 and
stride=4. The correct representation here is count=4 and stride=1
because there are 4 bytes (count=4) that we need to copy and we do not
skip any bytes (stride=1).

2. The size of the data copy was computed using the last dimension.
However, this is incorrect in cases where some of the final dimensions
get merged into one. In this case we need to take the combined size of
the merged dimensions, which is (Count * Stride) of the first merged
dimension.

3. The Offset into a dimension was computed as a multiple of its Stride.
However, this Stride which is in bytes, already includes the stride
multiplier given by the user. This means that when the user specified
1:3:2, i.e. elements 1, 3, 5, the runtime incorrectly copied elements 2,
4, 6. Fix this by precomputing at compile time the Offset to be in bytes
by correctly multiplying the offset by the stride of the dimension
without the user-specified multiplier.
2026-03-26 15:55:31 +01:00

90 lines
5.7 KiB
C

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --global-value-regex "\.offload_.*" --global-hex-value-regex ".offload_maptypes.*" --version 3
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - \
// RUN: | FileCheck %s
// Check same results after serialization round-trip
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -emit-llvm %s -o - \
// RUN: | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// Test that count expressions in target update directives correctly set the
// OMP_MAP_NON_CONTIG flag. For NON_CONTIG entries, offload_sizes contains
// dimension count (2 for 1D array section). For non-NON_CONTIG entries,
// offload_sizes contains byte size (5 elements * 4 bytes = 20).
int data[10];
const int len = 10;
void test_non_contig_dimension_count() {
#pragma omp target update to(data[0:len/2:2])
}
void test_contig_byte_size() {
#pragma omp target update to(data[0:5:1])
}
#endif // HEADER
//.
// CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 2]
// CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 [[#0x100000000001]]]
// CHECK: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 20]
// CHECK: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x1]]]
//.
// CHECK-LABEL: define dso_local void @test_non_contig_dimension_count(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DIMS:%.*]] = alloca [2 x [[STRUCT_DESCRIPTOR_DIM:%.*]]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-NEXT: store ptr @data, ptr [[TMP0]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: store ptr @data, ptr [[TMP1]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x [[STRUCT_DESCRIPTOR_DIM]]], ptr [[DIMS]], i64 0, i64 0
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP3]], i32 0, i32 0
// CHECK-NEXT: store i64 0, ptr [[TMP4]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP3]], i32 0, i32 1
// CHECK-NEXT: store i64 5, ptr [[TMP5]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP3]], i32 0, i32 2
// CHECK-NEXT: store i64 8, ptr [[TMP6]], align 8
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x [[STRUCT_DESCRIPTOR_DIM]]], ptr [[DIMS]], i64 0, i64 1
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP7]], i32 0, i32 0
// CHECK-NEXT: store i64 0, ptr [[TMP8]], align 8
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP7]], i32 0, i32 1
// CHECK-NEXT: store i64 4, ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR_DIM]], ptr [[TMP7]], i32 0, i32 2
// CHECK-NEXT: store i64 1, ptr [[TMP10]], align 8
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: store ptr [[DIMS]], ptr [[TMP11]], align 8
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: call void @__tgt_target_data_update_mapper(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, ptr [[TMP12]], ptr [[TMP13]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr null, ptr null)
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local void @test_contig_byte_size(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-NEXT: store ptr @data, ptr [[TMP0]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: store ptr @data, ptr [[TMP1]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: call void @__tgt_target_data_update_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP3]], ptr [[TMP4]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null)
// CHECK-NEXT: ret void
//