The existing implementation has three issues, which this patch addresses:
1. The last dimension, which represents the bytes in the type, has the wrong stride and count. For example, for a 4-byte int, count=1 and stride=4. The correct representation is count=4 and stride=1, because there are 4 bytes to copy (count=4) and no bytes are skipped (stride=1).
2. The size of the data copy was computed using the last dimension. However, this is incorrect when some of the final dimensions get merged into one. In that case we need the combined size of the merged dimensions, which is (Count * Stride) of the first merged dimension.
3. The Offset into a dimension was computed as a multiple of its Stride. However, this Stride, which is in bytes, already includes the stride multiplier given by the user. This means that when the user specified 1:3:2, i.e. elements 1, 3, 5, the runtime incorrectly copied elements 2, 4, 6. Fix this by precomputing, at compile time, the Offset in bytes: multiply the offset by the stride of the dimension without the user-specified multiplier.
90 lines
1.8 KiB
C
90 lines
1.8 KiB
C
// RUN: %libomptarget-compile-run-and-check-generic
|
|
// XFAIL: intelgpu
|
|
// Miscellaneous variable stride tests: a stride of 1 covering the whole array,
// and a large stride (5) that selects only two elements of a 10-element array.
|
|
|
|
#include <omp.h>
|
|
#include <stdio.h>
|
|
|
|
// Test 1: the stride of the update section is a variable whose value is 1.
//
// Fills a 10-element array with 1..10 on the host, maps it with
// `target data`, adds (index + 1) to every element inside a target region,
// and then refreshes the host copy with `target update from` using the
// section 0:10:unit_stride. Finally prints a header line followed by all ten
// elements, one per line.
void test_1_variable_stride_one() {
  int unit_stride = 1;
  double values[10];

  // Host-side initialization: values[k] == k + 1.
  for (int k = 0; k < 10; ++k)
    values[k] = k + 1;

#pragma omp target data map(to : unit_stride, values[0 : 10])
  {
    // Add (k + 1) to each element inside the target region.
#pragma omp target
    {
      for (int k = 0; k < 10; ++k)
        values[k] = values[k] + (k + 1);
    }

    // The stride comes from a variable rather than a literal; with a value
    // of 1 and a length of 10 the section names every element of the array.
#pragma omp target update from(values[0 : 10 : unit_stride])
  }

  printf("Test 1: Variable stride = 1\n");
  for (int k = 0; k < 10; ++k) {
    printf("%f\n", values[k]);
  }
}
|
|
|
|
// Test 2: the stride of the update section is a variable whose value is 5.
//
// Fills a 10-element array with 1..10 on the host, maps it with
// `target data`, adds (index + 1) to every element inside a target region,
// and then refreshes the host copy with `target update from` using the
// section 0:2:wide_stride. Finally prints a header line (preceded by a blank
// line) followed by all ten elements, one per line.
void test_2_variable_stride_large() {
  int wide_stride = 5;
  double values[10];

  // Host-side initialization: values[k] == k + 1.
  for (int k = 0; k < 10; ++k)
    values[k] = k + 1;

#pragma omp target data map(to : wide_stride, values[0 : 10])
  {
    // Add (k + 1) to each element inside the target region.
#pragma omp target
    {
      for (int k = 0; k < 10; ++k)
        values[k] = values[k] + (k + 1);
    }

    // Two elements at a stride of 5: the section names indices 0 and 5 only,
    // so the remaining host elements are expected to keep their initial
    // values (see the CHECK lines for this test).
#pragma omp target update from(values[0 : 2 : wide_stride])
  }

  printf("\nTest 2: Variable stride = 5\n");
  for (int k = 0; k < 10; ++k) {
    printf("%f\n", values[k]);
  }
}
|
|
|
|
// Entry point: runs the two stride tests in order. Their printed output is
// what the CHECK lines in this file are matched against.
int main() {
  test_1_variable_stride_one();
  test_2_variable_stride_large();

  return 0;
}
|
|
|
|
// CHECK: Test 1: Variable stride = 1
|
|
// CHECK-NEXT: 2.000000
|
|
// CHECK-NEXT: 4.000000
|
|
// CHECK-NEXT: 6.000000
|
|
// CHECK-NEXT: 8.000000
|
|
// CHECK-NEXT: 10.000000
|
|
// CHECK-NEXT: 12.000000
|
|
// CHECK-NEXT: 14.000000
|
|
// CHECK-NEXT: 16.000000
|
|
// CHECK-NEXT: 18.000000
|
|
// CHECK-NEXT: 20.000000
|
|
|
|
// CHECK: Test 2: Variable stride = 5
|
|
// CHECK-NEXT: 2.000000
|
|
// CHECK-NEXT: 2.000000
|
|
// CHECK-NEXT: 3.000000
|
|
// CHECK-NEXT: 4.000000
|
|
// CHECK-NEXT: 5.000000
|
|
// CHECK-NEXT: 12.000000
|
|
// CHECK-NEXT: 7.000000
|
|
// CHECK-NEXT: 8.000000
|
|
// CHECK-NEXT: 9.000000
|
|
// CHECK-NEXT: 10.000000
|