Andy Kaylor 1e45ea12db
[CIR] Add support for function linkage and visibility (#145600)
This change adds support for function linkage and visibility and related
attributes. Most of the test changes are generalizations to allow
'dso_local' to be accepted where we aren't specifically testing for it.
Some tests based on CIR inputs have been updated to add 'private' to
function declarations where required by newly supported interfaces.

The dso-local.c test has been updated to add specific tests for
dso_local being set correctly, and a new test, func-linkage.cpp tests
other linkage settings.

This change sets `comdat` correctly in CIR, but it is not yet applied to
functions when lowering to LLVM IR. That will be handled in a later
change.
2025-06-25 10:59:30 -07:00

477 lines
22 KiB
C++

// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
extern "C" void acc_loop(int *A, int *B, int *C, int N) {
// CHECK: cir.func{{.*}} @acc_loop(%[[ARG_A:.*]]: !cir.ptr<!s32i> loc{{.*}}, %[[ARG_B:.*]]: !cir.ptr<!s32i> loc{{.*}}, %[[ARG_C:.*]]: !cir.ptr<!s32i> loc{{.*}}, %[[ARG_N:.*]]: !s32i loc{{.*}}) {
// CHECK-NEXT: %[[ALLOCA_A:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["A", init]
// CHECK-NEXT: %[[ALLOCA_B:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["B", init]
// CHECK-NEXT: %[[ALLOCA_C:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["C", init]
// CHECK-NEXT: %[[ALLOCA_N:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["N", init]
// CHECK-NEXT: cir.store %[[ARG_A]], %[[ALLOCA_A]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
// CHECK-NEXT: cir.store %[[ARG_B]], %[[ALLOCA_B]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
// CHECK-NEXT: cir.store %[[ARG_C]], %[[ALLOCA_C]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
// CHECK-NEXT: cir.store %[[ARG_N]], %[[ALLOCA_N]] : !s32i, !cir.ptr<!s32i>
#pragma acc loop
for (unsigned I = 0u; I < N; ++I) {
A[I] = B[I] + C[I];
}
// CHECK-NEXT: acc.loop {
// CHECK-NEXT: cir.scope {
// CHECK: cir.for : cond {
// CHECK: cir.condition
// CHECK-NEXT: } body {
// CHECK-NEXT: cir.scope {
// CHECK: }
// CHECK-NEXT: cir.yield
// CHECK-NEXT: } step {
// CHECK: cir.yield
// CHECK-NEXT: } loc
// CHECK-NEXT: } loc
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop seq
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
#pragma acc loop device_type(nvidia, radeon) seq
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>], seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
#pragma acc loop device_type(radeon) seq
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>], seq = [#acc.device_type<radeon>]} loc
#pragma acc loop seq device_type(nvidia, radeon)
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
#pragma acc loop seq device_type(radeon)
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
#pragma acc loop independent
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
#pragma acc loop device_type(nvidia, radeon) independent
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>, #acc.device_type<none>]} loc
#pragma acc loop device_type(radeon) independent
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<radeon>, #acc.device_type<none>]} loc
#pragma acc loop independent device_type(nvidia, radeon)
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
#pragma acc loop independent device_type(radeon)
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
#pragma acc loop auto
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
#pragma acc loop device_type(nvidia, radeon) auto
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>], independent = [#acc.device_type<none>]} loc
#pragma acc loop device_type(radeon) auto
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<radeon>], independent = [#acc.device_type<none>]} loc
#pragma acc loop auto device_type(nvidia, radeon)
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
#pragma acc loop auto device_type(radeon)
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
#pragma acc loop collapse(1) device_type(radeon)
for(unsigned I = 0; I < N; ++I)
for(unsigned J = 0; J < N; ++J)
for(unsigned K = 0; K < N; ++K);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>]}
#pragma acc loop collapse(1) device_type(radeon) collapse (2)
for(unsigned I = 0; I < N; ++I)
for(unsigned J = 0; J < N; ++J)
for(unsigned K = 0; K < N; ++K);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>], independent = [#acc.device_type<none>]}
#pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
for(unsigned I = 0; I < N; ++I)
for(unsigned J = 0; J < N; ++J)
for(unsigned K = 0; K < N; ++K);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>], independent = [#acc.device_type<none>]}
#pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
for(unsigned I = 0; I < N; ++I)
for(unsigned J = 0; J < N; ++J)
for(unsigned K = 0; K < N; ++K);
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>], independent = [#acc.device_type<none>]}
#pragma acc loop tile(1, 2, 3)
for(unsigned I = 0; I < N; ++I)
for(unsigned J = 0; J < N; ++J)
for(unsigned K = 0; K < N; ++K);
// CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
// CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
// CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop tile(2) device_type(radeon)
for(unsigned I = 0; I < N; ++I)
for(unsigned J = 0; J < N; ++J)
for(unsigned K = 0; K < N; ++K);
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
// CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop tile(2) device_type(radeon) tile (1, *)
for(unsigned I = 0; I < N; ++I)
for(unsigned J = 0; J < N; ++J)
for(unsigned K = 0; K < N; ++K);
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
// CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2)
for(unsigned I = 0; I < N; ++I)
for(unsigned J = 0; J < N; ++J)
for(unsigned K = 0; K < N; ++K);
// CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
// CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
for(unsigned I = 0; I < N; ++I)
for(unsigned J = 0; J < N; ++J)
for(unsigned K = 0; K < N; ++K);
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
// CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
// CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
// CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
// CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
// CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc kernels
{
#pragma acc loop worker
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop worker {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop worker(N)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop worker device_type(nvidia, radeon) worker
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop worker(N) device_type(nvidia, radeon) worker
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.loop worker([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop worker device_type(nvidia, radeon) worker(N)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.loop worker([#acc.device_type<none>], %[[N_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_CONV]] : si32 [#acc.device_type<radeon>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop worker(N) device_type(nvidia, radeon) worker(N + 1)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
// CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
// CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
// CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop device_type(nvidia, radeon) worker(num:N + 1)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
// CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
// CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
// CHECK-NEXT: acc.loop worker(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
#pragma acc loop vector
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop vector {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop vector(N)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop vector device_type(nvidia, radeon) vector
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop vector([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop vector(N) device_type(nvidia, radeon) vector
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.loop vector([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop vector(N) device_type(nvidia, radeon) vector(N + 1)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
// CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
// CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
// CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop device_type(nvidia, radeon) vector(length:N + 1)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
// CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
// CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
// CHECK-NEXT: acc.loop vector(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop worker vector device_type(nvidia) worker vector
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>]) vector([#acc.device_type<none>, #acc.device_type<nvidia>])
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop worker(N) vector(N) device_type(nvidia) worker(N) vector(N)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32
// CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV3:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD3]] : !s32i to si32
// CHECK-NEXT: %[[N_LOAD4:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV4:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD4]] : !s32i to si32
// CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_CONV3]] : si32 [#acc.device_type<nvidia>]) vector(%[[N_CONV2]] : si32, %[[N_CONV4]] : si32 [#acc.device_type<nvidia>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
}
#pragma acc parallel
// CHECK: acc.parallel {
{
#pragma acc loop gang
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop gang {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop gang device_type(nvidia) gang
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop gang([#acc.device_type<none>, #acc.device_type<nvidia>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop gang(dim:1) device_type(nvidia) gang(dim:2)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
// CHECK-NEXT: acc.loop gang({dim=%[[ONE_CONST]] : i64}, {dim=%[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop gang(static:N, dim: 1) device_type(nvidia, radeon) gang(static:*, dim : 2)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
// CHECK-NEXT: acc.loop gang({static=%[[N_CONV]] : si32, dim=%[[ONE_CONST]] : i64}, {static=%[[STAR_CONST]] : i64, dim=%[[TWO_CONST]] : i64} [#acc.device_type<nvidia>], {static=%[[STAR_CONST]] : i64, dim=%[[TWO_CONST]] : i64} [#acc.device_type<radeon>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
}
#pragma acc kernels
// CHECK: acc.kernels {
{
#pragma acc loop gang(num:N) device_type(nvidia, radeon) gang(num:N)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32
// CHECK-NEXT: acc.loop gang({num=%[[N_CONV]] : si32}, {num=%[[N_CONV2]] : si32} [#acc.device_type<nvidia>], {num=%[[N_CONV2]] : si32} [#acc.device_type<radeon>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop gang(static:N) device_type(nvidia) gang(static:*)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
// CHECK-NEXT: acc.loop gang({static=%[[N_CONV]] : si32}, {static=%[[STAR_CONST]] : i64} [#acc.device_type<nvidia>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop gang(static:N, num: N + 1) device_type(nvidia) gang(static:*, num : N + 2)
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
// CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CIR_ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
// CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[CIR_ONE_CONST]]) nsw : !s32i
// CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
// CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
// CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CIR_TWO_CONST:.*]] = cir.const #cir.int<2> : !s32i
// CHECK-NEXT: %[[N_PLUS_TWO:.*]] = cir.binop(add, %[[N_LOAD3]], %[[CIR_TWO_CONST]]) nsw : !s32i
// CHECK-NEXT: %[[N_PLUS_TWO_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_TWO]] : !s32i to si32
// CHECK-NEXT: acc.loop gang({static=%[[N_CONV]] : si32, num=%[[N_PLUS_ONE_CONV]] : si32}, {static=%[[STAR_CONST]] : i64, num=%[[N_PLUS_TWO_CONV]] : si32} [#acc.device_type<nvidia>]) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
}
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
// Checking the automatic-addition of parallelism clauses.
#pragma acc loop
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
#pragma acc parallel
{
// CHECK-NEXT: acc.parallel {
#pragma acc loop
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
}
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc kernels
{
// CHECK-NEXT: acc.kernels {
#pragma acc loop
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
}
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
#pragma acc serial
{
// CHECK-NEXT: acc.serial {
#pragma acc loop
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
}
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc serial
{
// CHECK-NEXT: acc.serial {
#pragma acc loop worker
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop worker {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
}
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc serial
{
// CHECK-NEXT: acc.serial {
#pragma acc loop vector
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop vector {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
}
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc serial
{
// CHECK-NEXT: acc.serial {
#pragma acc loop gang
for(unsigned I = 0; I < N; ++I);
// CHECK-NEXT: acc.loop gang {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
}
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
}