Sergio Afonso 18dd299fb1
[Flang][MLIR][OpenMP] Host-evaluation of omp.loop bounds (#133908)
This patch updates Flang lowering and kernel flags identification in
MLIR so that loop bounds on `target teams loop` constructs are evaluated
on the host, making the trip count available to the corresponding
`__tgt_target_kernel` call emitted for the target region.

This is necessary to properly execute these constructs as
`target teams distribute parallel do`.

Co-authored-by: Kareem Ergawy <kareem.ergawy@amd.com>
2025-04-03 15:06:19 +01:00
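
For illustration, a minimal sketch of the host-side MLIR shape this lowering
aims for (not verbatim compiler output; SSA value names are invented and
wrapper attributes are omitted). The host-evaluated bounds enter the target
region through the `host_eval` clause, so the `__tgt_target_kernel` call
emitted for the region can compute the trip count from the same values:

  %lb = arith.constant 1 : i32
  %ub = arith.constant 10 : i32
  %step = arith.constant 1 : i32
  omp.target host_eval(%lb -> %arg0, %ub -> %arg1, %step -> %arg2 : i32, i32, i32) {
    omp.teams {
      omp.parallel {
        omp.distribute {
          omp.wsloop {
            omp.loop_nest (%i) : i32 = (%arg0) to (%arg1) inclusive step (%arg2) {
              // loop body runs over the same bounds the host used to
              // compute the trip count
              omp.yield
            }
          }
        }
        omp.terminator
      }
      omp.terminator
    }
    omp.terminator
  }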

! The "thread_limit" clause was added to the "target" construct in OpenMP 5.1.
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 %s -o - | FileCheck %s --check-prefixes=BOTH,HOST
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes=BOTH,DEVICE
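! Check that `num_teams` and `thread_limit` on a `teams` construct nested
! directly inside `target` are evaluated on the host and passed into the
! target region through `host_eval`, whereas a host-only `teams` construct
! keeps its clause operands as-is.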
! BOTH-LABEL: func.func @_QPteams
subroutine teams()
! BOTH: omp.target
! HOST-SAME: host_eval(%{{.*}} -> %[[NUM_TEAMS:.*]], %{{.*}} -> %[[THREAD_LIMIT:.*]] : i32, i32)
! DEVICE-NOT: host_eval({{.*}})
! DEVICE-SAME: {
!$omp target
! BOTH: omp.teams
! HOST-SAME: num_teams( to %[[NUM_TEAMS]] : i32) thread_limit(%[[THREAD_LIMIT]] : i32)
! DEVICE-SAME: num_teams({{.*}}) thread_limit({{.*}})
!$omp teams num_teams(1) thread_limit(2)
call foo()
!$omp end teams
!$omp end target
! BOTH: omp.teams
! BOTH-SAME: num_teams({{.*}}) thread_limit({{.*}}) {
!$omp teams num_teams(1) thread_limit(2)
call foo()
!$omp end teams
end subroutine teams
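! Check that a composite `distribute parallel do` nested in `target teams`
! is treated as SPMD: loop bounds and `num_threads` are host-evaluated. A
! call before the loop prevents SPMD detection, and a host-only `teams`
! region is unaffected.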
! BOTH-LABEL: func.func @_QPdistribute_parallel_do
subroutine distribute_parallel_do()
! BOTH: omp.target
! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]], %{{.*}} -> %[[NUM_THREADS:.*]] : i32, i32, i32, i32)
! DEVICE-NOT: host_eval({{.*}})
! DEVICE-SAME: {
! BOTH: omp.teams
!$omp target teams
! BOTH: omp.parallel
! HOST-SAME: num_threads(%[[NUM_THREADS]] : i32)
! DEVICE-SAME: num_threads({{.*}})
! BOTH: omp.distribute
! BOTH-NEXT: omp.wsloop
! BOTH-NEXT: omp.loop_nest
! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
!$omp distribute parallel do num_threads(1)
do i=1,10
call foo()
end do
!$omp end distribute parallel do
!$omp end target teams
! BOTH: omp.target
! BOTH-NOT: host_eval({{.*}})
! BOTH-SAME: {
! BOTH: omp.teams
!$omp target teams
call foo() !< Prevents this from being SPMD.
! BOTH: omp.parallel
! BOTH-SAME: num_threads({{.*}})
! BOTH: omp.distribute
! BOTH-NEXT: omp.wsloop
!$omp distribute parallel do num_threads(1)
do i=1,10
call foo()
end do
!$omp end distribute parallel do
!$omp end target teams
! BOTH: omp.teams
!$omp teams
! BOTH: omp.parallel
! BOTH-SAME: num_threads({{.*}})
! BOTH: omp.distribute
! BOTH-NEXT: omp.wsloop
!$omp distribute parallel do num_threads(1)
do i=1,10
call foo()
end do
!$omp end distribute parallel do
!$omp end teams
end subroutine distribute_parallel_do
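! Same checks as above for the `distribute parallel do simd` composite
! construct.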
! BOTH-LABEL: func.func @_QPdistribute_parallel_do_simd
subroutine distribute_parallel_do_simd()
! BOTH: omp.target
! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]], %{{.*}} -> %[[NUM_THREADS:.*]] : i32, i32, i32, i32)
! DEVICE-NOT: host_eval({{.*}})
! DEVICE-SAME: {
! BOTH: omp.teams
!$omp target teams
! BOTH: omp.parallel
! HOST-SAME: num_threads(%[[NUM_THREADS]] : i32)
! DEVICE-SAME: num_threads({{.*}})
! BOTH: omp.distribute
! BOTH-NEXT: omp.wsloop
! BOTH-NEXT: omp.simd
! BOTH-NEXT: omp.loop_nest
! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
!$omp distribute parallel do simd num_threads(1)
do i=1,10
call foo()
end do
!$omp end distribute parallel do simd
!$omp end target teams
! BOTH: omp.target
! BOTH-NOT: host_eval({{.*}})
! BOTH-SAME: {
! BOTH: omp.teams
!$omp target teams
call foo() !< Prevents this from being SPMD.
! BOTH: omp.parallel
! BOTH-SAME: num_threads({{.*}})
! BOTH: omp.distribute
! BOTH-NEXT: omp.wsloop
! BOTH-NEXT: omp.simd
!$omp distribute parallel do simd num_threads(1)
do i=1,10
call foo()
end do
!$omp end distribute parallel do simd
!$omp end target teams
! BOTH: omp.teams
!$omp teams
! BOTH: omp.parallel
! BOTH-SAME: num_threads({{.*}})
! BOTH: omp.distribute
! BOTH-NEXT: omp.wsloop
! BOTH-NEXT: omp.simd
!$omp distribute parallel do simd num_threads(1)
do i=1,10
call foo()
end do
!$omp end distribute parallel do simd
!$omp end teams
end subroutine distribute_parallel_do_simd
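! Check that a `distribute` loop nested in `target teams` (a Generic-SPMD
! kernel) has its loop bounds host-evaluated, and that a call before the
! loop or a host-only `teams` region disables this.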
! BOTH-LABEL: func.func @_QPdistribute
subroutine distribute()
! BOTH: omp.target
! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32)
! DEVICE-NOT: host_eval({{.*}})
! DEVICE-SAME: {
! BOTH: omp.teams
!$omp target teams
! BOTH: omp.distribute
! BOTH-NEXT: omp.loop_nest
! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
!$omp distribute
do i=1,10
call foo()
end do
!$omp end distribute
!$omp end target teams
! BOTH: omp.target
! BOTH-NOT: host_eval({{.*}})
! BOTH-SAME: {
! BOTH: omp.teams
!$omp target teams
call foo() !< Prevents this from being Generic-SPMD.
! BOTH: omp.distribute
!$omp distribute
do i=1,10
call foo()
end do
!$omp end distribute
!$omp end target teams
! BOTH: omp.teams
!$omp teams
! BOTH: omp.distribute
!$omp distribute
do i=1,10
call foo()
end do
!$omp end distribute
!$omp end teams
end subroutine distribute
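! Same checks as above for the `distribute simd` composite construct.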
! BOTH-LABEL: func.func @_QPdistribute_simd
subroutine distribute_simd()
! BOTH: omp.target
! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32)
! DEVICE-NOT: host_eval({{.*}})
! DEVICE-SAME: {
! BOTH: omp.teams
!$omp target teams
! BOTH: omp.distribute
! BOTH-NEXT: omp.simd
! BOTH-NEXT: omp.loop_nest
! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
!$omp distribute simd
do i=1,10
call foo()
end do
!$omp end distribute simd
!$omp end target teams
! BOTH: omp.target
! BOTH-NOT: host_eval({{.*}})
! BOTH-SAME: {
! BOTH: omp.teams
!$omp target teams
call foo() !< Prevents this from being Generic-SPMD.
! BOTH: omp.distribute
! BOTH-NEXT: omp.simd
!$omp distribute simd
do i=1,10
call foo()
end do
!$omp end distribute simd
!$omp end target teams
! BOTH: omp.teams
!$omp teams
! BOTH: omp.distribute
! BOTH-NEXT: omp.simd
!$omp distribute simd
do i=1,10
call foo()
end do
!$omp end distribute simd
!$omp end teams
end subroutine distribute_simd
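! Check that a `loop` construct nested in `target teams` is lowered as
! `distribute parallel do`, with its loop bounds evaluated on the host.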
! BOTH-LABEL: func.func @_QPloop
subroutine loop()
! BOTH: omp.target
! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32)
! DEVICE-NOT: host_eval({{.*}})
! DEVICE-SAME: {
! BOTH: omp.teams
!$omp target teams
! BOTH: omp.parallel
! BOTH: omp.distribute
! BOTH-NEXT: omp.wsloop
! BOTH-NEXT: omp.loop_nest
! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
!$omp loop
do i=1,10
end do
!$omp end target teams
end subroutine loop