From f1ea831dbc40052ff0cdfcb10f89a6c2fae81ec8 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Mon, 25 Sep 2023 11:27:28 -0700 Subject: [PATCH] [flang][hlfir] Make alias analysis trace through box designators. (#67353) The changes are needed to get leslie3d same performance with HLFIR as with FIR lowering. The two module allocatable variables cannot alias, so the optimized bufferization should be able to elide the temporary and inline the assignment loop. --- .../Dialect/FortranVariableInterface.td | 14 +++ .../lib/Optimizer/Analysis/AliasAnalysis.cpp | 26 ++++- .../AliasAnalysis/alias-analysis-6.fir | 2 +- .../AliasAnalysis/alias-analysis-7.fir | 99 +++++++++++++++++ .../alias-analysis-host-assoc.fir | 3 +- .../test/HLFIR/opt-bufferization-leslie3d.fir | 102 ++++++++++++++++++ flang/test/HLFIR/opt-variable-assign.fir | 24 ++--- 7 files changed, 252 insertions(+), 18 deletions(-) create mode 100644 flang/test/Analysis/AliasAnalysis/alias-analysis-7.fir create mode 100644 flang/test/HLFIR/opt-bufferization-leslie3d.fir diff --git a/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.td b/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.td index cf31fbcf37cf..6405afbf1bfb 100644 --- a/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.td +++ b/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.td @@ -177,6 +177,20 @@ def fir_FortranVariableOpInterface : OpInterface<"FortranVariableOpInterface"> { fir::FortranVariableFlagsEnum::host_assoc); } + /// Is this variable a Fortran target? + bool isTarget() { + auto attrs = getFortranAttrs(); + return attrs && bitEnumContainsAny(*attrs, + fir::FortranVariableFlagsEnum::target); + } + + /// Is this variable a Fortran intent(in)? + bool isIntentIn() { + auto attrs = getFortranAttrs(); + return attrs && bitEnumContainsAny(*attrs, + fir::FortranVariableFlagsEnum::intent_in); + } + /// Interface verifier imlementation for declare operations. mlir::LogicalResult verifyDeclareLikeOpImpl(mlir::Value memRef); diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp index aeb6e692784f..850026ebf33b 100644 --- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -239,13 +239,26 @@ ModRefResult AliasAnalysis::getModRef(Operation *op, Value location) { return result; } +AliasAnalysis::Source::Attributes +getAttrsFromVariable(fir::FortranVariableOpInterface var) { + AliasAnalysis::Source::Attributes attrs; + if (var.isTarget()) + attrs.set(AliasAnalysis::Attribute::Target); + if (var.isPointer()) + attrs.set(AliasAnalysis::Attribute::Pointer); + if (var.isIntentIn()) + attrs.set(AliasAnalysis::Attribute::IntentIn); + + return attrs; +} + AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v) { auto *defOp = v.getDefiningOp(); SourceKind type{SourceKind::Unknown}; mlir::Type ty; bool breakFromLoop{false}; bool approximateSource{false}; - bool followBoxAddr{false}; + bool followBoxAddr{mlir::isa(v.getType())}; mlir::SymbolRefAttr global; Source::Attributes attributes; while (defOp && !breakFromLoop) { @@ -334,6 +347,15 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v) { }) .Case([&](auto op) { auto varIf = llvm::cast(defOp); + // While going through a declare operation collect + // the variable attributes from it. Right now, some + // of the attributes are duplicated, e.g. a TARGET dummy + // argument has the target attribute both on its declare + // operation and on the entry block argument. + // In case of host associated use, the declare operation + // is the only carrier of the variable attributes, + // so we have to collect them here. + attributes |= getAttrsFromVariable(varIf); if (varIf.isHostAssoc()) { // Do not track past such DeclareOp, because it does not // currently provide any useful information. The host associated @@ -364,6 +386,8 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v) { // because of this limitation, we need to make sure we never return // MustAlias after going through a designate operation approximateSource = true; + if (mlir::isa(v.getType())) + followBoxAddr = true; }) .Default([&](auto op) { defOp = nullptr; diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-6.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-6.fir index 82d89989c9cf..85c780312bd4 100644 --- a/flang/test/Analysis/AliasAnalysis/alias-analysis-6.fir +++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-6.fir @@ -1,4 +1,4 @@ -// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' +// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' 2>&1 | FileCheck %s // CHECK: test_y(1)#0 <-> test_x(1)#0: MayAlias func.func @_QPtest(%arg0: !fir.ref>>> {fir.bindc_name = "y"}) { diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-7.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-7.fir new file mode 100644 index 000000000000..fdf2fc4353bb --- /dev/null +++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-7.fir @@ -0,0 +1,99 @@ +// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' 2>&1 | FileCheck %s + +// leslie3d case with two allocatable module variables +// that cannot alias: +// module les3d_data +// implicit real*8 (a-h,o-z) +// integer imax, jmax, kmax +// double precision,allocatable,dimension(:,:,:,:,:) :: q +// double precision,allocatable,dimension(:,:,:,:) :: du +// end module les3d_data +// subroutine update() +// use les3d_data +// implicit real*8(a-h,o-z) +// i2 = imax - 1 +// do k = 1, kmax - 1 +// do j = 1, jmax - 1 +// q(1:i2,j,k,1,m) = (q(1:i2,j,k,1,m) + du(1:i2,j,k,1)) +// end do +// end do +// end subroutine update + +// CHECK: allocatable_mod1#0 <-> allocatable_mod2#0: NoAlias +func.func @_QPupdate() { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %c1_i32 = arith.constant 1 : i32 + %0 = fir.address_of(@_QMles3d_dataEdu) : !fir.ref>>> + %1:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMles3d_dataEdu"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %2 = fir.alloca i32 {bindc_name = "i2", uniq_name = "_QFupdateEi2"} + %3:2 = hlfir.declare %2 {uniq_name = "_QFupdateEi2"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %4 = fir.address_of(@_QMles3d_dataEimax) : !fir.ref + %5:2 = hlfir.declare %4 {uniq_name = "_QMles3d_dataEimax"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %6 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFupdateEj"} + %7:2 = hlfir.declare %6 {uniq_name = "_QFupdateEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %8 = fir.address_of(@_QMles3d_dataEjmax) : !fir.ref + %9:2 = hlfir.declare %8 {uniq_name = "_QMles3d_dataEjmax"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %10 = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFupdateEk"} + %11:2 = hlfir.declare %10 {uniq_name = "_QFupdateEk"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %12 = fir.address_of(@_QMles3d_dataEkmax) : !fir.ref + %13:2 = hlfir.declare %12 {uniq_name = "_QMles3d_dataEkmax"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %14 = fir.alloca i32 {bindc_name = "m", uniq_name = "_QFupdateEm"} + %15:2 = hlfir.declare %14 {uniq_name = "_QFupdateEm"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %16 = fir.address_of(@_QMles3d_dataEq) : !fir.ref>>> + %17:2 = hlfir.declare %16 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMles3d_dataEq"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %18 = fir.load %5#0 : !fir.ref + %19 = arith.subi %18, %c1_i32 : i32 + hlfir.assign %19 to %3#0 : i32, !fir.ref + %20 = fir.load %13#0 : !fir.ref + %21 = arith.subi %20, %c1_i32 : i32 + %22 = fir.convert %21 : (i32) -> index + %23 = fir.convert %c1 : (index) -> i32 + %24:2 = fir.do_loop %arg0 = %c1 to %22 step %c1 iter_args(%arg1 = %23) -> (index, i32) { + fir.store %arg1 to %11#1 : !fir.ref + %25 = fir.load %9#0 : !fir.ref + %26 = arith.subi %25, %c1_i32 : i32 + %27 = fir.convert %26 : (i32) -> index + %28:2 = fir.do_loop %arg2 = %c1 to %27 step %c1 iter_args(%arg3 = %23) -> (index, i32) { + fir.store %arg3 to %7#1 : !fir.ref + %32 = fir.load %17#0 : !fir.ref>>> + %33 = fir.load %3#0 : !fir.ref + %34 = fir.convert %33 : (i32) -> index + %35 = arith.cmpi sgt, %34, %c0 : index + %36 = arith.select %35, %34, %c0 : index + %37 = fir.load %7#0 : !fir.ref + %38 = fir.convert %37 : (i32) -> i64 + %39 = fir.load %11#0 : !fir.ref + %40 = fir.convert %39 : (i32) -> i64 + %41 = fir.load %15#0 : !fir.ref + %42 = fir.convert %41 : (i32) -> i64 + %43 = fir.shape %36 : (index) -> !fir.shape<1> + %44 = hlfir.designate %32 (%c1:%34:%c1, %38, %40, %c1, %42) shape %43 {test.ptr = "allocatable_mod1"} : (!fir.box>>, index, index, index, i64, i64, index, i64, !fir.shape<1>) -> !fir.box> + %45 = fir.load %1#0 : !fir.ref>>> + %46 = hlfir.designate %45 (%c1:%34:%c1, %38, %40, %c1) shape %43 {test.ptr = "allocatable_mod2"} : (!fir.box>>, index, index, index, i64, i64, index, !fir.shape<1>) -> !fir.box> + %47 = hlfir.elemental %43 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %51 = hlfir.designate %44 (%arg4) : (!fir.box>, index) -> !fir.ref + %52 = hlfir.designate %46 (%arg4) : (!fir.box>, index) -> !fir.ref + %53 = fir.load %51 : !fir.ref + %54 = fir.load %52 : !fir.ref + %55 = arith.addf %53, %54 fastmath : f64 + %56 = hlfir.no_reassoc %55 : f64 + hlfir.yield_element %56 : f64 + } + hlfir.assign %47 to %44 : !hlfir.expr, !fir.box> + hlfir.destroy %47 : !hlfir.expr + %48 = arith.addi %arg2, %c1 : index + %49 = fir.load %7#1 : !fir.ref + %50 = arith.addi %49, %23 : i32 + fir.result %48, %50 : index, i32 + } + fir.store %28#1 to %7#1 : !fir.ref + %29 = arith.addi %arg0, %c1 : index + %30 = fir.load %11#1 : !fir.ref + %31 = arith.addi %30, %23 : i32 + fir.result %29, %31 : index, i32 + } + fir.store %24#1 to %11#1 : !fir.ref + return +} diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir index 785f1b91a7d9..7f90384ac99c 100644 --- a/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir +++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir @@ -297,8 +297,7 @@ func.func @_QFtest8Pinner(%arg0: !fir.ref>>> { // end subroutine inner // end subroutine test9 -// FIXME: 'g' is classified as Indirect access leading to a conservative reply: -// CHECK: test9_g(1)#0 <-> test9_x(1)#0: MayAlias +// CHECK: test9_g(1)#0 <-> test9_x(1)#0: NoAlias func.func @_QFtest9Pinner(%arg0: !fir.ref>>> {fir.host_assoc}) attributes {fir.internal_proc} { %0 = fir.address_of(@_QMglobalsEg) : !fir.ref>>> %1:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMglobalsEg"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) diff --git a/flang/test/HLFIR/opt-bufferization-leslie3d.fir b/flang/test/HLFIR/opt-bufferization-leslie3d.fir new file mode 100644 index 000000000000..351b3754cf4d --- /dev/null +++ b/flang/test/HLFIR/opt-bufferization-leslie3d.fir @@ -0,0 +1,102 @@ +// RUN: fir-opt --opt-bufferization %s | FileCheck %s + +// leslie3d case with two allocatable module variables +// that cannot alias: +// module les3d_data +// implicit real*8 (a-h,o-z) +// integer imax, jmax, kmax +// double precision,allocatable,dimension(:,:,:,:,:) :: q +// double precision,allocatable,dimension(:,:,:,:) :: du +// end module les3d_data +// subroutine update() +// use les3d_data +// implicit real*8(a-h,o-z) +// i2 = imax - 1 +// do k = 1, kmax - 1 +// do j = 1, jmax - 1 +// q(1:i2,j,k,1,m) = (q(1:i2,j,k,1,m) + du(1:i2,j,k,1)) +// end do +// end do +// end subroutine update + +func.func @_QPupdate() { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %c1_i32 = arith.constant 1 : i32 + %0 = fir.address_of(@_QMles3d_dataEdu) : !fir.ref>>> + %1:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMles3d_dataEdu"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %2 = fir.alloca i32 {bindc_name = "i2", uniq_name = "_QFupdateEi2"} + %3:2 = hlfir.declare %2 {uniq_name = "_QFupdateEi2"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %4 = fir.address_of(@_QMles3d_dataEimax) : !fir.ref + %5:2 = hlfir.declare %4 {uniq_name = "_QMles3d_dataEimax"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %6 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFupdateEj"} + %7:2 = hlfir.declare %6 {uniq_name = "_QFupdateEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %8 = fir.address_of(@_QMles3d_dataEjmax) : !fir.ref + %9:2 = hlfir.declare %8 {uniq_name = "_QMles3d_dataEjmax"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %10 = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFupdateEk"} + %11:2 = hlfir.declare %10 {uniq_name = "_QFupdateEk"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %12 = fir.address_of(@_QMles3d_dataEkmax) : !fir.ref + %13:2 = hlfir.declare %12 {uniq_name = "_QMles3d_dataEkmax"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %14 = fir.alloca i32 {bindc_name = "m", uniq_name = "_QFupdateEm"} + %15:2 = hlfir.declare %14 {uniq_name = "_QFupdateEm"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %16 = fir.address_of(@_QMles3d_dataEq) : !fir.ref>>> + %17:2 = hlfir.declare %16 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMles3d_dataEq"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %18 = fir.load %5#0 : !fir.ref + %19 = arith.subi %18, %c1_i32 : i32 + hlfir.assign %19 to %3#0 : i32, !fir.ref + %20 = fir.load %13#0 : !fir.ref + %21 = arith.subi %20, %c1_i32 : i32 + %22 = fir.convert %21 : (i32) -> index + %23 = fir.convert %c1 : (index) -> i32 + %24:2 = fir.do_loop %arg0 = %c1 to %22 step %c1 iter_args(%arg1 = %23) -> (index, i32) { + fir.store %arg1 to %11#1 : !fir.ref + %25 = fir.load %9#0 : !fir.ref + %26 = arith.subi %25, %c1_i32 : i32 + %27 = fir.convert %26 : (i32) -> index + %28:2 = fir.do_loop %arg2 = %c1 to %27 step %c1 iter_args(%arg3 = %23) -> (index, i32) { + fir.store %arg3 to %7#1 : !fir.ref + %32 = fir.load %17#0 : !fir.ref>>> + %33 = fir.load %3#0 : !fir.ref + %34 = fir.convert %33 : (i32) -> index + %35 = arith.cmpi sgt, %34, %c0 : index + %36 = arith.select %35, %34, %c0 : index + %37 = fir.load %7#0 : !fir.ref + %38 = fir.convert %37 : (i32) -> i64 + %39 = fir.load %11#0 : !fir.ref + %40 = fir.convert %39 : (i32) -> i64 + %41 = fir.load %15#0 : !fir.ref + %42 = fir.convert %41 : (i32) -> i64 + %43 = fir.shape %36 : (index) -> !fir.shape<1> + %44 = hlfir.designate %32 (%c1:%34:%c1, %38, %40, %c1, %42) shape %43 : (!fir.box>>, index, index, index, i64, i64, index, i64, !fir.shape<1>) -> !fir.box> + %45 = fir.load %1#0 : !fir.ref>>> + %46 = hlfir.designate %45 (%c1:%34:%c1, %38, %40, %c1) shape %43 : (!fir.box>>, index, index, index, i64, i64, index, !fir.shape<1>) -> !fir.box> + %47 = hlfir.elemental %43 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %51 = hlfir.designate %44 (%arg4) : (!fir.box>, index) -> !fir.ref + %52 = hlfir.designate %46 (%arg4) : (!fir.box>, index) -> !fir.ref + %53 = fir.load %51 : !fir.ref + %54 = fir.load %52 : !fir.ref + %55 = arith.addf %53, %54 fastmath : f64 + %56 = hlfir.no_reassoc %55 : f64 + hlfir.yield_element %56 : f64 + } + hlfir.assign %47 to %44 : !hlfir.expr, !fir.box> + hlfir.destroy %47 : !hlfir.expr + %48 = arith.addi %arg2, %c1 : index + %49 = fir.load %7#1 : !fir.ref + %50 = arith.addi %49, %23 : i32 + fir.result %48, %50 : index, i32 + } + fir.store %28#1 to %7#1 : !fir.ref + %29 = arith.addi %arg0, %c1 : index + %30 = fir.load %11#1 : !fir.ref + %31 = arith.addi %30, %23 : i32 + fir.result %29, %31 : index, i32 + } + fir.store %24#1 to %11#1 : !fir.ref + return +} +// CHECK-LABEL: func.func @_QPupdate() { +// CHECK-NOT: hlfir.assign {{.*}}!fir.box> +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : f64, !fir.ref +// CHECK-NOT: hlfir.assign {{.*}}!fir.box> diff --git a/flang/test/HLFIR/opt-variable-assign.fir b/flang/test/HLFIR/opt-variable-assign.fir index f0a8f013f3c2..17124fa86af6 100644 --- a/flang/test/HLFIR/opt-variable-assign.fir +++ b/flang/test/HLFIR/opt-variable-assign.fir @@ -185,7 +185,7 @@ func.func @_QPtest4(%arg0: !fir.ref>>> {f // CHECK-NOT: hlfir.assign -// TODO: LHS is a pointer, but RHS is a subroutine local, +// LHS is a pointer, but RHS is a subroutine local, // so they cannot alias. func.func @_QPtest5(%arg0: !fir.ref>>> {fir.bindc_name = "x"}) { %c0 = arith.constant 0 : index @@ -220,13 +220,13 @@ func.func @_QPtest5(%arg0: !fir.ref>>> {fi } // CHECK-LABEL: func.func @_QPtest5( // CHECK-NOT: hlfir.assign -// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !fir.ref>, !fir.box> +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : f32, !fir.ref // CHECK-NOT: hlfir.assign -// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !fir.ref>, !fir.box>> +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : f32, !fir.ref // CHECK-NOT: hlfir.assign -// TODO: RHS is a pointer, but LHS is a subroutine local, +// RHS is a pointer, but LHS is a subroutine local, // so they cannot alias. func.func @_QPtest6(%arg0: !fir.ref>>> {fir.bindc_name = "x"}) { %c1 = arith.constant 1 : index @@ -261,20 +261,14 @@ func.func @_QPtest6(%arg0: !fir.ref>>> {fi } // CHECK-LABEL: func.func @_QPtest6( // CHECK-NOT: hlfir.assign -// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !fir.box>, !fir.ref> +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : f32, !fir.ref // CHECK-NOT: hlfir.assign -// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !fir.box>>, !fir.ref> +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : f32, !fir.ref // CHECK-NOT: hlfir.assign -// TODO: LHS and RHS do not alias, and the assignment cannot +// LHS and RHS do not alias, and the assignment cannot // allocate/reallocate LHS, so we should be able to optimize. -// The box load blocks alias analysis. -// subroutine test7(x) -// real, allocatable :: x(:,:) -// real :: y(3,3) -// x(:,:) = y(:,:) -// end subroutine test7 func.func @_QPtest7(%arg0: !fir.ref>>> {fir.bindc_name = "x"}) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -305,4 +299,6 @@ func.func @_QPtest7(%arg0: !fir.ref>>> {f return } // CHECK-LABEL: func.func @_QPtest7( -// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !fir.ref>, !fir.box> +// CHECK-NOT: hlfir.assign +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : f32, !fir.ref +// CHECK-NOT: hlfir.assign