diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index fe7169423b6b..cc05fb71c84e 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -7651,6 +7651,17 @@ def J : JoinedOrSeparate<["-"], "J">, let Visibility = [FC1Option] in { +def ffp_maxmin_behavior_EQ + : Joined<["-"], "ffp-maxmin-behavior=">, + Flags<[HelpHidden]>, + Group, + Values<"legacy,portable,extremum,extremenum">, + HelpText<"Control max/min and [max|min][loc|val] behavior: " + "legacy (cmp+select), portable (same as legacy, " + " but may use max/minNum when -fno-signed-zeros " + "-fno-honor-nans), extremum (IEEE-754-2019 maximum/minimum), " + "extremenum (IEEE-754-2008 max/minNum)">; + def fget_definition : MultiArg<["-"], "fget-definition", 3>, HelpText<"Get the symbol definition from ">, Group; diff --git a/flang/include/flang/Evaluate/common.h b/flang/include/flang/Evaluate/common.h index 3d220afa7171..6adf395442ed 100644 --- a/flang/include/flang/Evaluate/common.h +++ b/flang/include/flang/Evaluate/common.h @@ -17,6 +17,7 @@ #include "flang/Common/target-rounding.h" #include "flang/Parser/char-block.h" #include "flang/Parser/message.h" +#include "flang/Support/FPMaxminBehavior.h" #include "flang/Support/Fortran-features.h" #include "flang/Support/Fortran.h" #include "flang/Support/default-kinds.h" @@ -218,15 +219,21 @@ public: FoldingContext(const common::IntrinsicTypeDefaultKinds &d, const IntrinsicProcTable &t, const TargetCharacteristics &c, const common::LanguageFeatureControl &lfc, - std::set &tempNames) + std::set &tempNames, + common::FPMaxminBehavior fpMaxminBehavior = + common::FPMaxminBehavior::Legacy) : defaults_{d}, intrinsics_{t}, targetCharacteristics_{c}, - languageFeatures_{lfc}, tempNames_{tempNames} {} + languageFeatures_{lfc}, tempNames_{tempNames}, + fpMaxminBehavior_{fpMaxminBehavior} {} FoldingContext(const parser::ContextualMessages &m, const common::IntrinsicTypeDefaultKinds &d, const 
IntrinsicProcTable &t, const TargetCharacteristics &c, const common::LanguageFeatureControl &lfc, - std::set &tempNames) + std::set &tempNames, + common::FPMaxminBehavior fpMaxminBehavior = + common::FPMaxminBehavior::Legacy) : messages_{m}, defaults_{d}, intrinsics_{t}, targetCharacteristics_{c}, - languageFeatures_{lfc}, tempNames_{tempNames} {} + languageFeatures_{lfc}, tempNames_{tempNames}, + fpMaxminBehavior_{fpMaxminBehavior} {} FoldingContext(const FoldingContext &that) : messages_{that.messages_}, defaults_{that.defaults_}, intrinsics_{that.intrinsics_}, @@ -235,8 +242,8 @@ public: analyzingPDTComponentKindSelector_{ that.analyzingPDTComponentKindSelector_}, impliedDos_{that.impliedDos_}, - languageFeatures_{that.languageFeatures_}, tempNames_{that.tempNames_} { - } + languageFeatures_{that.languageFeatures_}, tempNames_{that.tempNames_}, + fpMaxminBehavior_{that.fpMaxminBehavior_} {} FoldingContext( const FoldingContext &that, const parser::ContextualMessages &m) : messages_{m}, defaults_{that.defaults_}, intrinsics_{that.intrinsics_}, @@ -245,8 +252,8 @@ public: analyzingPDTComponentKindSelector_{ that.analyzingPDTComponentKindSelector_}, impliedDos_{that.impliedDos_}, - languageFeatures_{that.languageFeatures_}, tempNames_{that.tempNames_} { - } + languageFeatures_{that.languageFeatures_}, tempNames_{that.tempNames_}, + fpMaxminBehavior_{that.fpMaxminBehavior_} {} parser::ContextualMessages &messages() { return messages_; } const parser::ContextualMessages &messages() const { return messages_; } @@ -264,6 +271,9 @@ public: const common::LanguageFeatureControl &languageFeatures() const { return languageFeatures_; } + common::FPMaxminBehavior fpMaxminBehavior() const { + return fpMaxminBehavior_; + } template parser::Message *Warn(common::LanguageFeature feature, A &&...args) { return messages_.Warn( @@ -325,6 +335,7 @@ private: const common::LanguageFeatureControl &languageFeatures_; std::set &tempNames_; std::string realFlagWarningContext_; + 
common::FPMaxminBehavior fpMaxminBehavior_{common::FPMaxminBehavior::Legacy}; }; } // namespace Fortran::evaluate diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index 05ee0e28bcaa..cb5e66b1c2b7 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -52,6 +52,7 @@ CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass CODEGENOPT(DwarfVersion, 3, 0) ///< Dwarf version CODEGENOPT(Underscoring, 1, 1) +ENUM_CODEGENOPT(FPMaxminBehavior, Fortran::common::FPMaxminBehavior, 2, Fortran::common::FPMaxminBehavior::Legacy) ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use. ENUM_CODEGENOPT(DebugInfo, llvm::codegenoptions::DebugInfoKind, 4, llvm::codegenoptions::NoDebugInfo) ///< Level of debug info to generate ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, 4, llvm::driver::VectorLibrary::NoLibrary) ///< Vector functions library to use diff --git a/flang/include/flang/Frontend/CodeGenOptions.h b/flang/include/flang/Frontend/CodeGenOptions.h index 5a141e3c0a87..0fc006312854 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.h +++ b/flang/include/flang/Frontend/CodeGenOptions.h @@ -16,6 +16,7 @@ #define FORTRAN_FRONTEND_CODEGENOPTIONS_H #include "flang/Optimizer/OpenMP/Utils.h" +#include "flang/Support/FPMaxminBehavior.h" #include "llvm/Frontend/Debug/Options.h" #include "llvm/Frontend/Driver/CodeGenOptions.h" #include "llvm/Support/CodeGen.h" diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def index 9cf408a5d7b3..0b829bf3e08a 100644 --- a/flang/include/flang/Lower/LoweringOptions.def +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -88,5 +88,9 @@ ENUM_LOWERINGOPT(RegisterMLIRDiagnosticsHandler, unsigned, 1, 1) /// LineTablesOnly. Off by default. 
ENUM_LOWERINGOPT(PreserveUseDebugInfo, unsigned, 1, 0) +/// FP max/min behavior for max/min intrinsics and [max|min][loc|val] (Legacy, +/// Portable, Extremum, ExtremeNum). Default: Legacy. +ENUM_LOWERINGOPT(FPMaxminBehavior, Fortran::common::FPMaxminBehavior, 2, 0) + #undef LOWERINGOPT #undef ENUM_LOWERINGOPT diff --git a/flang/include/flang/Lower/LoweringOptions.h b/flang/include/flang/Lower/LoweringOptions.h index 171510393b81..d44d5f73eeb6 100644 --- a/flang/include/flang/Lower/LoweringOptions.h +++ b/flang/include/flang/Lower/LoweringOptions.h @@ -15,6 +15,7 @@ #ifndef FLANG_LOWER_LOWERINGOPTIONS_H #define FLANG_LOWER_LOWERINGOPTIONS_H +#include "flang/Support/FPMaxminBehavior.h" #include "flang/Support/MathOptionsBase.h" namespace Fortran::lower { diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h index c6531ac3d055..e31a01b88f19 100644 --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -21,6 +21,7 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Support/FPMaxminBehavior.h" #include "flang/Support/MathOptionsBase.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" @@ -633,6 +634,14 @@ public: return complexDivisionToRuntimeFlag; } + /// Setter/getter for fpMaxminBehavior. + void setFPMaxminBehavior(Fortran::common::FPMaxminBehavior mode) { + fpMaxminBehavior = mode; + } + Fortran::common::FPMaxminBehavior getFPMaxminBehavior() const { + return fpMaxminBehavior; + } + /// Dump the current function. (debug) LLVM_DUMP_METHOD void dumpFunc(); @@ -693,6 +702,14 @@ private: /// mlir::arith::FastMathAttr. mlir::arith::FastMathFlags fastMathFlags{}; + /// Controls how max/min idioms should be implemented. 
+ /// Right now, it is only used to propagate FPMaxminBehavior + /// to the IntrinsicCall lowering. In general, it can be used + /// for generating max/min idioms through FirBuilder anywhere + /// in the pipeline. + Fortran::common::FPMaxminBehavior fpMaxminBehavior{ + Fortran::common::FPMaxminBehavior::Legacy}; + /// IntegerOverflowFlags that need to be set for operations that support /// mlir::arith::IntegerOverflowFlagsAttr. mlir::arith::IntegerOverflowFlags integerOverflowFlags{}; diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 753d475b4de5..3ef4045518cc 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -56,49 +56,6 @@ genIntrinsicCall(fir::FirOpBuilder &, mlir::Location, llvm::ArrayRef args, Fortran::lower::AbstractConverter *converter = nullptr); -/// Enums used to templatize and share lowering of MIN and MAX. -enum class Extremum { Min, Max }; - -// There are different ways to deal with NaNs in MIN and MAX. -// Known existing behaviors are listed below and can be selected for -// f18 MIN/MAX implementation. -enum class ExtremumBehavior { - // Note: the Signaling/quiet aspect of NaNs in the behaviors below are - // not described because there is no way to control/observe such aspect in - // MLIR/LLVM yet. The IEEE behaviors come with requirements regarding this - // aspect that are therefore currently not enforced. In the descriptions - // below, NaNs can be signaling or quite. Returned NaNs may be signaling - // if one of the input NaN was signaling but it cannot be guaranteed either. - // Existing compilers using an IEEE behavior (gfortran) also do not fulfill - // signaling/quiet requirements. - IeeeMinMaximumNumber, - // IEEE minimumNumber/maximumNumber behavior (754-2019, section 9.6): - // If one of the argument is and number and the other is NaN, return the - // number. 
If both arguements are NaN, return NaN. - // Compilers: gfortran. - IeeeMinMaximum, - // IEEE minimum/maximum behavior (754-2019, section 9.6): - // If one of the argument is NaN, return NaN. - MinMaxss, - // x86 minss/maxss behavior: - // If the second argument is a number and the other is NaN, return the number. - // In all other cases where at least one operand is NaN, return NaN. - // Compilers: xlf (only for MAX), ifort, pgfortran -nollvm, and nagfor. - PgfortranLlvm, - // "Opposite of" x86 minss/maxss behavior: - // If the first argument is a number and the other is NaN, return the - // number. - // In all other cases where at least one operand is NaN, return NaN. - // Compilers: xlf (only for MIN), and pgfortran (with llvm). - IeeeMinMaxNum - // IEEE minNum/maxNum behavior (754-2008, section 5.3.1): - // TODO: Not implemented. - // It is the only behavior where the signaling/quiet aspect of a NaN argument - // impacts if the result should be NaN or the argument that is a number. - // LLVM/MLIR do not provide ways to observe this aspect, so it is not - // possible to implement it without some target dependent runtime. -}; - /// Enum specifying how intrinsic argument evaluate::Expr should be /// lowered to fir::ExtendedValue to be passed to genIntrinsicCall. 
enum class LowerIntrinsicArgAs { @@ -260,7 +217,7 @@ struct IntrinsicLibrary { mlir::Value genExponent(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genExtendsTypeOf(mlir::Type, llvm::ArrayRef); - template + template mlir::Value genExtremum(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genFCString(mlir::Type, llvm::ArrayRef); diff --git a/flang/include/flang/Optimizer/Passes/Pipelines.h b/flang/include/flang/Optimizer/Passes/Pipelines.h index 8f2ff5f82299..6b7840bbe816 100644 --- a/flang/include/flang/Optimizer/Passes/Pipelines.h +++ b/flang/include/flang/Optimizer/Passes/Pipelines.h @@ -132,11 +132,11 @@ enum class EnableOpenMP { None, Simd, Full }; /// Create a pass pipeline for lowering from HLFIR to FIR /// /// \param pm - MLIR pass manager that will hold the pipeline definition -/// \param optLevel - optimization level used for creating FIR optimization -/// passes pipeline -void createHLFIRToFIRPassPipeline( - mlir::PassManager &pm, EnableOpenMP enableOpenMP, - llvm::OptimizationLevel optLevel = defaultOptLevel); +/// \param enableOpenMP - whether OpenMP lowering is enabled +/// \param config - pipeline config (OptLevel, fpMaxminBehavior, etc.) 
+void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, + EnableOpenMP enableOpenMP, + const MLIRToLLVMPassPipelineConfig &config); struct OpenMPFIRPassPipelineOpts { /// Whether code is being generated for a target device rather than the host diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index f7a8e76879b7..0abfd150cefe 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -68,7 +68,8 @@ class SemanticsContext { public: SemanticsContext(const common::IntrinsicTypeDefaultKinds &, const common::LanguageFeatureControl &, const common::LangOptions &, - parser::AllCookedSources &); + parser::AllCookedSources &, + common::FPMaxminBehavior = common::FPMaxminBehavior::Legacy); ~SemanticsContext(); const common::IntrinsicTypeDefaultKinds &defaultKinds() const { diff --git a/flang/include/flang/Support/FPMaxminBehavior.h b/flang/include/flang/Support/FPMaxminBehavior.h new file mode 100644 index 000000000000..859ef4944c6b --- /dev/null +++ b/flang/include/flang/Support/FPMaxminBehavior.h @@ -0,0 +1,42 @@ +//===- Support/FPMaxminBehavior.h - FP max/min behavior option --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Shared definition of FP max/min behavior for max/min and [max|min][loc|val]. +/// Used by CodeGenOptions, LoweringOptions, and other components. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_SUPPORT_FPMAXMINBEHAVIOR_H_ +#define FORTRAN_SUPPORT_FPMAXMINBEHAVIOR_H_ + +#include "llvm/ADT/StringRef.h" + +namespace Fortran::common { + +/// Control for max/min and [max|min][loc|val] lowering, constant folding, and +/// related behavior. Legacy: current Flang behavior (always cmp+select). +/// Portable: same as Legacy but may use arith.maxnumf under +/// '-fno-signed-zeros -fno-honor-nans'. +/// Extremum: arith.maximumf/minimumf +/// ExtremeNum: arith.maxnumf/minnumf. +/// Legacy is transitional and will eventually be replaced by Portable. +enum class FPMaxminBehavior : unsigned { + Legacy, + Portable, + Extremum, + ExtremeNum, +}; + +/// Parse -ffp-maxmin-behavior= value. Triggers llvm_unreachable +/// for unknown strings. +FPMaxminBehavior parseFPMaxminBehavior(llvm::StringRef value); + +} // namespace Fortran::common + +#endif // FORTRAN_SUPPORT_FPMAXMINBEHAVIOR_H_ diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h index f29fc8ef0ea5..987abcc4972f 100644 --- a/flang/include/flang/Tools/CrossToolHelpers.h +++ b/flang/include/flang/Tools/CrossToolHelpers.h @@ -14,6 +14,7 @@ #define FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H #include "flang/Frontend/CodeGenOptions.h" +#include "flang/Support/FPMaxminBehavior.h" #include "flang/Support/LangOptions.h" #include "flang/Support/MathOptionsBase.h" #include @@ -150,6 +151,8 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks { int32_t DwarfVersion = 0; ///< Version of DWARF debug info to generate std::string SplitDwarfFile = ""; ///< File name for the split debug info std::string DwarfDebugFlags = ""; ///< Debug flags to append to DWARF producer + Fortran::common::FPMaxminBehavior fpMaxminBehavior = + Fortran::common::FPMaxminBehavior::Legacy; }; struct OffloadModuleOpts { diff --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp index 72b766e52ab3..ce38da39006d 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1661,6 +1661,12 @@ bool CompilerInvocation::createFromArgs( invoc.loweringOpts.setRepackArraysWhole(arg->getValue() == llvm::StringRef{"whole"}); + if (auto *arg = args.getLastArg(clang::options::OPT_ffp_maxmin_behavior_EQ)) { + auto value = Fortran::common::parseFPMaxminBehavior(arg->getValue()); + invoc.getCodeGenOpts().setFPMaxminBehavior(value); + invoc.loweringOpts.setFPMaxminBehavior(value); + } + success &= parseFrontendArgs(invoc.getFrontendOpts(), args, diags); parseTargetArgs(invoc.getTargetOpts(), args); parsePreprocessorArgs(invoc.getPreprocessorOpts(), args); @@ -1899,7 +1905,7 @@ CompilerInvocation::getSemanticsCtx( auto semanticsContext = std::make_unique( getDefaultKinds(), fortranOptions.features, getLangOpts(), - allCookedSources); + allCookedSources, getCodeGenOpts().getFPMaxminBehavior()); semanticsContext->set_moduleDirectory(getModuleDir()) .set_searchDirectories(fortranOptions.searchDirectories) diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index b3731ee2526c..e74c913cfa13 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -635,8 +635,11 @@ void CodeGenAction::lowerHLFIRToFIR() { enableOpenMP = fir::EnableOpenMP::Full; if (ci.getInvocation().getLangOpts().OpenMPSimd) enableOpenMP = fir::EnableOpenMP::Simd; + MLIRToLLVMPassPipelineConfig config(level); + config.fpMaxminBehavior = + ci.getInvocation().getLoweringOpts().getFPMaxminBehavior(); // Create the pass pipeline - fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, level); + fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, config); (void)mlir::applyPassManagerCLOptions(pm); mlir::TimingScope timingScopeMLIRPasses = timingScopeRoot.nest( @@ -748,6 +751,7 @@ void CodeGenAction::generateLLVMIR() { 
pm.enableVerifier(/*verifyPasses=*/true); MLIRToLLVMPassPipelineConfig config(level, opts, mathOpts); + config.fpMaxminBehavior = invoc.getLoweringOpts().getFPMaxminBehavior(); llvm::Triple pipelineTriple(invoc.getTargetOpts().triple); config.SkipConvertComplexPow = pipelineTriple.isAMDGCN(); fir::registerDefaultInlinerPass(config); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index a8f405dd03d1..4ee777e8e0a4 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -6454,6 +6454,8 @@ private: builder->setComplexDivisionToRuntimeFlag( bridge.getLoweringOptions().getComplexDivisionToRuntime()); builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions()); + builder->setFPMaxminBehavior( + bridge.getLoweringOptions().getFPMaxminBehavior()); builder->setInsertionPointToStart(&func.front()); if (funit.parent.isA()) { // Give internal linkage to internal functions. There are no name clash @@ -6737,6 +6739,8 @@ private: builder = new fir::FirOpBuilder(func, bridge.getKindMap(), symbolTable); assert(builder && "FirOpBuilder did not instantiate"); builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions()); + builder->setFPMaxminBehavior( + bridge.getLoweringOptions().getFPMaxminBehavior()); createGlobals(); if (mlir::Region *region = func.getCallableRegion()) region->dropAllReferences(); diff --git a/flang/lib/Lower/LoweringOptions.cpp b/flang/lib/Lower/LoweringOptions.cpp index 9456abf0e8de..fd1ba787e7f4 100644 --- a/flang/lib/Lower/LoweringOptions.cpp +++ b/flang/lib/Lower/LoweringOptions.cpp @@ -16,7 +16,8 @@ namespace Fortran::lower { LoweringOptions::LoweringOptions() : MathOptions{} { #define LOWERINGOPT(Name, Bits, Default) Name = Default; -#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) set##Name(Default); +#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) \ + set##Name(static_cast(Default)); #include "flang/Lower/LoweringOptions.def" } diff --git a/flang/lib/Lower/OpenACC.cpp 
b/flang/lib/Lower/OpenACC.cpp index 45420a1b720c..f44e5b2a28f6 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -1061,15 +1061,31 @@ genDataExitOperations(fir::FirOpBuilder &builder, /// Return the corresponding enum value for the mlir::acc::ReductionOperator /// from the parser representation. static mlir::acc::ReductionOperator -getReductionOperator(const Fortran::parser::ReductionOperator &op) { +getReductionOperator(const Fortran::parser::ReductionOperator &op, + mlir::Type reductionTy, + const Fortran::lower::AbstractConverter &converter) { + Fortran::common::FPMaxminBehavior maxminMode = + converter.getLoweringOptions().getFPMaxminBehavior(); switch (op.v) { case Fortran::parser::ReductionOperator::Operator::Plus: return mlir::acc::ReductionOperator::AccAdd; case Fortran::parser::ReductionOperator::Operator::Multiply: return mlir::acc::ReductionOperator::AccMul; case Fortran::parser::ReductionOperator::Operator::Max: + if (fir::isa_real(reductionTy)) { + if (maxminMode == Fortran::common::FPMaxminBehavior::Extremum) + return mlir::acc::ReductionOperator::AccMaximumf; + else if (maxminMode == Fortran::common::FPMaxminBehavior::ExtremeNum) + return mlir::acc::ReductionOperator::AccMaxnumf; + } return mlir::acc::ReductionOperator::AccMax; case Fortran::parser::ReductionOperator::Operator::Min: + if (fir::isa_real(reductionTy)) { + if (maxminMode == Fortran::common::FPMaxminBehavior::Extremum) + return mlir::acc::ReductionOperator::AccMinimumf; + else if (maxminMode == Fortran::common::FPMaxminBehavior::ExtremeNum) + return mlir::acc::ReductionOperator::AccMinnumf; + } return mlir::acc::ReductionOperator::AccMin; case Fortran::parser::ReductionOperator::Operator::Iand: return mlir::acc::ReductionOperator::AccIand; @@ -1115,7 +1131,6 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList, fir::FirOpBuilder &builder = converter.getFirOpBuilder(); const auto &objects = std::get(objectList.t); const auto &op = 
std::get(objectList.t); - mlir::acc::ReductionOperator mlirOp = getReductionOperator(op); Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext}; for (const auto &accObject : objects.v) { llvm::SmallVector bounds; @@ -1144,6 +1159,9 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList, if (!isSupportedReductionType(reductionTy)) TODO(operandLocation, "reduction with unsupported type"); + mlir::acc::ReductionOperator mlirOp = + getReductionOperator(op, reductionTy, converter); + if (designator) { Fortran::semantics::SomeExpr someExpr = *designator; if (Fortran::lower::detail::getRef( diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index d754a8122e64..9036d65f138d 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -550,7 +550,7 @@ static constexpr IntrinsicHandler handlers[]{ &I::genMatmulTranspose, {{{"matrix_a", asAddr}, {"matrix_b", asAddr}}}, /*isElemental=*/false}, - {"max", &I::genExtremum}, + {"max", &I::genExtremum}, {"maxloc", &I::genMaxloc, {{{"array", asBox}, @@ -567,7 +567,7 @@ static constexpr IntrinsicHandler handlers[]{ /*isElemental=*/false}, {"merge", &I::genMerge}, {"merge_bits", &I::genMergeBits}, - {"min", &I::genExtremum}, + {"min", &I::genExtremum}, {"minloc", &I::genMinloc, {{{"array", asBox}, @@ -8729,75 +8729,77 @@ IntrinsicLibrary::genTrim(mlir::Type resultType, } // Compare two FIR values and return boolean result as i1. -template -static mlir::Value createExtremumCompare(mlir::Location loc, - fir::FirOpBuilder &builder, - mlir::Value left, mlir::Value right) { +template +static mlir::Value genExtremumResult(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Value left, mlir::Value right) { mlir::Type type = left.getType(); mlir::arith::CmpIPredicate integerPredicate = - type.isUnsignedInteger() ? extremum == Extremum::Max - ? 
mlir::arith::CmpIPredicate::ugt - : mlir::arith::CmpIPredicate::ult - : extremum == Extremum::Max ? mlir::arith::CmpIPredicate::sgt - : mlir::arith::CmpIPredicate::slt; - static constexpr mlir::arith::CmpFPredicate orderedCmp = - extremum == Extremum::Max ? mlir::arith::CmpFPredicate::OGT - : mlir::arith::CmpFPredicate::OLT; - mlir::Value result; + type.isUnsignedInteger() ? isMax ? mlir::arith::CmpIPredicate::ugt + : mlir::arith::CmpIPredicate::ult + : isMax ? mlir::arith::CmpIPredicate::sgt + : mlir::arith::CmpIPredicate::slt; + mlir::Value pred; if (fir::isa_real(type)) { - // Note: the signaling/quit aspect of the result required by IEEE - // cannot currently be obtained with LLVM without ad-hoc runtime. - if constexpr (behavior == ExtremumBehavior::IeeeMinMaximumNumber) { - // Return the number if one of the inputs is NaN and the other is - // a number. - auto leftIsResult = - mlir::arith::CmpFOp::create(builder, loc, orderedCmp, left, right); - auto rightIsNan = mlir::arith::CmpFOp::create( - builder, loc, mlir::arith::CmpFPredicate::UNE, right, right); - result = - mlir::arith::OrIOp::create(builder, loc, leftIsResult, rightIsNan); - } else if constexpr (behavior == ExtremumBehavior::IeeeMinMaximum) { - // Always return NaNs if one the input is NaNs - auto leftIsResult = - mlir::arith::CmpFOp::create(builder, loc, orderedCmp, left, right); - auto leftIsNan = mlir::arith::CmpFOp::create( - builder, loc, mlir::arith::CmpFPredicate::UNE, left, left); - result = - mlir::arith::OrIOp::create(builder, loc, leftIsResult, leftIsNan); - } else if constexpr (behavior == ExtremumBehavior::MinMaxss) { - // If the left is a NaN, return the right whatever it is. - result = - mlir::arith::CmpFOp::create(builder, loc, orderedCmp, left, right); - } else if constexpr (behavior == ExtremumBehavior::PgfortranLlvm) { - // If one of the operand is a NaN, return left whatever it is. - static constexpr auto unorderedCmp = - extremum == Extremum::Max ? 
mlir::arith::CmpFPredicate::UGT - : mlir::arith::CmpFPredicate::ULT; - result = - mlir::arith::CmpFOp::create(builder, loc, unorderedCmp, left, right); - } else { - // TODO: ieeeMinNum/ieeeMaxNum - static_assert(behavior == ExtremumBehavior::IeeeMinMaxNum, - "ieeeMinNum/ieeeMaxNum behavior not implemented"); + switch (builder.getFPMaxminBehavior()) { + case Fortran::common::FPMaxminBehavior::Portable: + // If the left is NaN, return the right whatever it is. + // Signed zeros are equal, so max/min(zero, zero) always + // returns the second 'zero'. + if (mlir::arith::bitEnumContainsAll( + builder.getFastMathFlags(), + mlir::arith::FastMathFlags::nnan | + mlir::arith::FastMathFlags::nsz)) { + // If there are no NaNs and signed zeros, we can use a shorter + // arith.max/minnumf representation. + if constexpr (isMax) + return mlir::arith::MaxNumFOp::create(builder, loc, left, right); + else + return mlir::arith::MinNumFOp::create(builder, loc, left, right); + } + [[fallthrough]]; + case Fortran::common::FPMaxminBehavior::Legacy: { + static constexpr mlir::arith::CmpFPredicate pred = + isMax ? 
mlir::arith::CmpFPredicate::OGT + : mlir::arith::CmpFPredicate::OLT; + mlir::Value cmp = + mlir::arith::CmpFOp::create(builder, loc, pred, left, right); + return mlir::arith::SelectOp::create(builder, loc, cmp, left, right); } + case Fortran::common::FPMaxminBehavior::Extremum: + if constexpr (isMax) + return mlir::arith::MaximumFOp::create(builder, loc, left, right); + else + return mlir::arith::MinimumFOp::create(builder, loc, left, right); + case Fortran::common::FPMaxminBehavior::ExtremeNum: + if constexpr (isMax) + return mlir::arith::MaxNumFOp::create(builder, loc, left, right); + else + return mlir::arith::MinNumFOp::create(builder, loc, left, right); + } + + llvm_unreachable("unsupported FPMaxminBehavior"); } else if (fir::isa_integer(type)) { + mlir::Value cmpLeft = left; + mlir::Value cmpRight = right; if (type.isUnsignedInteger()) { mlir::Type signlessType = mlir::IntegerType::get( builder.getContext(), type.getIntOrFloatBitWidth(), mlir::IntegerType::SignednessSemantics::Signless); - left = builder.createConvert(loc, signlessType, left); - right = builder.createConvert(loc, signlessType, right); + cmpLeft = builder.createConvert(loc, signlessType, left); + cmpRight = builder.createConvert(loc, signlessType, right); } - result = mlir::arith::CmpIOp::create(builder, loc, integerPredicate, left, - right); + pred = mlir::arith::CmpIOp::create(builder, loc, integerPredicate, cmpLeft, + cmpRight); } else if (fir::isa_char(type) || fir::isa_char(fir::unwrapRefType(type))) { // TODO: ! character min and max is tricky because the result // length is the length of the longest argument! // So we may need a temp. 
TODO(loc, "intrinsic: min and max for CHARACTER"); } - assert(result && "result must be defined"); - return result; + assert(pred && "pred must be defined"); + + return mlir::arith::SelectOp::create(builder, loc, pred, left, right); } // UNLINK @@ -9097,16 +9099,13 @@ IntrinsicLibrary::genMinval(mlir::Type resultType, } // MIN and MAX -template +template mlir::Value IntrinsicLibrary::genExtremum(mlir::Type, llvm::ArrayRef args) { assert(args.size() >= 1); mlir::Value result = args[0]; - for (auto arg : args.drop_front()) { - mlir::Value mask = - createExtremumCompare(loc, builder, result, arg); - result = mlir::arith::SelectOp::create(builder, loc, mask, result, arg); - } + for (auto arg : args.drop_front()) + result = genExtremumResult(loc, builder, result, arg); return result; } @@ -9168,17 +9167,15 @@ genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value genMax(fir::FirOpBuilder &builder, mlir::Location loc, llvm::ArrayRef args) { assert(args.size() > 0 && "max requires at least one argument"); - return IntrinsicLibrary{builder, loc} - .genExtremum(args[0].getType(), - args); + return IntrinsicLibrary{builder, loc}.genExtremum( + args[0].getType(), args); } mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc, llvm::ArrayRef args) { assert(args.size() > 0 && "min requires at least one argument"); - return IntrinsicLibrary{builder, loc} - .genExtremum(args[0].getType(), - args); + return IntrinsicLibrary{builder, loc}.genExtremum( + args[0].getType(), args); } mlir::Value genDivC(fir::FirOpBuilder &builder, mlir::Location loc, diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index d8ed9ce968e0..5d427c9aab8a 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -1025,11 +1025,35 @@ static mlir::Value genScalarCombiner(fir::FirOpBuilder 
&builder, TODO(loc, "reduction mul type"); } - if (op == mlir::acc::ReductionOperator::AccMin) - return fir::genMin(builder, loc, {value1, value2}); + if (op == mlir::acc::ReductionOperator::AccMin || + op == mlir::acc::ReductionOperator::AccMinimumf || + op == mlir::acc::ReductionOperator::AccMinnumf) { + Fortran::common::FPMaxminBehavior savedMode = builder.getFPMaxminBehavior(); + if (op == mlir::acc::ReductionOperator::AccMinimumf) + builder.setFPMaxminBehavior(Fortran::common::FPMaxminBehavior::Extremum); + else if (op == mlir::acc::ReductionOperator::AccMinnumf) + builder.setFPMaxminBehavior( + Fortran::common::FPMaxminBehavior::ExtremeNum); - if (op == mlir::acc::ReductionOperator::AccMax) - return fir::genMax(builder, loc, {value1, value2}); + mlir::Value result = fir::genMin(builder, loc, {value1, value2}); + builder.setFPMaxminBehavior(savedMode); + return result; + } + + if (op == mlir::acc::ReductionOperator::AccMax || + op == mlir::acc::ReductionOperator::AccMaximumf || + op == mlir::acc::ReductionOperator::AccMaxnumf) { + Fortran::common::FPMaxminBehavior savedMode = builder.getFPMaxminBehavior(); + if (op == mlir::acc::ReductionOperator::AccMaximumf) + builder.setFPMaxminBehavior(Fortran::common::FPMaxminBehavior::Extremum); + else if (op == mlir::acc::ReductionOperator::AccMaxnumf) + builder.setFPMaxminBehavior( + Fortran::common::FPMaxminBehavior::ExtremeNum); + + mlir::Value result = fir::genMax(builder, loc, {value1, value2}); + builder.setFPMaxminBehavior(savedMode); + return result; + } if (op == mlir::acc::ReductionOperator::AccIand) return mlir::arith::AndIOp::create(builder, loc, value1, value2); diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp index bb1ad84523c8..b6a34e0e5aad 100644 --- a/flang/lib/Optimizer/Passes/Pipelines.cpp +++ b/flang/lib/Optimizer/Passes/Pipelines.cpp @@ -255,11 +255,12 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm, /// Create a pass pipeline for 
lowering from HLFIR to FIR /// /// \param pm - MLIR pass manager that will hold the pipeline definition -/// \param optLevel - optimization level used for creating FIR optimization -/// passes pipeline +/// \param enableOpenMP - whether OpenMP lowering is enabled +/// \param config - pipeline config (OptLevel, etc.) void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, EnableOpenMP enableOpenMP, - llvm::OptimizationLevel optLevel) { + const MLIRToLLVMPassPipelineConfig &config) { + llvm::OptimizationLevel optLevel = config.OptLevel; if (optLevel.getSizeLevel() > 0 || optLevel.getSpeedupLevel() > 0) { addNestedPassToAllTopLevelOperations( pm, hlfir::createExpressionSimplification); @@ -442,7 +443,7 @@ void createMLIRToLLVMPassPipeline(mlir::PassManager &pm, enableOpenMP = fir::EnableOpenMP::Full; if (config.EnableOpenMPSimd) enableOpenMP = fir::EnableOpenMP::Simd; - fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, config.OptLevel); + fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, config); // Add default optimizer pass pipeline. 
fir::createDefaultFIROptimizerPassPipeline(pm, config); diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 72b36416adda..8bca853f01a2 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -381,14 +381,16 @@ SemanticsContext::SemanticsContext( const common::IntrinsicTypeDefaultKinds &defaultKinds, const common::LanguageFeatureControl &languageFeatures, const common::LangOptions &langOpts, - parser::AllCookedSources &allCookedSources) + parser::AllCookedSources &allCookedSources, + common::FPMaxminBehavior fpMaxminBehavior) : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures}, langOpts_{langOpts}, allCookedSources_{allCookedSources}, intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)}, globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope( Scope::Kind::IntrinsicModules, nullptr)}, foldingContext_{parser::ContextualMessages{&messages_}, defaultKinds_, - intrinsics_, targetCharacteristics_, languageFeatures_, tempNames_} {} + intrinsics_, targetCharacteristics_, languageFeatures_, tempNames_, + fpMaxminBehavior} {} SemanticsContext::~SemanticsContext() {} diff --git a/flang/lib/Support/CMakeLists.txt b/flang/lib/Support/CMakeLists.txt index 363f57ce97da..599cd485f87c 100644 --- a/flang/lib/Support/CMakeLists.txt +++ b/flang/lib/Support/CMakeLists.txt @@ -44,6 +44,7 @@ endif() add_flang_library(FortranSupport default-kinds.cpp + FPMaxminBehavior.cpp Flags.cpp Fortran.cpp Fortran-features.cpp diff --git a/flang/lib/Support/FPMaxminBehavior.cpp b/flang/lib/Support/FPMaxminBehavior.cpp new file mode 100644 index 000000000000..a7739d4a0633 --- /dev/null +++ b/flang/lib/Support/FPMaxminBehavior.cpp @@ -0,0 +1,23 @@ +//===--- Support/FPMaxminBehavior.cpp - Parse FP max/min behavior ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Support/FPMaxminBehavior.h" +#include "llvm/ADT/StringSwitch.h" + +namespace Fortran::common { + +FPMaxminBehavior parseFPMaxminBehavior(llvm::StringRef value) { + return llvm::StringSwitch(value) + .Case("legacy", FPMaxminBehavior::Legacy) + .Case("portable", FPMaxminBehavior::Portable) + .Case("extremum", FPMaxminBehavior::Extremum) + .Case("extremenum", FPMaxminBehavior::ExtremeNum) + .Default(FPMaxminBehavior::Legacy); +} + +} // namespace Fortran::common diff --git a/flang/test/Driver/ffp-maxmin-behavior.f90 b/flang/test/Driver/ffp-maxmin-behavior.f90 new file mode 100644 index 000000000000..242ad274d614 --- /dev/null +++ b/flang/test/Driver/ffp-maxmin-behavior.f90 @@ -0,0 +1,19 @@ +! Test that -ffp-maxmin-behavior is accepted by flang -fc1 (all values and +! unknown defaulting to legacy) and is not recognized by the flang driver. + +program p +end program p + +! flang -fc1 accepts all valid values +! RUN: %flang_fc1 -fsyntax-only -ffp-maxmin-behavior=legacy %s +! RUN: %flang_fc1 -fsyntax-only -ffp-maxmin-behavior=portable %s +! RUN: %flang_fc1 -fsyntax-only -ffp-maxmin-behavior=extremum %s +! RUN: %flang_fc1 -fsyntax-only -ffp-maxmin-behavior=extremenum %s + +! flang -fc1 accepts unknown value (defaults to legacy, no error) +! RUN: %flang_fc1 -fsyntax-only -ffp-maxmin-behavior=invalid %s + +! flang driver does not forward the option to -fc1 (fc1-only option) +! RUN: not %flang -### -ffp-maxmin-behavior=legacy %s 2>&1 \ +! RUN: | FileCheck %s -check-prefix=DRIVER-UNKNOWN-ARG +! 
DRIVER-UNKNOWN-ARG: error: unknown argument '-ffp-maxmin-behavior=legacy' diff --git a/flang/test/Lower/OpenACC/acc-reduction-maxmin.f90 b/flang/test/Lower/OpenACC/acc-reduction-maxmin.f90 new file mode 100644 index 000000000000..2d0746decf29 --- /dev/null +++ b/flang/test/Lower/OpenACC/acc-reduction-maxmin.f90 @@ -0,0 +1,114 @@ +! This test checks lowering of OpenACC reduction clause. + +! RUN: %flang_fc1 -fopenacc -emit-hlfir -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY +! RUN: %flang_fc1 -fopenacc -emit-hlfir -ffp-maxmin-behavior=legacy -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY +! RUN: %flang_fc1 -fopenacc -emit-hlfir -ffp-maxmin-behavior=extremum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMUM +! RUN: %flang_fc1 -fopenacc -emit-hlfir -ffp-maxmin-behavior=extremenum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMENUM + +! TODO: we should get rid of the legacy mode to make the generation of +! arith.max/minnumf straightforward for portable mode + nsz + nnan: +! RUN: %flang_fc1 -fopenacc -emit-hlfir -ffp-maxmin-behavior=portable -fno-signed-zeros -menable-no-nans -o - %s 2>&1 | FileCheck %s -check-prefix=PORTABLE-NANNSZ + +subroutine acc_scalar_reduction_max(a) + real :: a + !$acc parallel reduction(max:a) + !$acc end parallel +end subroutine acc_scalar_reduction_max + +subroutine acc_array_reduction_max(a) + real :: a(10) + !$acc parallel reduction(max:a) + !$acc end parallel +end subroutine acc_array_reduction_max + +subroutine acc_scalar_reduction_min(a) + real :: a + !$acc parallel reduction(min:a) + !$acc end parallel +end subroutine acc_scalar_reduction_min + +subroutine acc_array_reduction_min(a) + real :: a(10) + !$acc parallel reduction(min:a) + !$acc end parallel +end subroutine acc_array_reduction_min + +! LEGACY-LABEL: acc.reduction.recipe @reduction_min_ref_10xf32 : !fir.ref> reduction_operator init { +! LEGACY: } combiner { +! LEGACY: fir.do_loop +! 
LEGACY: %[[CMPF_0:.*]] = arith.cmpf olt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath : f32 +! LEGACY: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32 + +! LEGACY-LABEL: acc.reduction.recipe @reduction_min_ref_f32 : !fir.ref reduction_operator init { +! LEGACY: } combiner { +! LEGACY: %[[CMPF_0:.*]] = arith.cmpf olt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath : f32 +! LEGACY: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32 + +! LEGACY-LABEL: acc.reduction.recipe @reduction_max_ref_10xf32 : !fir.ref> reduction_operator init { +! LEGACY: } combiner { +! LEGACY: fir.do_loop +! LEGACY: %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath : f32 +! LEGACY: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32 + +! LEGACY-LABEL: acc.reduction.recipe @reduction_max_ref_f32 : !fir.ref reduction_operator init { +! LEGACY: } combiner { +! LEGACY: %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath : f32 +! LEGACY: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32 + +! EXTREMUM-LABEL: acc.reduction.recipe @reduction_minimumf_ref_10xf32 : !fir.ref> reduction_operator init { +! EXTREMUM: } combiner { +! EXTREMUM: fir.do_loop +! EXTREMUM: %[[MINIMUMF_0:.*]] = arith.minimumf %{{.*}}, %{{.*}} fastmath : f32 + +! EXTREMUM-LABEL: acc.reduction.recipe @reduction_minimumf_ref_f32 : !fir.ref reduction_operator init { +! EXTREMUM: } combiner { +! EXTREMUM: %[[MINIMUMF_0:.*]] = arith.minimumf %{{.*}}, %{{.*}} fastmath : f32 + +! EXTREMUM-LABEL: acc.reduction.recipe @reduction_maximumf_ref_10xf32 : !fir.ref> reduction_operator init { +! EXTREMUM: } combiner { +! EXTREMUM: fir.do_loop +! EXTREMUM: %[[MAXIMUMF_0:.*]] = arith.maximumf %{{.*}}, %{{.*}} fastmath : f32 + +! EXTREMUM-LABEL: acc.reduction.recipe @reduction_maximumf_ref_f32 : !fir.ref reduction_operator init { +! EXTREMUM-LABEL: } combiner { +! 
EXTREMUM: %[[MAXIMUMF_0:.*]] = arith.maximumf %{{.*}}, %{{.*}} fastmath : f32 + +! EXTREMENUM-LABEL: acc.reduction.recipe @reduction_minnumf_ref_10xf32 : !fir.ref> reduction_operator init { +! EXTREMENUM: } combiner { +! EXTREMENUM: fir.do_loop +! EXTREMENUM: %[[MINNUMF_0:.*]] = arith.minnumf %{{.*}}, %{{.*}} fastmath : f32 + +! EXTREMENUM-LABEL: acc.reduction.recipe @reduction_minnumf_ref_f32 : !fir.ref reduction_operator init { +! EXTREMENUM: } combiner { +! EXTREMENUM: %[[MINNUMF_0:.*]] = arith.minnumf %{{.*}}, %{{.*}} fastmath : f32 + +! EXTREMENUM-LABEL: acc.reduction.recipe @reduction_maxnumf_ref_10xf32 : !fir.ref> reduction_operator init { +! EXTREMENUM: } combiner { +! EXTREMENUM: fir.do_loop +! EXTREMENUM: %[[MAXNUMF_0:.*]] = arith.maxnumf %{{.*}}, %{{.*}} fastmath : f32 + +! EXTREMENUM-LABEL: acc.reduction.recipe @reduction_maxnumf_ref_f32 : !fir.ref reduction_operator init { +! EXTREMENUM-LABEL: } combiner { +! EXTREMENUM: %[[MAXNUMF_0:.*]] = arith.maxnumf %{{.*}}, %{{.*}} fastmath : f32 + +! PORTABLE-NANNSZ-LABEL: acc.reduction.recipe @reduction_min_ref_10xf32 : !fir.ref> reduction_operator init { +! PORTABLE-NANNSZ: } combiner { +! PORTABLE-NANNSZ: fir.do_loop +! PORTABLE-NANNSZ: %[[CMPF_0:.*]] = arith.cmpf olt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath : f32 +! PORTABLE-NANNSZ: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32 + +! PORTABLE-NANNSZ-LABEL: acc.reduction.recipe @reduction_min_ref_f32 : !fir.ref reduction_operator init { +! PORTABLE-NANNSZ: } combiner { +! PORTABLE-NANNSZ: %[[CMPF_0:.*]] = arith.cmpf olt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath : f32 +! PORTABLE-NANNSZ: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32 + +! PORTABLE-NANNSZ-LABEL: acc.reduction.recipe @reduction_max_ref_10xf32 : !fir.ref> reduction_operator init { +! PORTABLE-NANNSZ: } combiner { +! PORTABLE-NANNSZ: fir.do_loop +! 
PORTABLE-NANNSZ: %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath : f32 +! PORTABLE-NANNSZ: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32 + +! PORTABLE-NANNSZ-LABEL: acc.reduction.recipe @reduction_max_ref_f32 : !fir.ref reduction_operator init { +! PORTABLE-NANNSZ: } combiner { +! PORTABLE-NANNSZ: %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath : f32 +! PORTABLE-NANNSZ: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32 diff --git a/flang/test/Lower/fp-maxmin-behavior.f90 b/flang/test/Lower/fp-maxmin-behavior.f90 new file mode 100644 index 000000000000..7b668f3b5085 --- /dev/null +++ b/flang/test/Lower/fp-maxmin-behavior.f90 @@ -0,0 +1,52 @@ +! Test lowering of real MIN/MAX with -ffp-maxmin-behavior (legacy, portable, extremum, extremenum). +! Legacy uses arith.cmpf + arith.select; extremum uses arith.maximumf/minimumf; +! extremenum uses arith.maxnumf/minnumf; portable with -fno-signed-zeros -menable-no-nans uses maxnumf/minnumf. + +! bbc: legacy, extremum, extremenum +! RUN: bbc -emit-hlfir -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY +! RUN: bbc -emit-hlfir -ffp-maxmin-behavior=legacy -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY +! RUN: bbc -emit-hlfir -ffp-maxmin-behavior=extremum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMUM +! RUN: bbc -emit-hlfir -ffp-maxmin-behavior=extremenum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMENUM + +! flang -fc1: legacy, extremum, extremenum +! RUN: %flang_fc1 -emit-hlfir -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY +! RUN: %flang_fc1 -emit-hlfir -ffp-maxmin-behavior=legacy -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY +! RUN: %flang_fc1 -emit-hlfir -ffp-maxmin-behavior=extremum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMUM +! RUN: %flang_fc1 -emit-hlfir -ffp-maxmin-behavior=extremenum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMENUM + +! 
portable with -fno-signed-zeros -menable-no-nans => maxnumf/minnumf (flang -fc1 only; bbc does not expose these flags) +! RUN: %flang_fc1 -emit-hlfir -ffp-maxmin-behavior=portable -fno-signed-zeros -menable-no-nans -o - %s 2>&1 | FileCheck %s -check-prefix=PORTABLE-NANNSZ + +subroutine real_max(a, b, r) + real :: a, b, r + r = max(a, b) +end subroutine +! LEGACY-LABEL: func.func @_QPreal_max( +! LEGACY: arith.cmpf ogt, +! LEGACY: arith.select + +! EXTREMUM-LABEL: func.func @_QPreal_max( +! EXTREMUM: arith.maximumf + +! EXTREMENUM-LABEL: func.func @_QPreal_max( +! EXTREMENUM: arith.maxnumf + +! PORTABLE-NANNSZ-LABEL: func.func @_QPreal_max( +! PORTABLE-NANNSZ: arith.maxnumf + +subroutine real_min(a, b, r) + real :: a, b, r + r = min(a, b) +end subroutine +! LEGACY-LABEL: func.func @_QPreal_min( +! LEGACY: arith.cmpf olt, +! LEGACY: arith.select + +! EXTREMUM-LABEL: func.func @_QPreal_min( +! EXTREMUM: arith.minimumf + +! EXTREMENUM-LABEL: func.func @_QPreal_min( +! EXTREMENUM: arith.minnumf + +! PORTABLE-NANNSZ-LABEL: func.func @_QPreal_min( +! 
PORTABLE-NANNSZ: arith.minnumf diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index 1f5989bc3460..fc00af3d924c 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -17,6 +17,7 @@ #include "flang/Frontend/CodeGenOptions.h" #include "flang/Frontend/TargetOptions.h" #include "flang/Lower/Bridge.h" +#include "flang/Lower/LoweringOptions.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/Support/Verifier.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" @@ -37,6 +38,7 @@ #include "flang/Semantics/runtime-type-info.h" #include "flang/Semantics/semantics.h" #include "flang/Semantics/unparse-with-symbols.h" +#include "flang/Support/FPMaxminBehavior.h" #include "flang/Support/Fortran-features.h" #include "flang/Support/LangOptions.h" #include "flang/Support/OpenMP-features.h" @@ -292,6 +294,22 @@ static llvm::cl::opt complexRange( "multiplication and division [full|improved|basic]"), llvm::cl::init("")); +static llvm::cl::opt fpMaxminBehavior( + "ffp-maxmin-behavior", + llvm::cl::desc("Control max/min and [max|min][loc|val] lowering " + "[legacy|portable|extremum|extremenum]"), + llvm::cl::values(clEnumValN(Fortran::common::FPMaxminBehavior::Legacy, + "legacy", "cmp+select"), + clEnumValN(Fortran::common::FPMaxminBehavior::Portable, + "portable", + "cmp+select and arith.max/minnumf when nnan " + "and nsz fast math flags are enabled"), + clEnumValN(Fortran::common::FPMaxminBehavior::Extremum, + "extremum", "arith.max/minimum"), + clEnumValN(Fortran::common::FPMaxminBehavior::ExtremeNum, + "extremenum", "arith.max/minnum")), + llvm::cl::init(Fortran::common::FPMaxminBehavior::Legacy)); + #define FLANG_EXCLUDE_CODEGEN #include "flang/Optimizer/Passes/CommandLineOpts.h" #include "flang/Optimizer/Passes/Pipelines.h" @@ -462,6 +480,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR( loweringOptions.setCUDARuntimeCheck(true); if (complexRange == "improved" || complexRange == "basic") 
loweringOptions.setComplexDivisionToRuntime(false); + loweringOptions.setFPMaxminBehavior(fpMaxminBehavior.getValue()); std::vector envDefaults = {}; Fortran::frontend::TargetOptions targetOpts; Fortran::frontend::CodeGenOptions cgOpts; @@ -534,8 +553,9 @@ static llvm::LogicalResult convertFortranSourceToMLIR( // lower HLFIR to FIR fir::EnableOpenMP enableOmp = enableOpenMP ? fir::EnableOpenMP::Full : fir::EnableOpenMP::None; - fir::createHLFIRToFIRPassPipeline(pm, enableOmp, - llvm::OptimizationLevel::O2); + MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2); + config.fpMaxminBehavior = loweringOptions.getFPMaxminBehavior(); + fir::createHLFIRToFIRPassPipeline(pm, enableOmp, config); if (mlir::failed(pm.run(mlirModule))) { llvm::errs() << "FATAL: lowering from HLFIR to FIR failed"; return mlir::failure(); @@ -550,6 +570,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR( // Add O2 optimizer pass pipeline. MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2); + config.fpMaxminBehavior = loweringOptions.getFPMaxminBehavior(); config.SkipConvertComplexPow = targetMachine.getTargetTriple().isAMDGCN(); if (enableOpenMP) config.EnableOpenMP = true; diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp index 36939802f55a..88865083f245 100644 --- a/flang/tools/tco/tco.cpp +++ b/flang/tools/tco/tco.cpp @@ -18,6 +18,7 @@ #include "flang/Optimizer/Support/InitFIR.h" #include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Transforms/Passes.h" +#include "flang/Support/FPMaxminBehavior.h" #include "flang/Tools/CrossToolHelpers.h" #include "mlir/IR/AsmState.h" #include "mlir/IR/BuiltinOps.h" @@ -95,6 +96,22 @@ static cl::opt testGeneratorMode( "test-gen", cl::desc("-emit-final-mlir -simplify-mlir -enable-aa=false"), cl::init(false)); +static cl::opt fpMaxminBehavior( + "ffp-maxmin-behavior", + cl::desc("Control max/min and [max|min][loc|val] behavior " + "[legacy|portable|extremum|extremenum] (for future pass 
use)"), + cl::values(clEnumValN(Fortran::common::FPMaxminBehavior::Legacy, "legacy", + "cmp+select"), + clEnumValN(Fortran::common::FPMaxminBehavior::Portable, + "portable", + "cmp+select and arith.max/minnumf when nnan and nsz " + "fast math flags are enabled"), + clEnumValN(Fortran::common::FPMaxminBehavior::Extremum, + "extremum", "arith.max/minimum"), + clEnumValN(Fortran::common::FPMaxminBehavior::ExtremeNum, + "extremenum", "arith.max/minnum")), + cl::init(Fortran::common::FPMaxminBehavior::Legacy)); + #include "flang/Optimizer/Passes/CommandLineOpts.h" #include "flang/Optimizer/Passes/Pipelines.h" @@ -186,6 +203,7 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) { return mlir::failure(); } MLIRToLLVMPassPipelineConfig config(*level); + config.fpMaxminBehavior = fpMaxminBehavior.getValue(); // TODO: config.StackArrays should be set here? config.EnableOpenMP = true; // assume the input contains OpenMP config.AliasAnalysis = enableAliasAnalysis && !testGeneratorMode; diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 33d3b84b32b9..25e3dbd29043 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -38,6 +38,11 @@ class OpenACC_Op traits = []> : def OpenACC_ReductionOperatorNone : I32EnumAttrCase<"AccNone", 0, "none">; def OpenACC_ReductionOperatorAdd : I32EnumAttrCase<"AccAdd", 1, "add">; def OpenACC_ReductionOperatorMul : I32EnumAttrCase<"AccMul", 2, "mul">; +// When NaNs or signed zeros are possible, AccMax and AccMin parallel reductions +// cannot guarantee stable results for floating-point values. +// As such, auto-parallelization of such reductions cannot be done. +// The FrontEnds can use alternative max/min reduction kinds (see below) +// to enable auto-parallelization. 
def OpenACC_ReductionOperatorMax : I32EnumAttrCase<"AccMax", 3, "max">; def OpenACC_ReductionOperatorMin : I32EnumAttrCase<"AccMin", 4, "min">; def OpenACC_ReductionOperatorAnd : I32EnumAttrCase<"AccIand", 5, "iand">; @@ -47,16 +52,62 @@ def OpenACC_ReductionOperatorLogEqv : I32EnumAttrCase<"AccEqv", 8, "eqv">; def OpenACC_ReductionOperatorLogNeqv : I32EnumAttrCase<"AccNeqv", 9, "neqv">; def OpenACC_ReductionOperatorLogAnd : I32EnumAttrCase<"AccLand", 10, "land">; def OpenACC_ReductionOperatorLogOr : I32EnumAttrCase<"AccLor", 11, "lor">; +// The following reduction operators correspond to arith::AtomicRMWKind kinds +// named alike. They can only be applied to floating-point types. +// These reductions can be auto-parallelized. +def OpenACC_ReductionOperatorMaximum + : I32EnumAttrCase<"AccMaximumf", 12, "maximumf">; +def OpenACC_ReductionOperatorMinimum + : I32EnumAttrCase<"AccMinimumf", 13, "minimumf">; +def OpenACC_ReductionOperatorMaxnum + : I32EnumAttrCase<"AccMaxnumf", 14, "maxnumf">; +def OpenACC_ReductionOperatorMinnum + : I32EnumAttrCase<"AccMinnumf", 15, "minnumf">; -def OpenACC_ReductionOperator : I32EnumAttr<"ReductionOperator", - "built-in reduction operations supported by OpenACC", - [OpenACC_ReductionOperatorNone, OpenACC_ReductionOperatorAdd, - OpenACC_ReductionOperatorMul, OpenACC_ReductionOperatorMax, OpenACC_ReductionOperatorMin, - OpenACC_ReductionOperatorAnd, OpenACC_ReductionOperatorOr, - OpenACC_ReductionOperatorXor, OpenACC_ReductionOperatorLogEqv, - OpenACC_ReductionOperatorLogNeqv, OpenACC_ReductionOperatorLogAnd, - OpenACC_ReductionOperatorLogOr - ]> { +def OpenACC_ReductionOperator + : I32EnumAttr< + "ReductionOperator", + // Built-in reduction operations supported by OpenACC + // according to OpenACC 3.3: + // + // |-------------------|----------------------| + // | Language operator | | + // |-------------------| ReductionOperator | + // | C/C++ | Fortran | | + // |------------------------------------------| + // | + | + | add | + // | * | 
* | mul | + // | max | max | max/maximumf/maxnumf | + // | min | min | min/minimumf/minnumf | + // | & | iand | iand | + // | | | ior | ior | + // | ^ | ieor | xor | + // | && | .and. | land | + // | || | .or. | lor | + // | | .eqv. | eqv | + // | | .neqv. | neqv | + // |------------------------------------------| + // + // The different max/min ReductionOperator's have different behavior + // when the arguments may be NaNs or signed zeros: + // * max/min - in general, produces inconsistent results + // in parallel execution, because it is not commutative. + // The max/min returns the second argument, + // when one of the arguments is NaN, + // or both arguments are zeros regardless of the sign. + // * maximumf/minimumf - safe to parallelize, corresponds to + // maximum/minimum defined in IEEE-754-2019. + // * maxnumf/minnumf - safe to parallelize, corresponds to + // maxNum/minNum defined in IEEE-754-2008. + "built-in reduction operations supported by OpenACC", + [OpenACC_ReductionOperatorNone, OpenACC_ReductionOperatorAdd, + OpenACC_ReductionOperatorMul, OpenACC_ReductionOperatorMax, + OpenACC_ReductionOperatorMin, OpenACC_ReductionOperatorAnd, + OpenACC_ReductionOperatorOr, OpenACC_ReductionOperatorXor, + OpenACC_ReductionOperatorLogEqv, OpenACC_ReductionOperatorLogNeqv, + OpenACC_ReductionOperatorLogAnd, OpenACC_ReductionOperatorLogOr, + OpenACC_ReductionOperatorMaximum, OpenACC_ReductionOperatorMinimum, + OpenACC_ReductionOperatorMaxnum, OpenACC_ReductionOperatorMinnum]> { let genSpecializedAttr = 0; let cppNamespace = "::mlir::acc"; } @@ -1651,6 +1702,9 @@ def OpenACC_ReductionRecipeOp (`destroy` $destroyRegion^)? }]; + // TODO: we need to verify that reduction operators + // maxnumf, maximumf, minnumf and minimumf are only applied + // to FloatType element types. 
let hasRegionVerifier = 1; } diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index 069c4ee104f1..2fb73e400001 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -2522,3 +2522,49 @@ func.func @test_getdeviceptr_opaque_ptr(%a: !llvm.ptr) -> () { // CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[A]] : !llvm.ptr) -> !llvm.ptr // CHECK: acc.declare_enter dataOperands(%[[DEVPTR]] : !llvm.ptr) + +// ----- + +acc.reduction.recipe @reduction_maximum_memref_f32 : memref reduction_operator init { +^bb0(%arg0: memref): + %alloca = memref.alloca() : memref + acc.yield %alloca : memref +} combiner { +^bb0(%arg0: memref, %arg1: memref): + acc.yield %arg0 : memref +} + +// CHECK-LABEL: acc.reduction.recipe @reduction_maximum_memref_f32 : memref reduction_operator + +acc.reduction.recipe @reduction_maxnum_memref_f32 : memref reduction_operator init { +^bb0(%arg0: memref): + %alloca = memref.alloca() : memref + acc.yield %alloca : memref +} combiner { +^bb0(%arg0: memref, %arg1: memref): + acc.yield %arg0 : memref +} + +// CHECK-LABEL: acc.reduction.recipe @reduction_maxnum_memref_f32 : memref reduction_operator + +acc.reduction.recipe @reduction_minimum_memref_f32 : memref reduction_operator init { +^bb0(%arg0: memref): + %alloca = memref.alloca() : memref + acc.yield %alloca : memref +} combiner { +^bb0(%arg0: memref, %arg1: memref): + acc.yield %arg0 : memref +} + +// CHECK-LABEL: acc.reduction.recipe @reduction_minimum_memref_f32 : memref reduction_operator + +acc.reduction.recipe @reduction_minnum_memref_f32 : memref reduction_operator init { +^bb0(%arg0: memref): + %alloca = memref.alloca() : memref + acc.yield %alloca : memref +} combiner { +^bb0(%arg0: memref, %arg1: memref): + acc.yield %arg0 : memref +} + +// CHECK-LABEL: acc.reduction.recipe @reduction_minnum_memref_f32 : memref reduction_operator