[flang,acc] Support -ffp-maxmin-behavior option in lowering. (#184730)

This patch adds `flang -fc1` option `-ffp-maxmin-behavior` and propagates it throughout Flang, so that semantics context, lowering and the pass pipeline builder can use it. MAX/MIN intrinsic and OpenACC max/min reduction lowering are now controlled by the option. I kept the `Legacy` mode, which is the default and matches the current behavior. I am going to test and merge a follow-up patch that replaces `Legacy` with `Portable`. RFC: https://discourse.llvm.org/t/flang-canonical-and-optimizable-representation-for-min-max/90037
2026-03-06 12:21:58 -08:00 · 2026-03-06 12:21:58 -08:00 · 5230955af7
commit 5230955af7
parent a8783dc6ba
30 changed files with 605 additions and 151 deletions
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@ -7651,6 +7651,17 @@ def J : JoinedOrSeparate<["-"], "J">,

 let Visibility = [FC1Option] in {

+def ffp_maxmin_behavior_EQ
+    : Joined<["-"], "ffp-maxmin-behavior=">,
+      Flags<[HelpHidden]>,
+      Group<f_Group>,
+      Values<"legacy,portable,extremum,extremenum">,
+      HelpText<"Control max/min and [max|min][loc|val] behavior: "
+               "legacy (cmp+select), portable (same as legacy, "
+               "but may use max/minNum when -fno-signed-zeros "
+               "-fno-honor-nans), extremum (IEEE-754-2019 maximum/minimum), "
+               "extremenum (IEEE-754-2008 max/minNum)">;
+
 def fget_definition : MultiArg<["-"], "fget-definition", 3>,
  HelpText<"Get the symbol definition from <line> <start-column> <end-column>">,
  Group<Action_Group>;
--- a/flang/include/flang/Evaluate/common.h
+++ b/flang/include/flang/Evaluate/common.h
@ -17,6 +17,7 @@
 #include "flang/Common/target-rounding.h"
 #include "flang/Parser/char-block.h"
 #include "flang/Parser/message.h"
+#include "flang/Support/FPMaxminBehavior.h"
 #include "flang/Support/Fortran-features.h"
 #include "flang/Support/Fortran.h"
 #include "flang/Support/default-kinds.h"
@ -218,15 +219,21 @@ public:
  FoldingContext(const common::IntrinsicTypeDefaultKinds &d,
      const IntrinsicProcTable &t, const TargetCharacteristics &c,
      const common::LanguageFeatureControl &lfc,
-      std::set<std::string> &tempNames)
+      std::set<std::string> &tempNames,
+      common::FPMaxminBehavior fpMaxminBehavior =
+          common::FPMaxminBehavior::Legacy)
      : defaults_{d}, intrinsics_{t}, targetCharacteristics_{c},
-        languageFeatures_{lfc}, tempNames_{tempNames} {}
+        languageFeatures_{lfc}, tempNames_{tempNames},
+        fpMaxminBehavior_{fpMaxminBehavior} {}
  FoldingContext(const parser::ContextualMessages &m,
      const common::IntrinsicTypeDefaultKinds &d, const IntrinsicProcTable &t,
      const TargetCharacteristics &c, const common::LanguageFeatureControl &lfc,
-      std::set<std::string> &tempNames)
+      std::set<std::string> &tempNames,
+      common::FPMaxminBehavior fpMaxminBehavior =
+          common::FPMaxminBehavior::Legacy)
      : messages_{m}, defaults_{d}, intrinsics_{t}, targetCharacteristics_{c},
-        languageFeatures_{lfc}, tempNames_{tempNames} {}
+        languageFeatures_{lfc}, tempNames_{tempNames},
+        fpMaxminBehavior_{fpMaxminBehavior} {}
  FoldingContext(const FoldingContext &that)
      : messages_{that.messages_}, defaults_{that.defaults_},
        intrinsics_{that.intrinsics_},
@ -235,8 +242,8 @@ public:
        analyzingPDTComponentKindSelector_{
            that.analyzingPDTComponentKindSelector_},
        impliedDos_{that.impliedDos_},
-        languageFeatures_{that.languageFeatures_}, tempNames_{that.tempNames_} {
-  }
+        languageFeatures_{that.languageFeatures_}, tempNames_{that.tempNames_},
+        fpMaxminBehavior_{that.fpMaxminBehavior_} {}
  FoldingContext(
      const FoldingContext &that, const parser::ContextualMessages &m)
      : messages_{m}, defaults_{that.defaults_}, intrinsics_{that.intrinsics_},
@ -245,8 +252,8 @@ public:
        analyzingPDTComponentKindSelector_{
            that.analyzingPDTComponentKindSelector_},
        impliedDos_{that.impliedDos_},
-        languageFeatures_{that.languageFeatures_}, tempNames_{that.tempNames_} {
-  }
+        languageFeatures_{that.languageFeatures_}, tempNames_{that.tempNames_},
+        fpMaxminBehavior_{that.fpMaxminBehavior_} {}

  parser::ContextualMessages &messages() { return messages_; }
  const parser::ContextualMessages &messages() const { return messages_; }
@ -264,6 +271,9 @@ public:
  const common::LanguageFeatureControl &languageFeatures() const {
    return languageFeatures_;
  }
+  common::FPMaxminBehavior fpMaxminBehavior() const {
+    return fpMaxminBehavior_;
+  }
  template <typename... A>
  parser::Message *Warn(common::LanguageFeature feature, A &&...args) {
    return messages_.Warn(
@ -325,6 +335,7 @@ private:
  const common::LanguageFeatureControl &languageFeatures_;
  std::set<std::string> &tempNames_;
  std::string realFlagWarningContext_;
+  common::FPMaxminBehavior fpMaxminBehavior_{common::FPMaxminBehavior::Legacy};
 };

 } // namespace Fortran::evaluate
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@ -52,6 +52,7 @@ CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
 CODEGENOPT(DwarfVersion, 3, 0) ///< Dwarf version

 CODEGENOPT(Underscoring, 1, 1)
+ENUM_CODEGENOPT(FPMaxminBehavior, Fortran::common::FPMaxminBehavior, 2, Fortran::common::FPMaxminBehavior::Legacy)
 ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use.
 ENUM_CODEGENOPT(DebugInfo,  llvm::codegenoptions::DebugInfoKind, 4,  llvm::codegenoptions::NoDebugInfo) ///< Level of debug info to generate
 ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, 4, llvm::driver::VectorLibrary::NoLibrary) ///< Vector functions library to use
--- a/flang/include/flang/Frontend/CodeGenOptions.h
+++ b/flang/include/flang/Frontend/CodeGenOptions.h
@ -16,6 +16,7 @@
 #define FORTRAN_FRONTEND_CODEGENOPTIONS_H

 #include "flang/Optimizer/OpenMP/Utils.h"
+#include "flang/Support/FPMaxminBehavior.h"
 #include "llvm/Frontend/Debug/Options.h"
 #include "llvm/Frontend/Driver/CodeGenOptions.h"
 #include "llvm/Support/CodeGen.h"
--- a/flang/include/flang/Lower/LoweringOptions.def
+++ b/flang/include/flang/Lower/LoweringOptions.def
@ -88,5 +88,9 @@ ENUM_LOWERINGOPT(RegisterMLIRDiagnosticsHandler, unsigned, 1, 1)
 /// LineTablesOnly. Off by default.
 ENUM_LOWERINGOPT(PreserveUseDebugInfo, unsigned, 1, 0)

+/// FP max/min behavior for max/min intrinsics and [max|min][loc|val] (Legacy,
+/// Portable, Extremum, ExtremeNum). Default: Legacy.
+ENUM_LOWERINGOPT(FPMaxminBehavior, Fortran::common::FPMaxminBehavior, 2, 0)
+
 #undef LOWERINGOPT
 #undef ENUM_LOWERINGOPT
--- a/flang/include/flang/Lower/LoweringOptions.h
+++ b/flang/include/flang/Lower/LoweringOptions.h
@ -15,6 +15,7 @@
 #ifndef FLANG_LOWER_LOWERINGOPTIONS_H
 #define FLANG_LOWER_LOWERINGOPTIONS_H

+#include "flang/Support/FPMaxminBehavior.h"
 #include "flang/Support/MathOptionsBase.h"

 namespace Fortran::lower {
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@ -21,6 +21,7 @@
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Support/FPMaxminBehavior.h"
 #include "flang/Support/MathOptionsBase.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
@ -633,6 +634,14 @@ public:
    return complexDivisionToRuntimeFlag;
  }

+  /// Setter/getter for fpMaxminBehavior.
+  void setFPMaxminBehavior(Fortran::common::FPMaxminBehavior mode) {
+    fpMaxminBehavior = mode;
+  }
+  Fortran::common::FPMaxminBehavior getFPMaxminBehavior() const {
+    return fpMaxminBehavior;
+  }
+
  /// Dump the current function. (debug)
  LLVM_DUMP_METHOD void dumpFunc();

@ -693,6 +702,14 @@ private:
  /// mlir::arith::FastMathAttr.
  mlir::arith::FastMathFlags fastMathFlags{};

+  /// Controls how max/min idioms should be implemented.
+  /// Right now, it is only used to propagate FPMaxminBehavior
+  /// to the IntrinsicCall lowering. In general, it can be used
+  /// for generating max/min idioms through FirOpBuilder anywhere
+  /// in the pipeline.
+  Fortran::common::FPMaxminBehavior fpMaxminBehavior{
+      Fortran::common::FPMaxminBehavior::Legacy};
+
  /// IntegerOverflowFlags that need to be set for operations that support
  /// mlir::arith::IntegerOverflowFlagsAttr.
  mlir::arith::IntegerOverflowFlags integerOverflowFlags{};
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@ -56,49 +56,6 @@ genIntrinsicCall(fir::FirOpBuilder &, mlir::Location,
                 llvm::ArrayRef<fir::ExtendedValue> args,
                 Fortran::lower::AbstractConverter *converter = nullptr);

-/// Enums used to templatize and share lowering of MIN and MAX.
-enum class Extremum { Min, Max };
-
-// There are different ways to deal with NaNs in MIN and MAX.
-// Known existing behaviors are listed below and can be selected for
-// f18 MIN/MAX implementation.
-enum class ExtremumBehavior {
-  // Note: the Signaling/quiet aspect of NaNs in the behaviors below are
-  // not described because there is no way to control/observe such aspect in
-  // MLIR/LLVM yet. The IEEE behaviors come with requirements regarding this
-  // aspect that are therefore currently not enforced. In the descriptions
-  // below, NaNs can be signaling or quite. Returned NaNs may be signaling
-  // if one of the input NaN was signaling but it cannot be guaranteed either.
-  // Existing compilers using an IEEE behavior (gfortran) also do not fulfill
-  // signaling/quiet requirements.
-  IeeeMinMaximumNumber,
-  // IEEE minimumNumber/maximumNumber behavior (754-2019, section 9.6):
-  // If one of the argument is and number and the other is NaN, return the
-  // number. If both arguements are NaN, return NaN.
-  // Compilers: gfortran.
-  IeeeMinMaximum,
-  // IEEE minimum/maximum behavior (754-2019, section 9.6):
-  // If one of the argument is NaN, return NaN.
-  MinMaxss,
-  // x86 minss/maxss behavior:
-  // If the second argument is a number and the other is NaN, return the number.
-  // In all other cases where at least one operand is NaN, return NaN.
-  // Compilers: xlf (only for MAX), ifort, pgfortran -nollvm, and nagfor.
-  PgfortranLlvm,
-  // "Opposite of" x86 minss/maxss behavior:
-  // If the first argument is a number and the other is NaN, return the
-  // number.
-  // In all other cases where at least one operand is NaN, return NaN.
-  // Compilers: xlf (only for MIN), and pgfortran (with llvm).
-  IeeeMinMaxNum
-  // IEEE minNum/maxNum behavior (754-2008, section 5.3.1):
-  // TODO: Not implemented.
-  // It is the only behavior where the signaling/quiet aspect of a NaN argument
-  // impacts if the result should be NaN or the argument that is a number.
-  // LLVM/MLIR do not provide ways to observe this aspect, so it is not
-  // possible to implement it without some target dependent runtime.
-};
-
 /// Enum specifying how intrinsic argument evaluate::Expr should be
 /// lowered to fir::ExtendedValue to be passed to genIntrinsicCall.
 enum class LowerIntrinsicArgAs {
@ -260,7 +217,7 @@ struct IntrinsicLibrary {
  mlir::Value genExponent(mlir::Type, llvm::ArrayRef<mlir::Value>);
  fir::ExtendedValue genExtendsTypeOf(mlir::Type,
                                      llvm::ArrayRef<fir::ExtendedValue>);
-  template <Extremum, ExtremumBehavior>
+  template <bool isMax>
  mlir::Value genExtremum(mlir::Type, llvm::ArrayRef<mlir::Value>);
  fir::ExtendedValue genFCString(mlir::Type,
                                 llvm::ArrayRef<fir::ExtendedValue>);
--- a/flang/include/flang/Optimizer/Passes/Pipelines.h
+++ b/flang/include/flang/Optimizer/Passes/Pipelines.h
@ -132,11 +132,11 @@ enum class EnableOpenMP { None, Simd, Full };
 /// Create a pass pipeline for lowering from HLFIR to FIR
 ///
 /// \param pm - MLIR pass manager that will hold the pipeline definition
-/// \param optLevel - optimization level used for creating FIR optimization
-///   passes pipeline
-void createHLFIRToFIRPassPipeline(
-    mlir::PassManager &pm, EnableOpenMP enableOpenMP,
-    llvm::OptimizationLevel optLevel = defaultOptLevel);
+/// \param enableOpenMP - whether OpenMP lowering is enabled
+/// \param config - pipeline config (OptLevel, fpMaxminBehavior, etc.)
+void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
+                                  EnableOpenMP enableOpenMP,
+                                  const MLIRToLLVMPassPipelineConfig &config);

 struct OpenMPFIRPassPipelineOpts {
  /// Whether code is being generated for a target device rather than the host
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@ -68,7 +68,8 @@ class SemanticsContext {
 public:
  SemanticsContext(const common::IntrinsicTypeDefaultKinds &,
      const common::LanguageFeatureControl &, const common::LangOptions &,
-      parser::AllCookedSources &);
+      parser::AllCookedSources &,
+      common::FPMaxminBehavior = common::FPMaxminBehavior::Legacy);
  ~SemanticsContext();

  const common::IntrinsicTypeDefaultKinds &defaultKinds() const {
--- a/flang/include/flang/Support/FPMaxminBehavior.h
+++ b/flang/include/flang/Support/FPMaxminBehavior.h
@ -0,0 +1,42 @@
+//===- Support/FPMaxminBehavior.h - FP max/min behavior option --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Shared definition of FP max/min behavior for max/min and [max|min][loc|val].
+/// Used by CodeGenOptions, LoweringOptions, and other components.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_SUPPORT_FPMAXMINBEHAVIOR_H_
+#define FORTRAN_SUPPORT_FPMAXMINBEHAVIOR_H_
+
+#include "llvm/ADT/StringRef.h"
+
+namespace Fortran::common {
+
+/// Control for max/min and [max|min][loc|val] lowering, constant folding, and
+/// related behavior. Legacy: current Flang behavior (always cmp+select).
+/// Portable: same as Legacy but may use arith.maxnumf/minnumf under
+/// '-fno-signed-zeros -fno-honor-nans'.
+/// Extremum: arith.maximumf/minimumf.
+/// ExtremeNum: arith.maxnumf/minnumf.
+/// Legacy is transitional and will eventually be replaced by Portable.
+enum class FPMaxminBehavior : unsigned {
+  Legacy,
+  Portable,
+  Extremum,
+  ExtremeNum,
+};
+
+/// Parse -ffp-maxmin-behavior= value. Triggers llvm_unreachable
+/// for unknown strings.
+FPMaxminBehavior parseFPMaxminBehavior(llvm::StringRef value);
+
+} // namespace Fortran::common
+
+#endif // FORTRAN_SUPPORT_FPMAXMINBEHAVIOR_H_
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@ -14,6 +14,7 @@
 #define FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H

 #include "flang/Frontend/CodeGenOptions.h"
+#include "flang/Support/FPMaxminBehavior.h"
 #include "flang/Support/LangOptions.h"
 #include "flang/Support/MathOptionsBase.h"
 #include <cstdint>
@ -150,6 +151,8 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks {
  int32_t DwarfVersion = 0; ///< Version of DWARF debug info to generate
  std::string SplitDwarfFile = ""; ///< File name for the split debug info
  std::string DwarfDebugFlags = ""; ///< Debug flags to append to DWARF producer
+  Fortran::common::FPMaxminBehavior fpMaxminBehavior =
+      Fortran::common::FPMaxminBehavior::Legacy;
 };

 struct OffloadModuleOpts {
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@ -1661,6 +1661,12 @@ bool CompilerInvocation::createFromArgs(
    invoc.loweringOpts.setRepackArraysWhole(arg->getValue() ==
                                            llvm::StringRef{"whole"});

+  if (auto *arg = args.getLastArg(clang::options::OPT_ffp_maxmin_behavior_EQ)) {
+    auto value = Fortran::common::parseFPMaxminBehavior(arg->getValue());
+    invoc.getCodeGenOpts().setFPMaxminBehavior(value);
+    invoc.loweringOpts.setFPMaxminBehavior(value);
+  }
+
  success &= parseFrontendArgs(invoc.getFrontendOpts(), args, diags);
  parseTargetArgs(invoc.getTargetOpts(), args);
  parsePreprocessorArgs(invoc.getPreprocessorOpts(), args);
@ -1899,7 +1905,7 @@ CompilerInvocation::getSemanticsCtx(

  auto semanticsContext = std::make_unique<semantics::SemanticsContext>(
      getDefaultKinds(), fortranOptions.features, getLangOpts(),
-      allCookedSources);
+      allCookedSources, getCodeGenOpts().getFPMaxminBehavior());

  semanticsContext->set_moduleDirectory(getModuleDir())
      .set_searchDirectories(fortranOptions.searchDirectories)
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@ -635,8 +635,11 @@ void CodeGenAction::lowerHLFIRToFIR() {
    enableOpenMP = fir::EnableOpenMP::Full;
  if (ci.getInvocation().getLangOpts().OpenMPSimd)
    enableOpenMP = fir::EnableOpenMP::Simd;
+  MLIRToLLVMPassPipelineConfig config(level);
+  config.fpMaxminBehavior =
+      ci.getInvocation().getLoweringOpts().getFPMaxminBehavior();
  // Create the pass pipeline
-  fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, level);
+  fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, config);
  (void)mlir::applyPassManagerCLOptions(pm);

  mlir::TimingScope timingScopeMLIRPasses = timingScopeRoot.nest(
@ -748,6 +751,7 @@ void CodeGenAction::generateLLVMIR() {
  pm.enableVerifier(/*verifyPasses=*/true);

  MLIRToLLVMPassPipelineConfig config(level, opts, mathOpts);
+  config.fpMaxminBehavior = invoc.getLoweringOpts().getFPMaxminBehavior();
  llvm::Triple pipelineTriple(invoc.getTargetOpts().triple);
  config.SkipConvertComplexPow = pipelineTriple.isAMDGCN();
  fir::registerDefaultInlinerPass(config);
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@ -6454,6 +6454,8 @@ private:
    builder->setComplexDivisionToRuntimeFlag(
        bridge.getLoweringOptions().getComplexDivisionToRuntime());
    builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions());
+    builder->setFPMaxminBehavior(
+        bridge.getLoweringOptions().getFPMaxminBehavior());
    builder->setInsertionPointToStart(&func.front());
    if (funit.parent.isA<Fortran::lower::pft::FunctionLikeUnit>()) {
      // Give internal linkage to internal functions. There are no name clash
@ -6737,6 +6739,8 @@ private:
    builder = new fir::FirOpBuilder(func, bridge.getKindMap(), symbolTable);
    assert(builder && "FirOpBuilder did not instantiate");
    builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions());
+    builder->setFPMaxminBehavior(
+        bridge.getLoweringOptions().getFPMaxminBehavior());
    createGlobals();
    if (mlir::Region *region = func.getCallableRegion())
      region->dropAllReferences();
--- a/flang/lib/Lower/LoweringOptions.cpp
+++ b/flang/lib/Lower/LoweringOptions.cpp
@ -16,7 +16,8 @@ namespace Fortran::lower {

 LoweringOptions::LoweringOptions() : MathOptions{} {
 #define LOWERINGOPT(Name, Bits, Default) Name = Default;
-#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) set##Name(Default);
+#define ENUM_LOWERINGOPT(Name, Type, Bits, Default)                            \
+  set##Name(static_cast<Type>(Default));
 #include "flang/Lower/LoweringOptions.def"
 }

--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@ -1061,15 +1061,31 @@ genDataExitOperations(fir::FirOpBuilder &builder,
 /// Return the corresponding enum value for the mlir::acc::ReductionOperator
 /// from the parser representation.
 static mlir::acc::ReductionOperator
-getReductionOperator(const Fortran::parser::ReductionOperator &op) {
+getReductionOperator(const Fortran::parser::ReductionOperator &op,
+                     mlir::Type reductionTy,
+                     const Fortran::lower::AbstractConverter &converter) {
+  Fortran::common::FPMaxminBehavior maxminMode =
+      converter.getLoweringOptions().getFPMaxminBehavior();
  switch (op.v) {
  case Fortran::parser::ReductionOperator::Operator::Plus:
    return mlir::acc::ReductionOperator::AccAdd;
  case Fortran::parser::ReductionOperator::Operator::Multiply:
    return mlir::acc::ReductionOperator::AccMul;
  case Fortran::parser::ReductionOperator::Operator::Max:
+    if (fir::isa_real(reductionTy)) {
+      if (maxminMode == Fortran::common::FPMaxminBehavior::Extremum)
+        return mlir::acc::ReductionOperator::AccMaximumf;
+      else if (maxminMode == Fortran::common::FPMaxminBehavior::ExtremeNum)
+        return mlir::acc::ReductionOperator::AccMaxnumf;
+    }
    return mlir::acc::ReductionOperator::AccMax;
  case Fortran::parser::ReductionOperator::Operator::Min:
+    if (fir::isa_real(reductionTy)) {
+      if (maxminMode == Fortran::common::FPMaxminBehavior::Extremum)
+        return mlir::acc::ReductionOperator::AccMinimumf;
+      else if (maxminMode == Fortran::common::FPMaxminBehavior::ExtremeNum)
+        return mlir::acc::ReductionOperator::AccMinnumf;
+    }
    return mlir::acc::ReductionOperator::AccMin;
  case Fortran::parser::ReductionOperator::Operator::Iand:
    return mlir::acc::ReductionOperator::AccIand;
@ -1115,7 +1131,6 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  const auto &objects = std::get<Fortran::parser::AccObjectList>(objectList.t);
  const auto &op = std::get<Fortran::parser::ReductionOperator>(objectList.t);
-  mlir::acc::ReductionOperator mlirOp = getReductionOperator(op);
  Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext};
  for (const auto &accObject : objects.v) {
    llvm::SmallVector<mlir::Value> bounds;
@ -1144,6 +1159,9 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
    if (!isSupportedReductionType(reductionTy))
      TODO(operandLocation, "reduction with unsupported type");

+    mlir::acc::ReductionOperator mlirOp =
+        getReductionOperator(op, reductionTy, converter);
+
    if (designator) {
      Fortran::semantics::SomeExpr someExpr = *designator;
      if (Fortran::lower::detail::getRef<Fortran::evaluate::Component>(
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@ -550,7 +550,7 @@ static constexpr IntrinsicHandler handlers[]{
     &I::genMatmulTranspose,
     {{{"matrix_a", asAddr}, {"matrix_b", asAddr}}},
     /*isElemental=*/false},
-    {"max", &I::genExtremum<Extremum::Max, ExtremumBehavior::MinMaxss>},
+    {"max", &I::genExtremum</*isMax=*/true>},
    {"maxloc",
     &I::genMaxloc,
     {{{"array", asBox},
@ -567,7 +567,7 @@ static constexpr IntrinsicHandler handlers[]{
     /*isElemental=*/false},
    {"merge", &I::genMerge},
    {"merge_bits", &I::genMergeBits},
-    {"min", &I::genExtremum<Extremum::Min, ExtremumBehavior::MinMaxss>},
+    {"min", &I::genExtremum</*isMax=*/false>},
    {"minloc",
     &I::genMinloc,
     {{{"array", asBox},
@ -8729,75 +8729,77 @@ IntrinsicLibrary::genTrim(mlir::Type resultType,
 }

 // Compare two FIR values and return boolean result as i1.
-template <Extremum extremum, ExtremumBehavior behavior>
-static mlir::Value createExtremumCompare(mlir::Location loc,
-                                         fir::FirOpBuilder &builder,
-                                         mlir::Value left, mlir::Value right) {
+template <bool isMax>
+static mlir::Value genExtremumResult(mlir::Location loc,
+                                     fir::FirOpBuilder &builder,
+                                     mlir::Value left, mlir::Value right) {
  mlir::Type type = left.getType();
  mlir::arith::CmpIPredicate integerPredicate =
-      type.isUnsignedInteger()    ? extremum == Extremum::Max
-                                        ? mlir::arith::CmpIPredicate::ugt
-                                        : mlir::arith::CmpIPredicate::ult
-      : extremum == Extremum::Max ? mlir::arith::CmpIPredicate::sgt
-                                  : mlir::arith::CmpIPredicate::slt;
-  static constexpr mlir::arith::CmpFPredicate orderedCmp =
-      extremum == Extremum::Max ? mlir::arith::CmpFPredicate::OGT
-                                : mlir::arith::CmpFPredicate::OLT;
-  mlir::Value result;
+      type.isUnsignedInteger() ? isMax ? mlir::arith::CmpIPredicate::ugt
+                                       : mlir::arith::CmpIPredicate::ult
+      : isMax                  ? mlir::arith::CmpIPredicate::sgt
+                               : mlir::arith::CmpIPredicate::slt;
+  mlir::Value pred;
  if (fir::isa_real(type)) {
-    // Note: the signaling/quit aspect of the result required by IEEE
-    // cannot currently be obtained with LLVM without ad-hoc runtime.
-    if constexpr (behavior == ExtremumBehavior::IeeeMinMaximumNumber) {
-      // Return the number if one of the inputs is NaN and the other is
-      // a number.
-      auto leftIsResult =
-          mlir::arith::CmpFOp::create(builder, loc, orderedCmp, left, right);
-      auto rightIsNan = mlir::arith::CmpFOp::create(
-          builder, loc, mlir::arith::CmpFPredicate::UNE, right, right);
-      result =
-          mlir::arith::OrIOp::create(builder, loc, leftIsResult, rightIsNan);
-    } else if constexpr (behavior == ExtremumBehavior::IeeeMinMaximum) {
-      // Always return NaNs if one the input is NaNs
-      auto leftIsResult =
-          mlir::arith::CmpFOp::create(builder, loc, orderedCmp, left, right);
-      auto leftIsNan = mlir::arith::CmpFOp::create(
-          builder, loc, mlir::arith::CmpFPredicate::UNE, left, left);
-      result =
-          mlir::arith::OrIOp::create(builder, loc, leftIsResult, leftIsNan);
-    } else if constexpr (behavior == ExtremumBehavior::MinMaxss) {
-      // If the left is a NaN, return the right whatever it is.
-      result =
-          mlir::arith::CmpFOp::create(builder, loc, orderedCmp, left, right);
-    } else if constexpr (behavior == ExtremumBehavior::PgfortranLlvm) {
-      // If one of the operand is a NaN, return left whatever it is.
-      static constexpr auto unorderedCmp =
-          extremum == Extremum::Max ? mlir::arith::CmpFPredicate::UGT
-                                    : mlir::arith::CmpFPredicate::ULT;
-      result =
-          mlir::arith::CmpFOp::create(builder, loc, unorderedCmp, left, right);
-    } else {
-      // TODO: ieeeMinNum/ieeeMaxNum
-      static_assert(behavior == ExtremumBehavior::IeeeMinMaxNum,
-                    "ieeeMinNum/ieeeMaxNum behavior not implemented");
+    switch (builder.getFPMaxminBehavior()) {
+    case Fortran::common::FPMaxminBehavior::Portable:
+      // If the left is NaN, return the right whatever it is.
+      // Signed zeros are equal, so max/min(zero, zero) always
+      // returns the second 'zero'.
+      if (mlir::arith::bitEnumContainsAll(
+              builder.getFastMathFlags(),
+              mlir::arith::FastMathFlags::nnan |
+                  mlir::arith::FastMathFlags::nsz)) {
+        // If there are no NaNs and signed zeros, we can use a shorter
+        // arith.max/minnumf representation.
+        if constexpr (isMax)
+          return mlir::arith::MaxNumFOp::create(builder, loc, left, right);
+        else
+          return mlir::arith::MinNumFOp::create(builder, loc, left, right);
+      }
+      [[fallthrough]];
+    case Fortran::common::FPMaxminBehavior::Legacy: {
+      static constexpr mlir::arith::CmpFPredicate pred =
+          isMax ? mlir::arith::CmpFPredicate::OGT
+                : mlir::arith::CmpFPredicate::OLT;
+      mlir::Value cmp =
+          mlir::arith::CmpFOp::create(builder, loc, pred, left, right);
+      return mlir::arith::SelectOp::create(builder, loc, cmp, left, right);
    }
+    case Fortran::common::FPMaxminBehavior::Extremum:
+      if constexpr (isMax)
+        return mlir::arith::MaximumFOp::create(builder, loc, left, right);
+      else
+        return mlir::arith::MinimumFOp::create(builder, loc, left, right);
+    case Fortran::common::FPMaxminBehavior::ExtremeNum:
+      if constexpr (isMax)
+        return mlir::arith::MaxNumFOp::create(builder, loc, left, right);
+      else
+        return mlir::arith::MinNumFOp::create(builder, loc, left, right);
+    }
+
+    llvm_unreachable("unsupported FPMaxminBehavior");
  } else if (fir::isa_integer(type)) {
+    mlir::Value cmpLeft = left;
+    mlir::Value cmpRight = right;
    if (type.isUnsignedInteger()) {
      mlir::Type signlessType = mlir::IntegerType::get(
          builder.getContext(), type.getIntOrFloatBitWidth(),
          mlir::IntegerType::SignednessSemantics::Signless);
-      left = builder.createConvert(loc, signlessType, left);
-      right = builder.createConvert(loc, signlessType, right);
+      cmpLeft = builder.createConvert(loc, signlessType, left);
+      cmpRight = builder.createConvert(loc, signlessType, right);
    }
-    result = mlir::arith::CmpIOp::create(builder, loc, integerPredicate, left,
-                                         right);
+    pred = mlir::arith::CmpIOp::create(builder, loc, integerPredicate, cmpLeft,
+                                       cmpRight);
  } else if (fir::isa_char(type) || fir::isa_char(fir::unwrapRefType(type))) {
    // TODO: ! character min and max is tricky because the result
    // length is the length of the longest argument!
    // So we may need a temp.
    TODO(loc, "intrinsic: min and max for CHARACTER");
  }
-  assert(result && "result must be defined");
-  return result;
+  assert(pred && "pred must be defined");
+
+  return mlir::arith::SelectOp::create(builder, loc, pred, left, right);
 }

 // UNLINK
@ -9097,16 +9099,13 @@ IntrinsicLibrary::genMinval(mlir::Type resultType,
 }

 // MIN and MAX
-template <Extremum extremum, ExtremumBehavior behavior>
+template <bool isMax>
 mlir::Value IntrinsicLibrary::genExtremum(mlir::Type,
                                          llvm::ArrayRef<mlir::Value> args) {
  assert(args.size() >= 1);
  mlir::Value result = args[0];
-  for (auto arg : args.drop_front()) {
-    mlir::Value mask =
-        createExtremumCompare<extremum, behavior>(loc, builder, result, arg);
-    result = mlir::arith::SelectOp::create(builder, loc, mask, result, arg);
-  }
+  for (auto arg : args.drop_front())
+    result = genExtremumResult<isMax>(loc, builder, result, arg);
  return result;
 }

@ -9168,17 +9167,15 @@ genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
 mlir::Value genMax(fir::FirOpBuilder &builder, mlir::Location loc,
                   llvm::ArrayRef<mlir::Value> args) {
  assert(args.size() > 0 && "max requires at least one argument");
-  return IntrinsicLibrary{builder, loc}
-      .genExtremum<Extremum::Max, ExtremumBehavior::MinMaxss>(args[0].getType(),
-                                                              args);
+  return IntrinsicLibrary{builder, loc}.genExtremum</*isMax=*/true>(
+      args[0].getType(), args);
 }

 mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc,
                   llvm::ArrayRef<mlir::Value> args) {
  assert(args.size() > 0 && "min requires at least one argument");
-  return IntrinsicLibrary{builder, loc}
-      .genExtremum<Extremum::Min, ExtremumBehavior::MinMaxss>(args[0].getType(),
-                                                              args);
+  return IntrinsicLibrary{builder, loc}.genExtremum</*isMax=*/false>(
+      args[0].getType(), args);
 }

 mlir::Value genDivC(fir::FirOpBuilder &builder, mlir::Location loc,
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@ -1025,11 +1025,35 @@ static mlir::Value genScalarCombiner(fir::FirOpBuilder &builder,
    TODO(loc, "reduction mul type");
  }

-  if (op == mlir::acc::ReductionOperator::AccMin)
-    return fir::genMin(builder, loc, {value1, value2});
+  if (op == mlir::acc::ReductionOperator::AccMin ||
+      op == mlir::acc::ReductionOperator::AccMinimumf ||
+      op == mlir::acc::ReductionOperator::AccMinnumf) {
+    Fortran::common::FPMaxminBehavior savedMode = builder.getFPMaxminBehavior();
+    if (op == mlir::acc::ReductionOperator::AccMinimumf)
+      builder.setFPMaxminBehavior(Fortran::common::FPMaxminBehavior::Extremum);
+    else if (op == mlir::acc::ReductionOperator::AccMinnumf)
+      builder.setFPMaxminBehavior(
+          Fortran::common::FPMaxminBehavior::ExtremeNum);

-  if (op == mlir::acc::ReductionOperator::AccMax)
-    return fir::genMax(builder, loc, {value1, value2});
+    mlir::Value result = fir::genMin(builder, loc, {value1, value2});
+    builder.setFPMaxminBehavior(savedMode);
+    return result;
+  }
+
+  if (op == mlir::acc::ReductionOperator::AccMax ||
+      op == mlir::acc::ReductionOperator::AccMaximumf ||
+      op == mlir::acc::ReductionOperator::AccMaxnumf) {
+    Fortran::common::FPMaxminBehavior savedMode = builder.getFPMaxminBehavior();
+    if (op == mlir::acc::ReductionOperator::AccMaximumf)
+      builder.setFPMaxminBehavior(Fortran::common::FPMaxminBehavior::Extremum);
+    else if (op == mlir::acc::ReductionOperator::AccMaxnumf)
+      builder.setFPMaxminBehavior(
+          Fortran::common::FPMaxminBehavior::ExtremeNum);
+
+    mlir::Value result = fir::genMax(builder, loc, {value1, value2});
+    builder.setFPMaxminBehavior(savedMode);
+    return result;
+  }

  if (op == mlir::acc::ReductionOperator::AccIand)
    return mlir::arith::AndIOp::create(builder, loc, value1, value2);
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@ -255,11 +255,12 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
 /// Create a pass pipeline for lowering from HLFIR to FIR
 ///
 /// \param pm - MLIR pass manager that will hold the pipeline definition
-/// \param optLevel - optimization level used for creating FIR optimization
-///   passes pipeline
+/// \param enableOpenMP - whether OpenMP lowering is enabled
+/// \param config - pipeline config (OptLevel, etc.)
 void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
                                  EnableOpenMP enableOpenMP,
-                                  llvm::OptimizationLevel optLevel) {
+                                  const MLIRToLLVMPassPipelineConfig &config) {
+  llvm::OptimizationLevel optLevel = config.OptLevel;
  if (optLevel.getSizeLevel() > 0 || optLevel.getSpeedupLevel() > 0) {
    addNestedPassToAllTopLevelOperations<PassConstructor>(
        pm, hlfir::createExpressionSimplification);
@ -442,7 +443,7 @@ void createMLIRToLLVMPassPipeline(mlir::PassManager &pm,
    enableOpenMP = fir::EnableOpenMP::Full;
  if (config.EnableOpenMPSimd)
    enableOpenMP = fir::EnableOpenMP::Simd;
-  fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, config.OptLevel);
+  fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, config);

  // Add default optimizer pass pipeline.
  fir::createDefaultFIROptimizerPassPipeline(pm, config);
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@ -381,14 +381,16 @@ SemanticsContext::SemanticsContext(
    const common::IntrinsicTypeDefaultKinds &defaultKinds,
    const common::LanguageFeatureControl &languageFeatures,
    const common::LangOptions &langOpts,
-    parser::AllCookedSources &allCookedSources)
+    parser::AllCookedSources &allCookedSources,
+    common::FPMaxminBehavior fpMaxminBehavior)
    : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures},
      langOpts_{langOpts}, allCookedSources_{allCookedSources},
      intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)},
      globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope(
                               Scope::Kind::IntrinsicModules, nullptr)},
      foldingContext_{parser::ContextualMessages{&messages_}, defaultKinds_,
-          intrinsics_, targetCharacteristics_, languageFeatures_, tempNames_} {}
+          intrinsics_, targetCharacteristics_, languageFeatures_, tempNames_,
+          fpMaxminBehavior} {}

 SemanticsContext::~SemanticsContext() {}

--- a/flang/lib/Support/CMakeLists.txt
+++ b/flang/lib/Support/CMakeLists.txt
@ -44,6 +44,7 @@ endif()

 add_flang_library(FortranSupport
  default-kinds.cpp
+  FPMaxminBehavior.cpp
  Flags.cpp
  Fortran.cpp
  Fortran-features.cpp
--- a/flang/lib/Support/FPMaxminBehavior.cpp
+++ b/flang/lib/Support/FPMaxminBehavior.cpp
@ -0,0 +1,23 @@
+//===--- Support/FPMaxminBehavior.cpp - Parse FP max/min behavior ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Support/FPMaxminBehavior.h"
+#include "llvm/ADT/StringSwitch.h"
+
+namespace Fortran::common {
+
+/// Translate the textual value of -ffp-maxmin-behavior into the matching
+/// FPMaxminBehavior enumerator. The comparison is exact (case-sensitive).
+/// Any spelling other than "portable", "extremum" or "extremenum" --
+/// including "legacy" itself -- yields FPMaxminBehavior::Legacy.
+FPMaxminBehavior parseFPMaxminBehavior(llvm::StringRef value) {
+  if (value == "portable")
+    return FPMaxminBehavior::Portable;
+  if (value == "extremum")
+    return FPMaxminBehavior::Extremum;
+  if (value == "extremenum")
+    return FPMaxminBehavior::ExtremeNum;
+  // "legacy" and every unrecognized value share the same result.
+  return FPMaxminBehavior::Legacy;
+}
+
+} // namespace Fortran::common
--- a/flang/test/Driver/ffp-maxmin-behavior.f90
+++ b/flang/test/Driver/ffp-maxmin-behavior.f90
@ -0,0 +1,19 @@
+! Test that -ffp-maxmin-behavior is accepted by flang -fc1 (all values and
+! unknown defaulting to legacy) and is not recognized by the flang driver.
+
+program p
+end program p
+
+! flang -fc1 accepts all valid values
+! RUN: %flang_fc1 -fsyntax-only -ffp-maxmin-behavior=legacy %s
+! RUN: %flang_fc1 -fsyntax-only -ffp-maxmin-behavior=portable %s
+! RUN: %flang_fc1 -fsyntax-only -ffp-maxmin-behavior=extremum %s
+! RUN: %flang_fc1 -fsyntax-only -ffp-maxmin-behavior=extremenum %s
+
+! flang -fc1 accepts unknown value (defaults to legacy, no error)
+! RUN: %flang_fc1 -fsyntax-only -ffp-maxmin-behavior=invalid %s
+
+! flang driver rejects the option as an unknown argument (fc1-only option)
+! RUN: not %flang -### -ffp-maxmin-behavior=legacy %s 2>&1 \
+! RUN:   | FileCheck %s -check-prefix=DRIVER-UNKNOWN-ARG
+! DRIVER-UNKNOWN-ARG: error: unknown argument '-ffp-maxmin-behavior=legacy'
--- a/flang/test/Lower/OpenACC/acc-reduction-maxmin.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction-maxmin.f90
@ -0,0 +1,114 @@
+! This test checks lowering of OpenACC reduction clause.
+
+! RUN: %flang_fc1 -fopenacc -emit-hlfir -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY
+! RUN: %flang_fc1 -fopenacc -emit-hlfir -ffp-maxmin-behavior=legacy -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY
+! RUN: %flang_fc1 -fopenacc -emit-hlfir -ffp-maxmin-behavior=extremum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMUM
+! RUN: %flang_fc1 -fopenacc -emit-hlfir -ffp-maxmin-behavior=extremenum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMENUM
+
+! TODO: we should get rid of the legacy mode to make the generation of
+! arith.max/minnumf straightforward for portable mode + nsz + nnan:
+! RUN: %flang_fc1 -fopenacc -emit-hlfir -ffp-maxmin-behavior=portable -fno-signed-zeros -menable-no-nans -o - %s 2>&1 | FileCheck %s -check-prefix=PORTABLE-NANNSZ
+
+subroutine acc_scalar_reduction_max(a)
+  real :: a
+  !$acc parallel reduction(max:a)
+  !$acc end parallel
+end subroutine acc_scalar_reduction_max
+
+subroutine acc_array_reduction_max(a)
+  real :: a(10)
+  !$acc parallel reduction(max:a)
+  !$acc end parallel
+end subroutine acc_array_reduction_max
+
+subroutine acc_scalar_reduction_min(a)
+  real :: a
+  !$acc parallel reduction(min:a)
+  !$acc end parallel
+end subroutine acc_scalar_reduction_min
+
+subroutine acc_array_reduction_min(a)
+  real :: a(10)
+  !$acc parallel reduction(min:a)
+  !$acc end parallel
+end subroutine acc_array_reduction_min
+
+! LEGACY-LABEL:   acc.reduction.recipe @reduction_min_ref_10xf32 : !fir.ref<!fir.array<10xf32>> reduction_operator <min> init {
+! LEGACY:         } combiner {
+! LEGACY:           fir.do_loop
+! LEGACY:             %[[CMPF_0:.*]] = arith.cmpf olt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath<contract> : f32
+! LEGACY:             %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
+
+! LEGACY-LABEL:   acc.reduction.recipe @reduction_min_ref_f32 : !fir.ref<f32> reduction_operator <min> init {
+! LEGACY:         } combiner {
+! LEGACY:           %[[CMPF_0:.*]] = arith.cmpf olt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath<contract> : f32
+! LEGACY:           %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
+
+! LEGACY-LABEL:   acc.reduction.recipe @reduction_max_ref_10xf32 : !fir.ref<!fir.array<10xf32>> reduction_operator <max> init {
+! LEGACY:         } combiner {
+! LEGACY:           fir.do_loop
+! LEGACY:             %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath<contract> : f32
+! LEGACY:             %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
+
+! LEGACY-LABEL:   acc.reduction.recipe @reduction_max_ref_f32 : !fir.ref<f32> reduction_operator <max> init {
+! LEGACY:         } combiner {
+! LEGACY:           %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath<contract> : f32
+! LEGACY:           %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
+
+! EXTREMUM-LABEL:   acc.reduction.recipe @reduction_minimumf_ref_10xf32 : !fir.ref<!fir.array<10xf32>> reduction_operator <minimumf> init {
+! EXTREMUM:         } combiner {
+! EXTREMUM:           fir.do_loop
+! EXTREMUM:             %[[MINIMUMF_0:.*]] = arith.minimumf %{{.*}}, %{{.*}} fastmath<contract> : f32
+
+! EXTREMUM-LABEL:   acc.reduction.recipe @reduction_minimumf_ref_f32 : !fir.ref<f32> reduction_operator <minimumf> init {
+! EXTREMUM:         } combiner {
+! EXTREMUM:           %[[MINIMUMF_0:.*]] = arith.minimumf %{{.*}}, %{{.*}} fastmath<contract> : f32
+
+! EXTREMUM-LABEL:   acc.reduction.recipe @reduction_maximumf_ref_10xf32 : !fir.ref<!fir.array<10xf32>> reduction_operator <maximumf> init {
+! EXTREMUM:         } combiner {
+! EXTREMUM:           fir.do_loop
+! EXTREMUM:             %[[MAXIMUMF_0:.*]] = arith.maximumf %{{.*}}, %{{.*}} fastmath<contract> : f32
+
+! EXTREMUM-LABEL:   acc.reduction.recipe @reduction_maximumf_ref_f32 : !fir.ref<f32> reduction_operator <maximumf> init {
+! EXTREMUM:         } combiner {
+! EXTREMUM:           %[[MAXIMUMF_0:.*]] = arith.maximumf %{{.*}}, %{{.*}} fastmath<contract> : f32
+
+! EXTREMENUM-LABEL:   acc.reduction.recipe @reduction_minnumf_ref_10xf32 : !fir.ref<!fir.array<10xf32>> reduction_operator <minnumf> init {
+! EXTREMENUM:         } combiner {
+! EXTREMENUM:           fir.do_loop
+! EXTREMENUM:             %[[MINNUMF_0:.*]] = arith.minnumf %{{.*}}, %{{.*}} fastmath<contract> : f32
+
+! EXTREMENUM-LABEL:   acc.reduction.recipe @reduction_minnumf_ref_f32 : !fir.ref<f32> reduction_operator <minnumf> init {
+! EXTREMENUM:         } combiner {
+! EXTREMENUM:           %[[MINNUMF_0:.*]] = arith.minnumf %{{.*}}, %{{.*}} fastmath<contract> : f32
+
+! EXTREMENUM-LABEL:   acc.reduction.recipe @reduction_maxnumf_ref_10xf32 : !fir.ref<!fir.array<10xf32>> reduction_operator <maxnumf> init {
+! EXTREMENUM:         } combiner {
+! EXTREMENUM:           fir.do_loop
+! EXTREMENUM:             %[[MAXNUMF_0:.*]] = arith.maxnumf %{{.*}}, %{{.*}} fastmath<contract> : f32
+
+! EXTREMENUM-LABEL:   acc.reduction.recipe @reduction_maxnumf_ref_f32 : !fir.ref<f32> reduction_operator <maxnumf> init {
+! EXTREMENUM:         } combiner {
+! EXTREMENUM:           %[[MAXNUMF_0:.*]] = arith.maxnumf %{{.*}}, %{{.*}} fastmath<contract> : f32
+
+! PORTABLE-NANNSZ-LABEL:   acc.reduction.recipe @reduction_min_ref_10xf32 : !fir.ref<!fir.array<10xf32>> reduction_operator <min> init {
+! PORTABLE-NANNSZ:         } combiner {
+! PORTABLE-NANNSZ:           fir.do_loop
+! PORTABLE-NANNSZ:             %[[CMPF_0:.*]] = arith.cmpf olt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath<nnan,nsz,contract> : f32
+! PORTABLE-NANNSZ:             %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
+
+! PORTABLE-NANNSZ-LABEL:   acc.reduction.recipe @reduction_min_ref_f32 : !fir.ref<f32> reduction_operator <min> init {
+! PORTABLE-NANNSZ:         } combiner {
+! PORTABLE-NANNSZ:           %[[CMPF_0:.*]] = arith.cmpf olt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath<nnan,nsz,contract> : f32
+! PORTABLE-NANNSZ:           %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
+
+! PORTABLE-NANNSZ-LABEL:   acc.reduction.recipe @reduction_max_ref_10xf32 : !fir.ref<!fir.array<10xf32>> reduction_operator <max> init {
+! PORTABLE-NANNSZ:         } combiner {
+! PORTABLE-NANNSZ:           fir.do_loop
+! PORTABLE-NANNSZ:             %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath<nnan,nsz,contract> : f32
+! PORTABLE-NANNSZ:             %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
+
+! PORTABLE-NANNSZ-LABEL:   acc.reduction.recipe @reduction_max_ref_f32 : !fir.ref<f32> reduction_operator <max> init {
+! PORTABLE-NANNSZ:         } combiner {
+! PORTABLE-NANNSZ:           %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1:.*]], %[[LOAD_0:.*]] fastmath<nnan,nsz,contract> : f32
+! PORTABLE-NANNSZ:           %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
--- a/flang/test/Lower/fp-maxmin-behavior.f90
+++ b/flang/test/Lower/fp-maxmin-behavior.f90
@ -0,0 +1,52 @@
+! Test lowering of real MIN/MAX with -ffp-maxmin-behavior (legacy, portable, extremum, extremenum).
+! Legacy uses arith.cmpf + arith.select; extremum uses arith.maximumf/minimumf;
+! extremenum uses arith.maxnumf/minnumf; portable with -fno-signed-zeros -menable-no-nans uses maxnumf/minnumf.
+
+! bbc: legacy, extremum, extremenum
+! RUN: bbc -emit-hlfir -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY
+! RUN: bbc -emit-hlfir -ffp-maxmin-behavior=legacy -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY
+! RUN: bbc -emit-hlfir -ffp-maxmin-behavior=extremum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMUM
+! RUN: bbc -emit-hlfir -ffp-maxmin-behavior=extremenum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMENUM
+
+! flang -fc1: legacy, extremum, extremenum
+! RUN: %flang_fc1 -emit-hlfir -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY
+! RUN: %flang_fc1 -emit-hlfir -ffp-maxmin-behavior=legacy -o - %s 2>&1 | FileCheck %s -check-prefix=LEGACY
+! RUN: %flang_fc1 -emit-hlfir -ffp-maxmin-behavior=extremum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMUM
+! RUN: %flang_fc1 -emit-hlfir -ffp-maxmin-behavior=extremenum -o - %s 2>&1 | FileCheck %s -check-prefix=EXTREMENUM
+
+! portable with -fno-signed-zeros -menable-no-nans => maxnumf/minnumf (flang -fc1 only; bbc does not expose these flags)
+! RUN: %flang_fc1 -emit-hlfir -ffp-maxmin-behavior=portable -fno-signed-zeros -menable-no-nans -o - %s 2>&1 | FileCheck %s -check-prefix=PORTABLE-NANNSZ
+
+subroutine real_max(a, b, r)
+  real :: a, b, r
+  r = max(a, b)
+end subroutine
+! LEGACY-LABEL: func.func @_QPreal_max(
+! LEGACY: arith.cmpf ogt,
+! LEGACY: arith.select
+
+! EXTREMUM-LABEL: func.func @_QPreal_max(
+! EXTREMUM: arith.maximumf
+
+! EXTREMENUM-LABEL: func.func @_QPreal_max(
+! EXTREMENUM: arith.maxnumf
+
+! PORTABLE-NANNSZ-LABEL: func.func @_QPreal_max(
+! PORTABLE-NANNSZ: arith.maxnumf
+
+subroutine real_min(a, b, r)
+  real :: a, b, r
+  r = min(a, b)
+end subroutine
+! LEGACY-LABEL: func.func @_QPreal_min(
+! LEGACY: arith.cmpf olt,
+! LEGACY: arith.select
+
+! EXTREMUM-LABEL: func.func @_QPreal_min(
+! EXTREMUM: arith.minimumf
+
+! EXTREMENUM-LABEL: func.func @_QPreal_min(
+! EXTREMENUM: arith.minnumf
+
+! PORTABLE-NANNSZ-LABEL: func.func @_QPreal_min(
+! PORTABLE-NANNSZ: arith.minnumf
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@ -17,6 +17,7 @@
 #include "flang/Frontend/CodeGenOptions.h"
 #include "flang/Frontend/TargetOptions.h"
 #include "flang/Lower/Bridge.h"
+#include "flang/Lower/LoweringOptions.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Lower/Support/Verifier.h"
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
@ -37,6 +38,7 @@
 #include "flang/Semantics/runtime-type-info.h"
 #include "flang/Semantics/semantics.h"
 #include "flang/Semantics/unparse-with-symbols.h"
+#include "flang/Support/FPMaxminBehavior.h"
 #include "flang/Support/Fortran-features.h"
 #include "flang/Support/LangOptions.h"
 #include "flang/Support/OpenMP-features.h"
@ -292,6 +294,22 @@ static llvm::cl::opt<std::string> complexRange(
                   "multiplication and division [full|improved|basic]"),
    llvm::cl::init(""));

+// -ffp-maxmin-behavior=<mode>: selects one of the FPMaxminBehavior modes
+// listed below; Legacy is used when the option is not given. The parsed
+// value is copied into the lowering options and into the pass pipeline
+// config(s) in convertFortranSourceToMLIR.
+static llvm::cl::opt<Fortran::common::FPMaxminBehavior> fpMaxminBehavior(
+    "ffp-maxmin-behavior",
+    llvm::cl::desc("Control max/min and [max|min][loc|val] lowering "
+                   "[legacy|portable|extremum|extremenum]"),
+    llvm::cl::values(clEnumValN(Fortran::common::FPMaxminBehavior::Legacy,
+                                "legacy", "cmp+select"),
+                     clEnumValN(Fortran::common::FPMaxminBehavior::Portable,
+                                "portable",
+                                "cmp+select and arith.max/minnumf when nnan "
+                                "and nsz fast math flags are enabled"),
+                     clEnumValN(Fortran::common::FPMaxminBehavior::Extremum,
+                                "extremum", "arith.max/minimum"),
+                     clEnumValN(Fortran::common::FPMaxminBehavior::ExtremeNum,
+                                "extremenum", "arith.max/minnum")),
+    llvm::cl::init(Fortran::common::FPMaxminBehavior::Legacy));
+
 #define FLANG_EXCLUDE_CODEGEN
 #include "flang/Optimizer/Passes/CommandLineOpts.h"
 #include "flang/Optimizer/Passes/Pipelines.h"
@ -462,6 +480,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
    loweringOptions.setCUDARuntimeCheck(true);
  if (complexRange == "improved" || complexRange == "basic")
    loweringOptions.setComplexDivisionToRuntime(false);
+  loweringOptions.setFPMaxminBehavior(fpMaxminBehavior.getValue());
  std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {};
  Fortran::frontend::TargetOptions targetOpts;
  Fortran::frontend::CodeGenOptions cgOpts;
@ -534,8 +553,9 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
      // lower HLFIR to FIR
      fir::EnableOpenMP enableOmp =
          enableOpenMP ? fir::EnableOpenMP::Full : fir::EnableOpenMP::None;
-      fir::createHLFIRToFIRPassPipeline(pm, enableOmp,
-                                        llvm::OptimizationLevel::O2);
+      MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2);
+      config.fpMaxminBehavior = loweringOptions.getFPMaxminBehavior();
+      fir::createHLFIRToFIRPassPipeline(pm, enableOmp, config);
      if (mlir::failed(pm.run(mlirModule))) {
        llvm::errs() << "FATAL: lowering from HLFIR to FIR failed";
        return mlir::failure();
@ -550,6 +570,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR(

    // Add O2 optimizer pass pipeline.
    MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2);
+    config.fpMaxminBehavior = loweringOptions.getFPMaxminBehavior();
    config.SkipConvertComplexPow = targetMachine.getTargetTriple().isAMDGCN();
    if (enableOpenMP)
      config.EnableOpenMP = true;
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@ -18,6 +18,7 @@
 #include "flang/Optimizer/Support/InitFIR.h"
 #include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Transforms/Passes.h"
+#include "flang/Support/FPMaxminBehavior.h"
 #include "flang/Tools/CrossToolHelpers.h"
 #include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
@ -95,6 +96,22 @@ static cl::opt<bool> testGeneratorMode(
    "test-gen", cl::desc("-emit-final-mlir -simplify-mlir -enable-aa=false"),
    cl::init(false));

+// -ffp-maxmin-behavior=<mode>: selects one of the FPMaxminBehavior modes
+// listed below; Legacy is used when the option is not given. The parsed
+// value is stored in the MLIRToLLVMPassPipelineConfig built by compileFIR.
+static cl::opt<Fortran::common::FPMaxminBehavior> fpMaxminBehavior(
+    "ffp-maxmin-behavior",
+    cl::desc("Control max/min and [max|min][loc|val] behavior "
+             "[legacy|portable|extremum|extremenum] (for future pass use)"),
+    cl::values(clEnumValN(Fortran::common::FPMaxminBehavior::Legacy, "legacy",
+                          "cmp+select"),
+               clEnumValN(Fortran::common::FPMaxminBehavior::Portable,
+                          "portable",
+                          "cmp+select and arith.max/minnumf when nnan and nsz "
+                          "fast math flags are enabled"),
+               clEnumValN(Fortran::common::FPMaxminBehavior::Extremum,
+                          "extremum", "arith.max/minimum"),
+               clEnumValN(Fortran::common::FPMaxminBehavior::ExtremeNum,
+                          "extremenum", "arith.max/minnum")),
+    cl::init(Fortran::common::FPMaxminBehavior::Legacy));
+
 #include "flang/Optimizer/Passes/CommandLineOpts.h"
 #include "flang/Optimizer/Passes/Pipelines.h"

@ -186,6 +203,7 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
      return mlir::failure();
    }
    MLIRToLLVMPassPipelineConfig config(*level);
+    config.fpMaxminBehavior = fpMaxminBehavior.getValue();
    // TODO: config.StackArrays should be set here?
    config.EnableOpenMP = true;  // assume the input contains OpenMP
    config.AliasAnalysis = enableAliasAnalysis && !testGeneratorMode;
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@ -38,6 +38,11 @@ class OpenACC_Op<string mnemonic, list<Trait> traits = []> :
 def OpenACC_ReductionOperatorNone    : I32EnumAttrCase<"AccNone", 0, "none">;
 def OpenACC_ReductionOperatorAdd     : I32EnumAttrCase<"AccAdd", 1, "add">;
 def OpenACC_ReductionOperatorMul     : I32EnumAttrCase<"AccMul", 2, "mul">;
+// When NaNs or signed zeros are possible, AccMax and AccMin parallel reductions
+// cannot guarantee stable results for floating-point values.
+// As such, these reductions cannot be auto-parallelized.
+// Frontends can use the alternative max/min reduction kinds (see below)
+// to enable auto-parallelization.
 def OpenACC_ReductionOperatorMax     : I32EnumAttrCase<"AccMax", 3, "max">;
 def OpenACC_ReductionOperatorMin     : I32EnumAttrCase<"AccMin", 4, "min">;
 def OpenACC_ReductionOperatorAnd     : I32EnumAttrCase<"AccIand", 5, "iand">;
@ -47,16 +52,62 @@ def OpenACC_ReductionOperatorLogEqv  : I32EnumAttrCase<"AccEqv", 8, "eqv">;
 def OpenACC_ReductionOperatorLogNeqv : I32EnumAttrCase<"AccNeqv", 9, "neqv">;
 def OpenACC_ReductionOperatorLogAnd  : I32EnumAttrCase<"AccLand", 10, "land">;
 def OpenACC_ReductionOperatorLogOr   : I32EnumAttrCase<"AccLor", 11, "lor">;
+// The following reduction operators correspond to arith::AtomicRMWKind kinds
+// named alike. They can only be applied to floating-point types.
+// These reductions can be auto-parallelized.
+def OpenACC_ReductionOperatorMaximum
+    : I32EnumAttrCase<"AccMaximumf", 12, "maximumf">;
+def OpenACC_ReductionOperatorMinimum
+    : I32EnumAttrCase<"AccMinimumf", 13, "minimumf">;
+def OpenACC_ReductionOperatorMaxnum
+    : I32EnumAttrCase<"AccMaxnumf", 14, "maxnumf">;
+def OpenACC_ReductionOperatorMinnum
+    : I32EnumAttrCase<"AccMinnumf", 15, "minnumf">;

-def OpenACC_ReductionOperator : I32EnumAttr<"ReductionOperator",
-    "built-in reduction operations supported by OpenACC",
-    [OpenACC_ReductionOperatorNone, OpenACC_ReductionOperatorAdd, 
-     OpenACC_ReductionOperatorMul, OpenACC_ReductionOperatorMax, OpenACC_ReductionOperatorMin,
-     OpenACC_ReductionOperatorAnd, OpenACC_ReductionOperatorOr,
-     OpenACC_ReductionOperatorXor, OpenACC_ReductionOperatorLogEqv,
-     OpenACC_ReductionOperatorLogNeqv, OpenACC_ReductionOperatorLogAnd,
-     OpenACC_ReductionOperatorLogOr
-    ]> {
+def OpenACC_ReductionOperator
+    : I32EnumAttr<
+          "ReductionOperator",
+          // Built-in reduction operations supported by OpenACC
+          // according to OpenACC 3.3:
+          //
+          //  |-------------------|----------------------|
+          //  | Language operator |                      |
+          //  |-------------------|   ReductionOperator  |
+          //  | C/C++   | Fortran |                      |
+          //  |------------------------------------------|
+          //  |     +   |      +  |         add          |
+          //  |     *   |      *  |         mul          |
+          //  |   max   |    max  | max/maximumf/maxnumf |
+          //  |   min   |    min  | min/minimumf/minnumf |
+          //  |     &   |   iand  |        iand          |
+          //  |     |   |    ior  |         ior          |
+          //  |     ^   |   ieor  |         xor          |
+          //  |    &&   |  .and.  |        land          |
+          //  |    ||   |   .or.  |         lor          |
+          //  |         |  .eqv.  |         eqv          |
+          //  |         | .neqv.  |        neqv          |
+          //  |------------------------------------------|
+          //
+          //  The max/min ReductionOperators behave differently
+          //  when the arguments may be NaNs or signed zeros:
+          //    * max/min - in general, produce inconsistent results
+          //      in parallel execution, because they are not commutative.
+          //      max/min returns the second argument
+          //      when one of the arguments is NaN,
+          //      or when both arguments are zeros, regardless of the sign.
+          //    * maximumf/minimumf - safe to parallelize, corresponds to
+          //      maximum/minimum defined in IEEE-754-2019.
+          //    * maxnumf/minnumf - safe to parallelize, corresponds to
+          //      maxNum/minNum defined in IEEE-754-2008.
+          "built-in reduction operations supported by OpenACC",
+          [OpenACC_ReductionOperatorNone, OpenACC_ReductionOperatorAdd,
+           OpenACC_ReductionOperatorMul, OpenACC_ReductionOperatorMax,
+           OpenACC_ReductionOperatorMin, OpenACC_ReductionOperatorAnd,
+           OpenACC_ReductionOperatorOr, OpenACC_ReductionOperatorXor,
+           OpenACC_ReductionOperatorLogEqv, OpenACC_ReductionOperatorLogNeqv,
+           OpenACC_ReductionOperatorLogAnd, OpenACC_ReductionOperatorLogOr,
+           OpenACC_ReductionOperatorMaximum, OpenACC_ReductionOperatorMinimum,
+           OpenACC_ReductionOperatorMaxnum, OpenACC_ReductionOperatorMinnum]> {
  let genSpecializedAttr = 0;
  let cppNamespace = "::mlir::acc";
 }
@ -1651,6 +1702,9 @@ def OpenACC_ReductionRecipeOp
    (`destroy` $destroyRegion^)?
  }];

+  // TODO: we need to verify that reduction operators
+  // maxnumf, maximumf, minnumf and minimumf are only applied
+  // to FloatType element types.
  let hasRegionVerifier = 1;
 }

--- a/mlir/test/Dialect/OpenACC/ops.mlir
+++ b/mlir/test/Dialect/OpenACC/ops.mlir
@ -2522,3 +2522,49 @@ func.func @test_getdeviceptr_opaque_ptr(%a: !llvm.ptr) -> () {
 // CHECK-SAME:    %[[A:.*]]: !llvm.ptr)
 // CHECK:         %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[A]] : !llvm.ptr) -> !llvm.ptr
 // CHECK:         acc.declare_enter dataOperands(%[[DEVPTR]] : !llvm.ptr)
+
+// -----
+
+acc.reduction.recipe @reduction_maximum_memref_f32 : memref<f32> reduction_operator <maximumf> init {
+^bb0(%arg0: memref<f32>):
+  %alloca = memref.alloca() : memref<f32>
+  acc.yield %alloca : memref<f32>
+} combiner {
+^bb0(%arg0: memref<f32>, %arg1: memref<f32>):
+  acc.yield %arg0 : memref<f32>
+}
+
+// CHECK-LABEL: acc.reduction.recipe @reduction_maximum_memref_f32 : memref<f32> reduction_operator <maximumf>
+
+acc.reduction.recipe @reduction_maxnum_memref_f32 : memref<f32> reduction_operator <maxnumf> init {
+^bb0(%arg0: memref<f32>):
+  %alloca = memref.alloca() : memref<f32>
+  acc.yield %alloca : memref<f32>
+} combiner {
+^bb0(%arg0: memref<f32>, %arg1: memref<f32>):
+  acc.yield %arg0 : memref<f32>
+}
+
+// CHECK-LABEL: acc.reduction.recipe @reduction_maxnum_memref_f32 : memref<f32> reduction_operator <maxnumf>
+
+acc.reduction.recipe @reduction_minimum_memref_f32 : memref<f32> reduction_operator <minimumf> init {
+^bb0(%arg0: memref<f32>):
+  %alloca = memref.alloca() : memref<f32>
+  acc.yield %alloca : memref<f32>
+} combiner {
+^bb0(%arg0: memref<f32>, %arg1: memref<f32>):
+  acc.yield %arg0 : memref<f32>
+}
+
+// CHECK-LABEL: acc.reduction.recipe @reduction_minimum_memref_f32 : memref<f32> reduction_operator <minimumf>
+
+acc.reduction.recipe @reduction_minnum_memref_f32 : memref<f32> reduction_operator <minnumf> init {
+^bb0(%arg0: memref<f32>):
+  %alloca = memref.alloca() : memref<f32>
+  acc.yield %alloca : memref<f32>
+} combiner {
+^bb0(%arg0: memref<f32>, %arg1: memref<f32>):
+  acc.yield %arg0 : memref<f32>
+}
+
+// CHECK-LABEL: acc.reduction.recipe @reduction_minnum_memref_f32 : memref<f32> reduction_operator <minnumf>