llvm-project/flang/lib/Frontend/CompilerInstance.cpp
Justin Fargnoli 9475f6af81
Reland "[NVPTX] Validate user-specified PTX version against SM version" (#180116)
Previous commit message: 

>Previous commit message: 
>
>> Original commit message:
>>
>>>When users explicitly specify a PTX version via -mattr=+ptxNN that's
insufficient for their target SM, we now emit a fatal error. Previously,
we silently upgraded the PTX version to the minimum required for the
target SM.
>>>
>>>When no SM or PTX version is specified, we now use PTX 3.2 (the
minimum for the default SM 3.0) instead of PTX 6.0.
>>
>>The following commits should fix the failures that arose when I
previously tried to land this commit:
>>
>>

>>9fc5fd0ad6
should address the llvm-nvptx*-nvidia-* build failures:
https://github.com/llvm/llvm-project/pull/174834#issuecomment-3742242651
>>
>>

>>600514a637
should address the MLIR failures
>
>The previous commit was reverted with
d23cb79ba4
because the
[mlir-nvidia](https://lab.llvm.org/buildbot/#/builders/138/builds/24797)
and
[mlir-nvidia-gcc7](https://lab.llvm.org/buildbot/#/builders/116/builds/23929)
Buildbots were failing.
>
>Those tests failed because MLIR's default SM was 5.0, which caused
NVPTX
to target PTX ISA v4.0, which did not support the intrinsics used in the
failing tests.
>

>243f011577
should address this by bumping MLIR's default SM to 7.5. Now, using
MLIR's new default SM, NVPTX
targets the PTX ISA v6.3, which supports the intrinsics used in the
failing tests.

---

The previous commit was reverted with
e9b578a4d77025e18318efedd0f3f3764338d859
[because](https://github.com/llvm/llvm-project/pull/179304#issuecomment-3856301333)
the clang driver set the default PTX ISA version to v4.2 when no CUDA
installation is found. However, given our patch, we should not set a
default; instead, let the LLVM backend select the appropriate PTX ISA
version for the target SM.
2026-02-10 18:11:54 +00:00

374 lines
14 KiB
C++

//===--- CompilerInstance.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//
#include "flang/Frontend/CompilerInstance.h"
#include "flang/Frontend/CompilerInvocation.h"
#include "flang/Frontend/TextDiagnosticPrinter.h"
#include "flang/Parser/parsing.h"
#include "flang/Parser/provenance.h"
#include "flang/Semantics/semantics.h"
#include "flang/Support/Fortran-features.h"
#include "flang/Support/Timing.h"
#include "mlir/Support/RawOstreamExtras.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/TargetParser.h"
#include "llvm/TargetParser/Triple.h"
using namespace Fortran::frontend;
// Default constructor. Allocates a fresh CompilerInvocation together with the
// parser objects this instance uses: the source store (`allSources`), the
// cooked-source store built on top of it (`allCookedSources`), and the
// `Parsing` object built on top of the cooked sources. The source encoding is
// then set to UTF-8.
CompilerInstance::CompilerInstance()
: invocation(new CompilerInvocation()),
allSources(new Fortran::parser::AllSources()),
allCookedSources(new Fortran::parser::AllCookedSources(*allSources)),
parsing(new Fortran::parser::Parsing(*allCookedSources)) {
// TODO: This is a good default during development, but ultimately we should
// give the user the opportunity to specify this.
allSources->set_encoding(Fortran::parser::Encoding::UTF_8);
}
// Destructor. In assert-enabled builds, checks that no output files are still
// being tracked in `outputFiles`; clearOutputFiles() is the member that
// empties that list.
CompilerInstance::~CompilerInstance() {
assert(outputFiles.empty() && "Still output files in flight?");
}
// Replaces the invocation held by this instance with `value`. The shared
// pointer is moved in, so this instance shares ownership of the invocation
// with any other holders of the same pointer.
void CompilerInstance::setInvocation(
std::shared_ptr<CompilerInvocation> value) {
invocation = std::move(value);
}
// Makes `value` the stream used for semantic output without taking ownership
// of it; the caller must keep `value` alive for as long as it is in use here.
//
// Any stream previously handed over through the owning overload is destroyed
// at this point. (Calling `release()` on the owning pointer, as was done
// before, merely dropped the pointer and leaked the stream.) The one exception
// is when `value` *is* the currently owned stream: destroying it would leave
// `semaOutputStream` dangling, so ownership is simply kept in that case.
void CompilerInstance::setSemaOutputStream(raw_ostream &value) {
  if (ownedSemaOutputStream.get() != &value)
    ownedSemaOutputStream.reset();
  semaOutputStream = &value;
}
void CompilerInstance::setSemaOutputStream(std::unique_ptr<raw_ostream> value) {
ownedSemaOutputStream.swap(value);
semaOutputStream = ownedSemaOutputStream.get();
}
// Computes the path of the output file from the command-line inputs:
//   * A non-empty `outputFilename` (i.e. the user passed `-o`) is returned
//     verbatim.
//   * Otherwise, if `extension` is non-empty and `inputFilename` is not `-`,
//     the result is `inputFilename` with its extension replaced by
//     `extension`.
//   * In every remaining case (input is `-`, or no extension was supplied)
//     the result is `-`.
static std::string getOutputFilePath(llvm::StringRef outputFilename,
                                     llvm::StringRef inputFilename,
                                     llvm::StringRef extension) {
  // An explicitly requested output name always wins.
  if (!outputFilename.empty())
    return outputFilename.str();

  // Nothing to derive a name from: fall back to `-`.
  const bool inputIsDash = inputFilename == "-";
  if (extension.empty() || inputIsDash)
    return "-";

  // Derive the name from the input file by swapping its extension.
  llvm::SmallString<128> derivedPath(inputFilename);
  llvm::sys::path::replace_extension(derivedPath, extension);
  return std::string(derivedPath);
}
std::unique_ptr<llvm::raw_pwrite_stream>
CompilerInstance::createDefaultOutputFile(bool binary, llvm::StringRef baseName,
llvm::StringRef extension) {
// Get the path of the output file
std::string outputFilePath =
getOutputFilePath(getFrontendOpts().outputFile, baseName, extension);
// Create the output file
llvm::Expected<std::unique_ptr<llvm::raw_pwrite_stream>> os =
createOutputFileImpl(outputFilePath, binary);
// If successful, add the file to the list of tracked output files and
// return.
if (os) {
outputFiles.emplace_back(OutputFile(outputFilePath));
return std::move(*os);
}
// If unsuccessful, issue an error and return Null
unsigned diagID = getDiagnostics().getCustomDiagID(
clang::DiagnosticsEngine::Error, "unable to open output file '%0': '%1'");
getDiagnostics().Report(diagID)
<< outputFilePath << llvm::errorToErrorCode(os.takeError()).message();
return nullptr;
}
// Opens `outputFilePath` for writing and returns a stream for it, or the
// error reported while opening the file.
//
// Binary output is opened with `OF_None`, everything else with
// `OF_TextWithCRLF`. A binary stream that does not support seeking is wrapped
// in a `buffer_unique_ostream`, which provides `pwrite` and owns the
// underlying stream.
llvm::Expected<std::unique_ptr<llvm::raw_pwrite_stream>>
CompilerInstance::createOutputFileImpl(llvm::StringRef outputFilePath,
                                       bool binary) {
  const llvm::sys::fs::OpenFlags openFlags =
      binary ? llvm::sys::fs::OF_None : llvm::sys::fs::OF_TextWithCRLF;

  // Create the file-backed stream; failures are reported through `ec`.
  std::error_code ec;
  auto fileStream =
      std::make_unique<llvm::raw_fd_ostream>(outputFilePath, ec, openFlags);
  if (ec)
    return llvm::errorCodeToError(ec);

  // Non-seekable binary output needs a buffering wrapper that supports
  // 'pwrite' and takes over ownership of the file stream.
  if (binary && !fileStream->supportsSeeking())
    return std::make_unique<llvm::buffer_unique_ostream>(
        std::move(fileStream));

  // Text output and seekable streams can be used directly.
  return std::move(fileStream);
}
void CompilerInstance::clearOutputFiles(bool eraseFiles) {
for (OutputFile &of : outputFiles)
if (!of.filename.empty() && eraseFiles)
llvm::sys::fs::remove(of.filename);
outputFiles.clear();
}
// Runs the frontend action `act` on every input file of this instance.
//
// Before the action runs, the Fortran options are initialised from their
// defaults and from the user input, the source encoding is updated, the
// target machine is created and the lowering options are set. If timers are
// enabled in the invocation, timing output is collected while the action runs
// and printed to stderr afterwards.
//
// Returns false if the target machine could not be set up or if the
// diagnostic client has recorded at least one error; true otherwise.
bool CompilerInstance::executeAction(FrontendAction &act) {
  CompilerInvocation &invoc = this->getInvocation();

  // Set some sane defaults for the frontend.
  invoc.setDefaultFortranOpts();
  // Update the fortran options based on user-based input.
  invoc.setFortranOpts();
  // Set the encoding to read all input files in based on user input.
  allSources->set_encoding(invoc.getFortranOpts().encoding);

  if (!setUpTargetMachine())
    return false;

  // Set options controlling lowering to FIR.
  invoc.setLoweringOptions();

  if (invoc.getEnableTimers()) {
    llvm::TimePassesIsEnabled = true;

    // One string-backed stream per phase; they are dumped once the action has
    // finished (see below).
    timingStreamMLIR = std::make_unique<Fortran::support::string_ostream>();
    timingStreamLLVM = std::make_unique<Fortran::support::string_ostream>();
    timingStreamCodeGen = std::make_unique<Fortran::support::string_ostream>();

    timingMgr.setEnabled(true);
    timingMgr.setDisplayMode(mlir::DefaultTimingManager::DisplayMode::Tree);
    timingMgr.setOutput(
        Fortran::support::createTimingFormatterText(*timingStreamMLIR));

    // Creating a new TimingScope will automatically start the timer. Since this
    // is the top-level timer, this is ok because it will end up capturing the
    // time for all the bookkeeping and other tasks that take place between
    // parsing, lowering etc. for which finer-grained timers will be created.
    timingScopeRoot = timingMgr.getRootScope();
  }

  // Run the frontend action `act` for every input file.
  for (const FrontendInputFile &fif : getFrontendOpts().inputs) {
    if (act.beginSourceFile(*this, fif)) {
      // The error returned by execute() is discarded here; the value returned
      // by this function is derived from the diagnostic client's error count.
      if (llvm::Error err = act.execute()) {
        consumeError(std::move(err));
      }
      act.endSourceFile();
    }
  }

  if (timingMgr.isEnabled()) {
    timingScopeRoot.stop();

    // Write the timings to the associated output stream and clear all timers.
    // We need to provide another stream because the TimingManager will attempt
    // to print in its destructor even if it has been cleared. By the time that
    // destructor runs, the output streams will have been destroyed, so give it
    // a null stream.
    timingMgr.print();
    timingMgr.setOutput(
        Fortran::support::createTimingFormatterText(mlir::thread_safe_nulls()));

    // This prints the timings in "reverse" order, starting from code
    // generation, followed by LLVM-IR optimizations, then MLIR optimizations
    // and transformations and the frontend. If any of the steps are disabled,
    // for instance because code generation was not performed, the strings
    // will be empty.
    if (!timingStreamCodeGen->str().empty())
      llvm::errs() << timingStreamCodeGen->str() << "\n";

    if (!timingStreamLLVM->str().empty())
      llvm::errs() << timingStreamLLVM->str() << "\n";

    if (!timingStreamMLIR->str().empty())
      llvm::errs() << timingStreamMLIR->str() << "\n";
  }

  return !getDiagnostics().getClient()->getNumErrors();
}
// Creates the diagnostics engine of this instance from its own diagnostic
// options. `client` and `shouldOwnClient` are passed through unchanged to the
// static factory overload.
void CompilerInstance::createDiagnostics(clang::DiagnosticConsumer *client,
                                         bool shouldOwnClient) {
  auto &diagOpts = getDiagnosticOpts();
  diagnostics = createDiagnostics(diagOpts, client, shouldOwnClient);
}
// Builds a new diagnostics engine configured with `opts`.
//
// When `client` is non-null it is installed as the diagnostic consumer, with
// `shouldOwnClient` passed on to the engine. Otherwise a
// TextDiagnosticPrinter writing to stderr is installed.
clang::IntrusiveRefCntPtr<clang::DiagnosticsEngine>
CompilerInstance::createDiagnostics(clang::DiagnosticOptions &opts,
                                    clang::DiagnosticConsumer *client,
                                    bool shouldOwnClient) {
  auto engine = llvm::makeIntrusiveRefCnt<clang::DiagnosticsEngine>(
      clang::DiagnosticIDs::create(), opts);

  // No client supplied: fall back to printing diagnostics on stderr.
  if (!client) {
    engine->setClient(new TextDiagnosticPrinter(llvm::errs(), opts));
    return engine;
  }

  // Use the client provided by the caller, e.g. for reporting errors or for
  // implementing -verify.
  engine->setClient(client, shouldOwnClient);
  return engine;
}
// Builds the feature string for AMD GPU targets. It combines the features
// written by the user with the ones `llvm::AMDGPU::fillAMDGPUFeatureMap`
// fills in for the selected CPU and triple.
//
// Every user feature is expected to carry a one-character prefix: `+` enables
// the feature, any other prefix disables it. The result is a comma-separated,
// sorted list of `+name` / `-name` entries. If the feature map cannot be
// filled, an error diagnostic is reported and an empty string is returned.
static std::string
getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags,
                                           const TargetOptions &targetOpts,
                                           const llvm::Triple triple) {
  llvm::StringMap<bool> featureStates;

  // Seed the map with what the user wrote on the command line.
  for (const auto &written : targetOpts.featuresAsWritten) {
    const bool enabled = written[0] == '+';
    featureStates[written.substr(1)] = enabled;
  }

  // Let LLVM complete the map for the selected GPU.
  const llvm::StringRef cpu = targetOpts.cpu;
  const auto fillResult =
      llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, featureStates);
  if (fillResult.first) {
    unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error,
                                            "Unsupported feature ID: %0");
    diags.Report(diagID) << fillResult.second;
    return std::string();
  }

  // Turn the map back into `+feature` / `-feature` entries.
  llvm::SmallVector<std::string> signedFeatures;
  for (const auto &entry : featureStates) {
    std::string item(entry.getValue() ? "+" : "-");
    item += entry.getKey().str();
    signedFeatures.push_back(std::move(item));
  }

  // Sort the entries so that the resulting string is deterministic.
  llvm::sort(signedFeatures);
  return llvm::join(signedFeatures, ",");
}
// Builds the feature string for NVPTX targets. It combines the features
// written by the user with the compute capability, which is added as an
// enabled feature named after the CPU when a CPU was explicitly provided.
//
// Every user feature is expected to carry a one-character prefix: `+` enables
// the feature, any other prefix disables it. The result is a comma-separated,
// sorted list of `+name` / `-name` entries.
// TODO: Have a more robust target conf like `clang/lib/Basic/Targets/NVPTX.cpp`
static std::string
getExplicitAndImplicitNVPTXTargetFeatures(clang::DiagnosticsEngine &diags,
                                          const TargetOptions &targetOpts,
                                          const llvm::Triple triple) {
  llvm::StringMap<bool> featureStates;

  // Seed the map with what the user wrote on the command line.
  for (const auto &written : targetOpts.featuresAsWritten) {
    const llvm::StringRef name = llvm::StringRef(written).drop_front(1);
    featureStates[name.str()] = written[0] == '+';
  }

  // Set the compute capability (only if one was explicitly provided).
  const llvm::StringRef cpu = targetOpts.cpu;
  if (!cpu.empty())
    featureStates[cpu.str()] = true;

  // Turn the map back into `+feature` / `-feature` entries.
  llvm::SmallVector<std::string> signedFeatures;
  for (const auto &entry : featureStates) {
    std::string item(entry.getValue() ? "+" : "-");
    item += entry.getKey().str();
    signedFeatures.push_back(std::move(item));
  }

  // Sort the entries so that the resulting string is deterministic.
  llvm::sort(signedFeatures);
  return llvm::join(signedFeatures, ",");
}
// Returns the comma-separated target feature string for the current target.
//
// Clang does not append all target features to the clang -cc1 invocation.
// Some target features are parsed implicitly by clang::TargetInfo child
// classes, which are basic clang classes that cannot be reused by Flang.
// For AMD GPU and NVPTX targets the implicit features are therefore worked
// out here and merged with the ones specified by the user. For every other
// target the user-specified features are joined as written.
std::string CompilerInstance::getTargetFeatures() {
  const TargetOptions &targetOpts = getInvocation().getTargetOpts();
  const llvm::Triple triple(targetOpts.triple);

  if (triple.isAMDGPU())
    return getExplicitAndImplicitAMDGPUTargetFeatures(getDiagnostics(),
                                                      targetOpts, triple);

  if (triple.isNVPTX())
    return getExplicitAndImplicitNVPTXTargetFeatures(getDiagnostics(),
                                                     targetOpts, triple);

  return llvm::join(targetOpts.featuresAsWritten, ",");
}
bool CompilerInstance::setUpTargetMachine() {
const TargetOptions &targetOpts = getInvocation().getTargetOpts();
const std::string &theTriple = targetOpts.triple;
// Create `Target`
const llvm::Triple triple(theTriple);
std::string error;
const llvm::Target *theTarget =
llvm::TargetRegistry::lookupTarget(triple, error);
if (!theTarget) {
getDiagnostics().Report(clang::diag::err_fe_unable_to_create_target)
<< error;
return false;
}
// Create `TargetMachine`
const auto &CGOpts = getInvocation().getCodeGenOpts();
std::optional<llvm::CodeGenOptLevel> OptLevelOrNone =
llvm::CodeGenOpt::getLevel(CGOpts.OptimizationLevel);
assert(OptLevelOrNone && "Invalid optimization level!");
llvm::CodeGenOptLevel OptLevel = *OptLevelOrNone;
std::string featuresStr = getTargetFeatures();
std::optional<llvm::CodeModel::Model> cm = getCodeModel(CGOpts.CodeModel);
llvm::TargetOptions tOpts = llvm::TargetOptions();
tOpts.EnableAIXExtendedAltivecABI = targetOpts.EnableAIXExtendedAltivecABI;
tOpts.VecLib = convertDriverVectorLibraryToVectorLibrary(CGOpts.getVecLib());
targetMachine.reset(theTarget->createTargetMachine(
triple, /*CPU=*/targetOpts.cpu,
/*Features=*/featuresStr, /*Options=*/tOpts,
/*Reloc::Model=*/CGOpts.getRelocationModel(),
/*CodeModel::Model=*/cm, OptLevel));
assert(targetMachine && "Failed to create TargetMachine");
if (cm.has_value()) {
if ((cm == llvm::CodeModel::Medium || cm == llvm::CodeModel::Large) &&
triple.getArch() == llvm::Triple::x86_64) {
targetMachine->setLargeDataThreshold(CGOpts.LargeDataThreshold);
}
}
return true;
}