Previous commit message:

> Previous commit message:
>
>> Original commit message:
>>
>>> When users explicitly specify a PTX version via -mattr=+ptxNN that's insufficient for their target SM, we now emit a fatal error. Previously, we silently upgraded the PTX version to the minimum required for the target SM.
>>>
>>> When no SM or PTX version is specified, we now use PTX 3.2 (the minimum for the default SM 3.0) instead of PTX 6.0.
>>
>> The following commits should fix the failures that arose when I previously tried to land this commit:
>>
>> 9fc5fd0ad6 should address the llvm-nvptx*-nvidia-* build failures: https://github.com/llvm/llvm-project/pull/174834#issuecomment-3742242651
>>
>> 600514a637 should address the MLIR failures.
>
> The previous commit was reverted with d23cb79ba4 because the [mlir-nvidia](https://lab.llvm.org/buildbot/#/builders/138/builds/24797) and [mlir-nvidia-gcc7](https://lab.llvm.org/buildbot/#/builders/116/builds/23929) Buildbots were failing.
>
> Those tests failed because MLIR's default SM was 5.0, which caused NVPTX to target PTX ISA v4.0, which did not support the intrinsics used in the failing tests.
>
> 243f011577 should address this by bumping MLIR's default SM to 7.5. Now, using MLIR's new default SM, NVPTX targets the PTX ISA v6.3, which supports the intrinsics used in the failing tests.

---

The previous commit was reverted with e9b578a4d77025e18318efedd0f3f3764338d859 [because](https://github.com/llvm/llvm-project/pull/179304#issuecomment-3856301333) the clang driver set the default PTX ISA version to v4.2 when no CUDA installation is found. However, given our patch, we should not set a default; instead, we let the LLVM backend select the appropriate PTX ISA version for the target SM.
374 lines
14 KiB
C++
374 lines
14 KiB
C++
//===--- CompilerInstance.cpp ---------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Frontend/CompilerInstance.h"
|
|
#include "flang/Frontend/CompilerInvocation.h"
|
|
#include "flang/Frontend/TextDiagnosticPrinter.h"
|
|
#include "flang/Parser/parsing.h"
|
|
#include "flang/Parser/provenance.h"
|
|
#include "flang/Semantics/semantics.h"
|
|
#include "flang/Support/Fortran-features.h"
|
|
#include "flang/Support/Timing.h"
|
|
#include "mlir/Support/RawOstreamExtras.h"
|
|
#include "clang/Basic/DiagnosticFrontend.h"
|
|
#include "llvm/ADT/StringExtras.h"
|
|
#include "llvm/MC/TargetRegistry.h"
|
|
#include "llvm/Pass.h"
|
|
#include "llvm/Support/Errc.h"
|
|
#include "llvm/Support/Error.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/TargetParser/TargetParser.h"
|
|
#include "llvm/TargetParser/Triple.h"
|
|
|
|
using namespace Fortran::frontend;
|
|
|
|
// Builds a compiler instance with a fresh invocation plus newly allocated
// source, cooked-source and parsing objects, each constructed from the
// previous one.
CompilerInstance::CompilerInstance()
    : invocation(new CompilerInvocation()),
      allSources(new Fortran::parser::AllSources()),
      allCookedSources(new Fortran::parser::AllCookedSources(*allSources)),
      parsing(new Fortran::parser::Parsing(*allCookedSources)) {
  // TODO: UTF-8 is a convenient default during development, but ultimately
  // the user should be given the opportunity to choose the encoding.
  constexpr auto defaultEncoding = Fortran::parser::Encoding::UTF_8;
  allSources->set_encoding(defaultEncoding);
}
|
|
|
|
// Destroys the instance. The list of tracked output files is expected to be
// empty by now (e.g. after a call to clearOutputFiles()); this is only checked
// in builds with assertions enabled.
CompilerInstance::~CompilerInstance() {
  assert(outputFiles.empty() && "Still output files in flight?");
}
|
|
|
|
void CompilerInstance::setInvocation(
|
|
std::shared_ptr<CompilerInvocation> value) {
|
|
invocation = std::move(value);
|
|
}
|
|
|
|
// Redirects semantic output to `value`. The stream is only borrowed: the
// caller keeps ownership and must keep it alive while it is in use.
//
// A stream previously handed over through the owning overload is destroyed
// here, because it is no longer the active output stream.
void CompilerInstance::setSemaOutputStream(raw_ostream &value) {
  // `release()` would merely give up ownership and leak the old stream;
  // `reset()` actually destroys it. Keep the stream alive if the caller passes
  // the owned stream itself, since `semaOutputStream` is about to point at it.
  if (ownedSemaOutputStream.get() != &value)
    ownedSemaOutputStream.reset();
  semaOutputStream = &value;
}
|
|
|
|
// Redirects semantic output to `value` and takes ownership of it. The stream
// owned before the call (if any) ends up in `value` and is destroyed when this
// function returns.
void CompilerInstance::setSemaOutputStream(std::unique_ptr<raw_ostream> value) {
  std::swap(ownedSemaOutputStream, value);
  semaOutputStream = ownedSemaOutputStream.get();
}
|
|
|
|
// Helper method to generate the path of the output file. The following logic
|
|
// applies:
|
|
// 1. If the user specifies the output file via `-o`, then use that (i.e.
|
|
// the outputFilename parameter).
|
|
// 2. If the user does not specify the name of the output file, derive it from
|
|
// the input file (i.e. inputFilename + extension)
|
|
// 3. If the output file is not specified and the input file is `-`, then set
|
|
// the output file to `-` as well.
|
|
static std::string getOutputFilePath(llvm::StringRef outputFilename,
|
|
llvm::StringRef inputFilename,
|
|
llvm::StringRef extension) {
|
|
|
|
// Output filename _is_ specified. Just use that.
|
|
if (!outputFilename.empty())
|
|
return std::string(outputFilename);
|
|
|
|
// Output filename _is not_ specified. Derive it from the input file name.
|
|
std::string outFile = "-";
|
|
if (!extension.empty() && (inputFilename != "-")) {
|
|
llvm::SmallString<128> path(inputFilename);
|
|
llvm::sys::path::replace_extension(path, extension);
|
|
outFile = std::string(path);
|
|
}
|
|
|
|
return outFile;
|
|
}
|
|
|
|
std::unique_ptr<llvm::raw_pwrite_stream>
|
|
CompilerInstance::createDefaultOutputFile(bool binary, llvm::StringRef baseName,
|
|
llvm::StringRef extension) {
|
|
|
|
// Get the path of the output file
|
|
std::string outputFilePath =
|
|
getOutputFilePath(getFrontendOpts().outputFile, baseName, extension);
|
|
|
|
// Create the output file
|
|
llvm::Expected<std::unique_ptr<llvm::raw_pwrite_stream>> os =
|
|
createOutputFileImpl(outputFilePath, binary);
|
|
|
|
// If successful, add the file to the list of tracked output files and
|
|
// return.
|
|
if (os) {
|
|
outputFiles.emplace_back(OutputFile(outputFilePath));
|
|
return std::move(*os);
|
|
}
|
|
|
|
// If unsuccessful, issue an error and return Null
|
|
unsigned diagID = getDiagnostics().getCustomDiagID(
|
|
clang::DiagnosticsEngine::Error, "unable to open output file '%0': '%1'");
|
|
getDiagnostics().Report(diagID)
|
|
<< outputFilePath << llvm::errorToErrorCode(os.takeError()).message();
|
|
return nullptr;
|
|
}
|
|
|
|
// Opens `outputFilePath` for writing and returns a stream that supports
// `pwrite`, or the error reported while opening the file.
//
// Text output is opened with OF_TextWithCRLF, binary output with OF_None.
llvm::Expected<std::unique_ptr<llvm::raw_pwrite_stream>>
CompilerInstance::createOutputFileImpl(llvm::StringRef outputFilePath,
                                       bool binary) {
  const llvm::sys::fs::OpenFlags openFlags =
      binary ? llvm::sys::fs::OF_None : llvm::sys::fs::OF_TextWithCRLF;

  // Create the file descriptor for the output file.
  std::error_code error;
  auto os = std::make_unique<llvm::raw_fd_ostream>(outputFilePath, error,
                                                   openFlags);
  if (error)
    return llvm::errorCodeToError(error);

  // A binary stream that cannot seek is wrapped into a buffering stream that
  // supports 'pwrite' and takes over ownership of the file stream.
  if (binary && !os->supportsSeeking())
    return std::make_unique<llvm::buffer_unique_ostream>(std::move(os));

  // Otherwise the file stream itself can be handed out.
  return std::move(os);
}
|
|
|
|
void CompilerInstance::clearOutputFiles(bool eraseFiles) {
|
|
for (OutputFile &of : outputFiles)
|
|
if (!of.filename.empty() && eraseFiles)
|
|
llvm::sys::fs::remove(of.filename);
|
|
|
|
outputFiles.clear();
|
|
}
|
|
|
|
// Runs the frontend action `act` on every input file of the invocation.
//
// Before the action runs, the Fortran options, the source encoding, the
// target machine and the lowering options are finalized. When timers are
// enabled, the timing infrastructure is set up beforehand and the collected
// reports are printed to stderr afterwards.
//
// Returns false if the target machine could not be set up or if the
// diagnostic client has recorded at least one error; returns true otherwise.
bool CompilerInstance::executeAction(FrontendAction &act) {
  CompilerInvocation &invoc = this->getInvocation();

  // Set some sane defaults for the frontend.
  invoc.setDefaultFortranOpts();
  // Update the fortran options based on user-based input.
  invoc.setFortranOpts();
  // Set the encoding to read all input files in based on user input.
  allSources->set_encoding(invoc.getFortranOpts().encoding);
  if (!setUpTargetMachine())
    return false;
  // Set options controlling lowering to FIR.
  invoc.setLoweringOptions();

  if (invoc.getEnableTimers()) {
    llvm::TimePassesIsEnabled = true;

    timingStreamMLIR = std::make_unique<Fortran::support::string_ostream>();
    timingStreamLLVM = std::make_unique<Fortran::support::string_ostream>();
    timingStreamCodeGen = std::make_unique<Fortran::support::string_ostream>();

    timingMgr.setEnabled(true);
    timingMgr.setDisplayMode(mlir::DefaultTimingManager::DisplayMode::Tree);
    timingMgr.setOutput(
        Fortran::support::createTimingFormatterText(*timingStreamMLIR));

    // Creating a new TimingScope will automatically start the timer. Since this
    // is the top-level timer, this is ok because it will end up capturing the
    // time for all the bookkeeping and other tasks that take place between
    // parsing, lowering etc. for which finer-grained timers will be created.
    timingScopeRoot = timingMgr.getRootScope();
  }

  // Run the frontend action `act` for every input file. An error returned by
  // `execute()` is consumed here; the overall result is derived from the
  // diagnostics' error count below.
  for (const FrontendInputFile &fif : getFrontendOpts().inputs) {
    if (act.beginSourceFile(*this, fif)) {
      if (llvm::Error err = act.execute()) {
        consumeError(std::move(err));
      }
      act.endSourceFile();
    }
  }

  if (timingMgr.isEnabled()) {
    timingScopeRoot.stop();

    // Write the timings to the associated output stream and clear all timers.
    // We need to provide another stream because the TimingManager will attempt
    // to print in its destructor even if it has been cleared. By the time that
    // destructor runs, the output streams will have been destroyed, so give it
    // a null stream.
    timingMgr.print();
    timingMgr.setOutput(
        Fortran::support::createTimingFormatterText(mlir::thread_safe_nulls()));

    // This prints the timings in "reverse" order, starting from code
    // generation, followed by LLVM-IR optimizations, then MLIR optimizations
    // and transformations and the frontend. If any of the steps are disabled,
    // for instance because code generation was not performed, the strings
    // will be empty.
    if (!timingStreamCodeGen->str().empty())
      llvm::errs() << timingStreamCodeGen->str() << "\n";

    if (!timingStreamLLVM->str().empty())
      llvm::errs() << timingStreamLLVM->str() << "\n";

    if (!timingStreamMLIR->str().empty())
      llvm::errs() << timingStreamMLIR->str() << "\n";
  }

  return !getDiagnostics().getClient()->getNumErrors();
}
|
|
|
|
// Creates the diagnostics engine of this instance from its own diagnostic
// options and stores it in `diagnostics`. `client` and `shouldOwnClient` are
// forwarded unchanged to the static factory overload.
void CompilerInstance::createDiagnostics(clang::DiagnosticConsumer *client,
                                         bool shouldOwnClient) {
  clang::DiagnosticOptions &diagOpts = getDiagnosticOpts();
  diagnostics = createDiagnostics(diagOpts, client, shouldOwnClient);
}
|
|
|
|
// Creates a diagnostics engine configured with `opts`.
//
// If `client` is non-null it becomes the engine's diagnostic client (used for
// reporting errors or for implementing -verify), and `shouldOwnClient` is
// passed along to `setClient`. Otherwise a TextDiagnosticPrinter writing to
// stderr is installed.
clang::IntrusiveRefCntPtr<clang::DiagnosticsEngine>
CompilerInstance::createDiagnostics(clang::DiagnosticOptions &opts,
                                    clang::DiagnosticConsumer *client,
                                    bool shouldOwnClient) {
  auto diags = llvm::makeIntrusiveRefCnt<clang::DiagnosticsEngine>(
      clang::DiagnosticIDs::create(), opts);

  // No client supplied: fall back to printing diagnostics as text.
  if (!client) {
    diags->setClient(new TextDiagnosticPrinter(llvm::errs(), opts));
    return diags;
  }

  diags->setClient(client, shouldOwnClient);
  return diags;
}
|
|
|
|
// Get feature string which represents combined explicit target features
|
|
// for AMD GPU and the target features specified by the user
|
|
static std::string
|
|
getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags,
|
|
const TargetOptions &targetOpts,
|
|
const llvm::Triple triple) {
|
|
llvm::StringRef cpu = targetOpts.cpu;
|
|
llvm::StringMap<bool> FeaturesMap;
|
|
|
|
// Add target features specified by the user
|
|
for (auto &userFeature : targetOpts.featuresAsWritten) {
|
|
std::string userKeyString = userFeature.substr(1);
|
|
FeaturesMap[userKeyString] = (userFeature[0] == '+');
|
|
}
|
|
|
|
auto HasError = llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, FeaturesMap);
|
|
if (HasError.first) {
|
|
unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error,
|
|
"Unsupported feature ID: %0");
|
|
diags.Report(diagID) << HasError.second;
|
|
return std::string();
|
|
}
|
|
|
|
llvm::SmallVector<std::string> featuresVec;
|
|
for (auto &FeatureItem : FeaturesMap) {
|
|
featuresVec.push_back((llvm::Twine(FeatureItem.second ? "+" : "-") +
|
|
FeatureItem.first().str())
|
|
.str());
|
|
}
|
|
llvm::sort(featuresVec);
|
|
return llvm::join(featuresVec, ",");
|
|
}
|
|
|
|
// Get feature string which represents combined explicit target features
|
|
// for NVPTX and the target features specified by the user/
|
|
// TODO: Have a more robust target conf like `clang/lib/Basic/Targets/NVPTX.cpp`
|
|
static std::string
|
|
getExplicitAndImplicitNVPTXTargetFeatures(clang::DiagnosticsEngine &diags,
|
|
const TargetOptions &targetOpts,
|
|
const llvm::Triple triple) {
|
|
llvm::StringRef cpu = targetOpts.cpu;
|
|
llvm::StringMap<bool> implicitFeaturesMap;
|
|
|
|
// Add target features specified by the user
|
|
for (auto &userFeature : targetOpts.featuresAsWritten) {
|
|
llvm::StringRef userKeyString(llvm::StringRef(userFeature).drop_front(1));
|
|
implicitFeaturesMap[userKeyString.str()] = (userFeature[0] == '+');
|
|
}
|
|
|
|
// Set the compute capability (only if one was explicitly provided).
|
|
if (!cpu.empty())
|
|
implicitFeaturesMap[cpu.str()] = true;
|
|
|
|
llvm::SmallVector<std::string> featuresVec;
|
|
for (auto &implicitFeatureItem : implicitFeaturesMap) {
|
|
featuresVec.push_back((llvm::Twine(implicitFeatureItem.second ? "+" : "-") +
|
|
implicitFeatureItem.first().str())
|
|
.str());
|
|
}
|
|
llvm::sort(featuresVec);
|
|
return llvm::join(featuresVec, ",");
|
|
}
|
|
|
|
std::string CompilerInstance::getTargetFeatures() {
|
|
const TargetOptions &targetOpts = getInvocation().getTargetOpts();
|
|
const llvm::Triple triple(targetOpts.triple);
|
|
|
|
// Clang does not append all target features to the clang -cc1 invocation.
|
|
// Some target features are parsed implicitly by clang::TargetInfo child
|
|
// class. Clang::TargetInfo classes are the basic clang classes and
|
|
// they cannot be reused by Flang.
|
|
// That's why we need to extract implicit target features and add
|
|
// them to the target features specified by the user
|
|
if (triple.isAMDGPU()) {
|
|
return getExplicitAndImplicitAMDGPUTargetFeatures(getDiagnostics(),
|
|
targetOpts, triple);
|
|
} else if (triple.isNVPTX()) {
|
|
return getExplicitAndImplicitNVPTXTargetFeatures(getDiagnostics(),
|
|
targetOpts, triple);
|
|
}
|
|
return llvm::join(targetOpts.featuresAsWritten.begin(),
|
|
targetOpts.featuresAsWritten.end(), ",");
|
|
}
|
|
|
|
bool CompilerInstance::setUpTargetMachine() {
|
|
const TargetOptions &targetOpts = getInvocation().getTargetOpts();
|
|
const std::string &theTriple = targetOpts.triple;
|
|
|
|
// Create `Target`
|
|
const llvm::Triple triple(theTriple);
|
|
std::string error;
|
|
const llvm::Target *theTarget =
|
|
llvm::TargetRegistry::lookupTarget(triple, error);
|
|
if (!theTarget) {
|
|
getDiagnostics().Report(clang::diag::err_fe_unable_to_create_target)
|
|
<< error;
|
|
return false;
|
|
}
|
|
// Create `TargetMachine`
|
|
const auto &CGOpts = getInvocation().getCodeGenOpts();
|
|
std::optional<llvm::CodeGenOptLevel> OptLevelOrNone =
|
|
llvm::CodeGenOpt::getLevel(CGOpts.OptimizationLevel);
|
|
assert(OptLevelOrNone && "Invalid optimization level!");
|
|
llvm::CodeGenOptLevel OptLevel = *OptLevelOrNone;
|
|
std::string featuresStr = getTargetFeatures();
|
|
std::optional<llvm::CodeModel::Model> cm = getCodeModel(CGOpts.CodeModel);
|
|
|
|
llvm::TargetOptions tOpts = llvm::TargetOptions();
|
|
tOpts.EnableAIXExtendedAltivecABI = targetOpts.EnableAIXExtendedAltivecABI;
|
|
tOpts.VecLib = convertDriverVectorLibraryToVectorLibrary(CGOpts.getVecLib());
|
|
|
|
targetMachine.reset(theTarget->createTargetMachine(
|
|
triple, /*CPU=*/targetOpts.cpu,
|
|
/*Features=*/featuresStr, /*Options=*/tOpts,
|
|
/*Reloc::Model=*/CGOpts.getRelocationModel(),
|
|
/*CodeModel::Model=*/cm, OptLevel));
|
|
assert(targetMachine && "Failed to create TargetMachine");
|
|
if (cm.has_value()) {
|
|
if ((cm == llvm::CodeModel::Medium || cm == llvm::CodeModel::Large) &&
|
|
triple.getArch() == llvm::Triple::x86_64) {
|
|
targetMachine->setLargeDataThreshold(CGOpts.LargeDataThreshold);
|
|
}
|
|
}
|
|
return true;
|
|
}
|