Peter Klausler f7a15e0021
[flang] Use module file hashes for more checking and disambiguation (#80354)
f18's module files are Fortran with a leading header comment containing
the module file format version and a hash of the following contents.
This hash is currently used only to protect module files against
corruption and truncation.

Extend the use of these hashes to catch or avoid some error cases. When
one module file depends upon another, note its hash in additional module
file header comments. This allows the compiler to detect when the module
dependency is on a module file that has been updated. Further, it allows
the compiler to find the right module file dependency when the same
module file name appears in multiple directories on the module search
path.

The order in which module files are written, when multiple modules
appear in a source file, is such that every dependency is written before
the module(s) that depend upon it, so that their hashes are known.

A warning is emitted when a module file is not the first hit on the
module file search path.

Further work is needed to add a compiler option that emits (larger)
stand-alone module files that incorporate copies of their dependencies
rather than relying on search paths. This will be desirable for
application libraries that want to ship only "top-level" module files
without needing to include their dependencies.

Another future work item would be to admit multiple modules in the same
compilation with the same name if they have distinct hashes.
2024-03-01 13:58:36 -08:00

220 lines
6.6 KiB
C++

//===-- lib/Parser/source.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "flang/Parser/source.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/char-buffer.h"
#include "flang/Parser/characters.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
namespace Fortran::parser {
SourceFile::~SourceFile() { Close(); }
void SourceFile::RecordLineStarts() {
if (std::size_t chars{bytes()}; chars > 0) {
origins_.emplace(1, SourcePositionOrigin{path_, 1});
const char *source{content().data()};
CHECK(source[chars - 1] == '\n' && "missing ultimate newline");
std::size_t at{0};
do { // "at" is always at the beginning of a source line
lineStart_.push_back(at);
at = reinterpret_cast<const char *>(
std::memchr(source + at, '\n', chars - at)) -
source + 1;
} while (at < chars);
CHECK(at == chars);
lineStart_.shrink_to_fit();
}
}
// Check for a Unicode byte order mark (BOM).
// Module files all have one; so can source files.
void SourceFile::IdentifyPayload() {
llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
if (content.starts_with(UTF8_BOM)) {
bom_end_ = UTF8_BOM.size();
encoding_ = Encoding::UTF_8;
}
}
std::string DirectoryName(std::string path) {
llvm::SmallString<128> pathBuf{path};
llvm::sys::path::remove_filename(pathBuf);
return pathBuf.str().str();
}
std::optional<std::string> LocateSourceFile(
std::string name, const std::list<std::string> &searchPath) {
if (name == "-" || llvm::sys::path::is_absolute(name)) {
return name;
}
for (const std::string &dir : searchPath) {
llvm::SmallString<128> path{dir};
llvm::sys::path::append(path, name);
bool isDir{false};
auto er = llvm::sys::fs::is_directory(path, isDir);
if (!er && !isDir) {
return path.str().str();
}
}
return std::nullopt;
}
std::vector<std::string> LocateSourceFileAll(
std::string name, const std::vector<std::string> &searchPath) {
if (name == "-" || llvm::sys::path::is_absolute(name)) {
return {name};
}
std::vector<std::string> result;
for (const std::string &dir : searchPath) {
llvm::SmallString<128> path{dir};
llvm::sys::path::append(path, name);
bool isDir{false};
auto er = llvm::sys::fs::is_directory(path, isDir);
if (!er && !isDir) {
result.emplace_back(path.str().str());
}
}
return result;
}
std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
std::size_t wrote{0};
char *buffer{buf.data()};
char *p{buf.data()};
std::size_t bytes = buf.size();
while (bytes > 0) {
void *vp{static_cast<void *>(p)};
void *crvp{std::memchr(vp, '\r', bytes)};
char *crcp{static_cast<char *>(crvp)};
if (!crcp) {
std::memmove(buffer + wrote, p, bytes);
wrote += bytes;
break;
}
std::size_t chunk = crcp - p;
auto advance{chunk + 1};
if (chunk + 1 >= bytes || crcp[1] == '\n') {
// CR followed by LF or EOF: omit
} else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') {
// CR preceded by LF or BOF: omit
} else {
// CR in line: retain
++chunk;
}
std::memmove(buffer + wrote, p, chunk);
wrote += chunk;
p += advance;
bytes -= advance;
}
return wrote;
}
bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
Close();
path_ = path;
std::string errorPath{"'"s + path_ + "'"};
auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
if (!bufOr) {
auto err = bufOr.getError();
error << "Could not open " << errorPath << ": " << err.message();
return false;
}
buf_ = std::move(bufOr.get());
ReadFile();
return true;
}
bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
Close();
path_ = "standard input";
auto buf_or = llvm::MemoryBuffer::getSTDIN();
if (!buf_or) {
auto err = buf_or.getError();
error << err.message();
return false;
}
auto inbuf = std::move(buf_or.get());
buf_ =
llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
ReadFile();
return true;
}
void SourceFile::ReadFile() {
buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
if (content().size() == 0 || content().back() != '\n') {
// Don't bother to copy if we have spare memory
if (content().size() >= buf_->getBufferSize()) {
auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
content().size() + 1)};
llvm::copy(content(), tmp_buf->getBufferStart());
buf_ = std::move(tmp_buf);
}
buf_end_++;
buf_->getBuffer()[buf_end_ - 1] = '\n';
}
IdentifyPayload();
RecordLineStarts();
}
void SourceFile::Close() {
path_.clear();
buf_.reset();
distinctPaths_.clear();
origins_.clear();
}
SourcePosition SourceFile::GetSourcePosition(std::size_t at) const {
CHECK(at < bytes());
auto it{llvm::upper_bound(lineStart_, at)};
auto trueLineNumber{std::distance(lineStart_.begin(), it - 1) + 1};
auto ub{origins_.upper_bound(trueLineNumber)};
auto column{static_cast<int>(at - lineStart_[trueLineNumber - 1] + 1)};
if (ub == origins_.begin()) {
return {*this, path_, static_cast<int>(trueLineNumber), column,
static_cast<int>(trueLineNumber)};
} else {
--ub;
const SourcePositionOrigin &origin{ub->second};
auto lineNumber{
trueLineNumber - ub->first + static_cast<std::size_t>(origin.line)};
return {*this, origin.path, static_cast<int>(lineNumber), column,
static_cast<int>(trueLineNumber)};
}
}
const std::string &SourceFile::SavePath(std::string &&path) {
return *distinctPaths_.emplace(std::move(path)).first;
}
void SourceFile::LineDirective(
int trueLineNumber, const std::string &path, int lineNumber) {
origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber});
}
llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const {
o << "SourceFile '" << path_ << "'\n";
for (const auto &[at, spo] : origins_) {
o << " origin_[" << at << "] -> '" << spo.path << "' " << spo.line << '\n';
}
return o;
}
} // namespace Fortran::parser