
f18's module files are Fortran with a leading header comment containing the module file format version and a hash of the following contents. This hash is currently used only to protect module files against corruption and truncation. Extend the use of these hashes to catch or avoid some error cases. When one module file depends upon another, note its hash in additional module file header comments. This allows the compiler to detect when the module dependency is on a module file that has been updated. Further, it allows the compiler to find the right module file dependency when the same module file name appears in multiple directories on the module search path. The order in which module files are written, when multiple modules appear in a source file, is such that every dependency is written before the module(s) that depend upon it, so that their hashes are known. A warning is emitted when a module file is not the first hit on the module file search path. Further work is needed to add a compiler option that emits (larger) stand-alone module files that incorporate copies of their dependencies rather than relying on search paths. This will be desirable for application libraries that want to ship only "top-level" module files without needing to include their dependencies. Another future work item would be to admit multiple modules in the same compilation with the same name if they have distinct hashes.
220 lines
6.6 KiB
C++
220 lines
6.6 KiB
C++
//===-- lib/Parser/source.cpp ---------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Parser/source.h"
|
|
#include "flang/Common/idioms.h"
|
|
#include "flang/Parser/char-buffer.h"
|
|
#include "flang/Parser/characters.h"
|
|
#include "llvm/Support/Errno.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include <algorithm>
|
|
#include <cstring>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
namespace Fortran::parser {
|
|
|
|
// Destruction performs the same cleanup as an explicit Close().
SourceFile::~SourceFile() {
  Close();
}
|
|
|
|
void SourceFile::RecordLineStarts() {
|
|
if (std::size_t chars{bytes()}; chars > 0) {
|
|
origins_.emplace(1, SourcePositionOrigin{path_, 1});
|
|
const char *source{content().data()};
|
|
CHECK(source[chars - 1] == '\n' && "missing ultimate newline");
|
|
std::size_t at{0};
|
|
do { // "at" is always at the beginning of a source line
|
|
lineStart_.push_back(at);
|
|
at = reinterpret_cast<const char *>(
|
|
std::memchr(source + at, '\n', chars - at)) -
|
|
source + 1;
|
|
} while (at < chars);
|
|
CHECK(at == chars);
|
|
lineStart_.shrink_to_fit();
|
|
}
|
|
}
|
|
|
|
// Check for a Unicode byte order mark (BOM).
|
|
// Module files all have one; so can source files.
|
|
void SourceFile::IdentifyPayload() {
|
|
llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
|
|
constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
|
|
if (content.starts_with(UTF8_BOM)) {
|
|
bom_end_ = UTF8_BOM.size();
|
|
encoding_ = Encoding::UTF_8;
|
|
}
|
|
}
|
|
|
|
std::string DirectoryName(std::string path) {
|
|
llvm::SmallString<128> pathBuf{path};
|
|
llvm::sys::path::remove_filename(pathBuf);
|
|
return pathBuf.str().str();
|
|
}
|
|
|
|
std::optional<std::string> LocateSourceFile(
|
|
std::string name, const std::list<std::string> &searchPath) {
|
|
if (name == "-" || llvm::sys::path::is_absolute(name)) {
|
|
return name;
|
|
}
|
|
for (const std::string &dir : searchPath) {
|
|
llvm::SmallString<128> path{dir};
|
|
llvm::sys::path::append(path, name);
|
|
bool isDir{false};
|
|
auto er = llvm::sys::fs::is_directory(path, isDir);
|
|
if (!er && !isDir) {
|
|
return path.str().str();
|
|
}
|
|
}
|
|
return std::nullopt;
|
|
}
|
|
|
|
std::vector<std::string> LocateSourceFileAll(
|
|
std::string name, const std::vector<std::string> &searchPath) {
|
|
if (name == "-" || llvm::sys::path::is_absolute(name)) {
|
|
return {name};
|
|
}
|
|
std::vector<std::string> result;
|
|
for (const std::string &dir : searchPath) {
|
|
llvm::SmallString<128> path{dir};
|
|
llvm::sys::path::append(path, name);
|
|
bool isDir{false};
|
|
auto er = llvm::sys::fs::is_directory(path, isDir);
|
|
if (!er && !isDir) {
|
|
result.emplace_back(path.str().str());
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
// Compacts "buf" in place by dropping carriage returns that sit at a line
// boundary: a CR is removed when it is followed by LF or the end of the
// buffer, or when it is preceded by LF or the start of the buffer.  Any other
// CR is kept.  Returns the number of bytes that remain at the front of "buf".
std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
  char *const base{buf.data()};
  char *out{base}; // where the next surviving byte goes
  char *in{base}; // first byte not yet examined
  std::size_t remaining{buf.size()};
  while (remaining > 0) {
    void *found{std::memchr(static_cast<void *>(in), '\r', remaining)};
    char *cr{static_cast<char *>(found)};
    if (cr == nullptr) {
      // No more CRs: keep the whole tail.
      std::memmove(out, in, remaining);
      out += remaining;
      break;
    }
    const std::size_t before{static_cast<std::size_t>(cr - in)};
    const std::size_t consumed{before + 1};
    // CR followed by LF or EOF: omit
    bool dropCR{consumed >= remaining || cr[1] == '\n'};
    if (!dropCR) {
      // CR preceded by LF or BOF: omit
      dropCR = (before == 0 && in == base) || cr[-1] == '\n';
    }
    // A CR in the middle of a line is copied along with the text before it.
    const std::size_t keep{dropCR ? before : consumed};
    std::memmove(out, in, keep);
    out += keep;
    in += consumed;
    remaining -= consumed;
  }
  return static_cast<std::size_t>(out - base);
}
|
|
|
|
bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
|
|
Close();
|
|
path_ = path;
|
|
std::string errorPath{"'"s + path_ + "'"};
|
|
auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
|
|
if (!bufOr) {
|
|
auto err = bufOr.getError();
|
|
error << "Could not open " << errorPath << ": " << err.message();
|
|
return false;
|
|
}
|
|
buf_ = std::move(bufOr.get());
|
|
ReadFile();
|
|
return true;
|
|
}
|
|
|
|
// Closes whatever was open, then reads all of standard input into a fresh
// writable buffer and processes it with ReadFile().  Returns true on success.
// On failure, writes a message to "error" and returns false.
bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
  Close();
  path_ = "standard input";
  auto buf_or = llvm::MemoryBuffer::getSTDIN();
  if (!buf_or) {
    auto err = buf_or.getError();
    error << err.message();
    return false;
  }
  auto inbuf = std::move(buf_or.get());
  // The stdin buffer is read-only; copy it into a writable one so that the
  // contents can be edited in place by ReadFile().
  buf_ =
      llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
  if (!buf_) {
    // The allocation can fail and yield a null buffer; report that rather
    // than dereferencing it below.
    error << "Could not allocate a buffer for standard input";
    return false;
  }
  llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
  ReadFile();
  return true;
}
|
|
|
|
void SourceFile::ReadFile() {
|
|
buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
|
|
if (content().size() == 0 || content().back() != '\n') {
|
|
// Don't bother to copy if we have spare memory
|
|
if (content().size() >= buf_->getBufferSize()) {
|
|
auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
|
|
content().size() + 1)};
|
|
llvm::copy(content(), tmp_buf->getBufferStart());
|
|
buf_ = std::move(tmp_buf);
|
|
}
|
|
buf_end_++;
|
|
buf_->getBuffer()[buf_end_ - 1] = '\n';
|
|
}
|
|
IdentifyPayload();
|
|
RecordLineStarts();
|
|
}
|
|
|
|
void SourceFile::Close() {
|
|
path_.clear();
|
|
buf_.reset();
|
|
distinctPaths_.clear();
|
|
origins_.clear();
|
|
}
|
|
|
|
// Translates the byte offset "at" (which must lie within the payload) into a
// source position.  The physical ("true") line comes from the line-start
// table; the reported path and line number come from the closest origin entry
// at or before that line, if there is one, and otherwise from this file.
SourcePosition SourceFile::GetSourcePosition(std::size_t at) const {
  CHECK(at < bytes());
  // The first line start beyond "at" marks the end of the containing line, so
  // its index in the table is the 1-based number of that line.
  const auto nextStart{llvm::upper_bound(lineStart_, at)};
  const auto trueLineNumber{std::distance(lineStart_.begin(), nextStart)};
  const int trueLine{static_cast<int>(trueLineNumber)};
  const int column{static_cast<int>(at - lineStart_[trueLineNumber - 1] + 1)};
  auto ub{origins_.upper_bound(trueLineNumber)};
  if (ub == origins_.begin()) {
    // No origin applies to this line: report the position in this file.
    return {*this, path_, trueLine, column, trueLine};
  }
  --ub; // last origin entry at or before this line
  const SourcePositionOrigin &origin{ub->second};
  const auto lineNumber{
      trueLineNumber - ub->first + static_cast<std::size_t>(origin.line)};
  return {*this, origin.path, static_cast<int>(lineNumber), column, trueLine};
}
|
|
|
|
const std::string &SourceFile::SavePath(std::string &&path) {
|
|
return *distinctPaths_.emplace(std::move(path)).first;
|
|
}
|
|
|
|
void SourceFile::LineDirective(
|
|
int trueLineNumber, const std::string &path, int lineNumber) {
|
|
origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber});
|
|
}
|
|
|
|
// Writes a debugging summary to "o": the file's path followed by one line per
// origin entry.  Returns "o" so that calls can be chained.
llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const {
  o << "SourceFile '" << path_ << "'\n";
  for (auto entry{origins_.begin()}; entry != origins_.end(); ++entry) {
    const SourcePositionOrigin &origin{entry->second};
    o << " origin_[" << entry->first << "] -> '" << origin.path << "' "
      << origin.line << '\n';
  }
  return o;
}
|
|
} // namespace Fortran::parser
|