This is the implementation of part of F2023 new feature US 03. Extracting tokens from a string, SPLIT intrinsic. It's section 16.9.196 SPLIT (STRING, SET, POS [, BACK]) of Fortran 2023 Standard. It's part of Flang issue [#178044](https://github.com/llvm/llvm-project/issues/178044). Note that I work with @kwyatt-ext on this issue. He implemented the other part, TOKENIZE. A test will be added into [llvm-test-suite](https://github.com/llvm/llvm-test-suite) later after this PR is merged.
1432 lines
49 KiB
C++
1432 lines
49 KiB
C++
//===-- lib/runtime/character.cpp -------------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Runtime/character.h"
|
|
#include "flang-rt/runtime/descriptor.h"
|
|
#include "flang-rt/runtime/terminator.h"
|
|
#include "flang-rt/runtime/tools.h"
|
|
#include "flang/Common/bit-population-count.h"
|
|
#include "flang/Common/uint128.h"
|
|
#include "flang/Runtime/character.h"
|
|
#include "flang/Runtime/cpp-type.h"
|
|
#include "flang/Runtime/freestanding-tools.h"
|
|
#include <algorithm>
|
|
#include <cstring>
|
|
|
|
namespace Fortran::runtime {
|
|
|
|
template <typename CHAR>
|
|
inline RT_API_ATTRS int CompareToBlankPadding(
|
|
const CHAR *x, std::size_t chars) {
|
|
using UNSIGNED_CHAR = std::make_unsigned_t<CHAR>;
|
|
const auto blank{static_cast<UNSIGNED_CHAR>(' ')};
|
|
for (; chars-- > 0; ++x) {
|
|
const UNSIGNED_CHAR ux{*reinterpret_cast<const UNSIGNED_CHAR *>(x)};
|
|
if (ux < blank) {
|
|
return -1;
|
|
}
|
|
if (ux > blank) {
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
RT_OFFLOAD_API_GROUP_BEGIN
|
|
|
|
template <typename CHAR>
|
|
RT_API_ATTRS int CharacterScalarCompare(
|
|
const CHAR *x, const CHAR *y, std::size_t xChars, std::size_t yChars) {
|
|
auto minChars{std::min(xChars, yChars)};
|
|
if constexpr (sizeof(CHAR) == 1) {
|
|
// don't use for kind=2 or =4, that would fail on little-endian machines
|
|
int cmp{Fortran::runtime::memcmp(x, y, minChars)};
|
|
if (cmp < 0) {
|
|
return -1;
|
|
}
|
|
if (cmp > 0) {
|
|
return 1;
|
|
}
|
|
if (xChars == yChars) {
|
|
return 0;
|
|
}
|
|
x += minChars;
|
|
y += minChars;
|
|
} else {
|
|
for (std::size_t n{minChars}; n-- > 0; ++x, ++y) {
|
|
if (*x < *y) {
|
|
return -1;
|
|
}
|
|
if (*x > *y) {
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
if (int cmp{CompareToBlankPadding(x, xChars - minChars)}) {
|
|
return cmp;
|
|
}
|
|
return -CompareToBlankPadding(y, yChars - minChars);
|
|
}
|
|
|
|
template RT_API_ATTRS int CharacterScalarCompare<char>(
|
|
const char *x, const char *y, std::size_t xChars, std::size_t yChars);
|
|
template RT_API_ATTRS int CharacterScalarCompare<char16_t>(const char16_t *x,
|
|
const char16_t *y, std::size_t xChars, std::size_t yChars);
|
|
template RT_API_ATTRS int CharacterScalarCompare<char32_t>(const char32_t *x,
|
|
const char32_t *y, std::size_t xChars, std::size_t yChars);
|
|
|
|
RT_OFFLOAD_API_GROUP_END
|
|
|
|
// Shift count to use when converting between character lengths
|
|
// and byte counts.
|
|
template <typename CHAR>
|
|
constexpr int shift{common::TrailingZeroBitCount(sizeof(CHAR))};
|
|
|
|
template <typename CHAR>
|
|
static RT_API_ATTRS void Compare(Descriptor &result, const Descriptor &x,
|
|
const Descriptor &y, const Terminator &terminator) {
|
|
RUNTIME_CHECK(
|
|
terminator, x.rank() == y.rank() || x.rank() == 0 || y.rank() == 0);
|
|
int rank{std::max(x.rank(), y.rank())};
|
|
SubscriptValue ub[maxRank], xAt[maxRank], yAt[maxRank];
|
|
SubscriptValue elements{1};
|
|
for (int j{0}; j < rank; ++j) {
|
|
if (x.rank() > 0 && y.rank() > 0) {
|
|
SubscriptValue xUB{x.GetDimension(j).Extent()};
|
|
SubscriptValue yUB{y.GetDimension(j).Extent()};
|
|
if (xUB != yUB) {
|
|
terminator.Crash("Character array comparison: operands are not "
|
|
"conforming on dimension %d (%jd != %jd)",
|
|
j + 1, static_cast<std::intmax_t>(xUB),
|
|
static_cast<std::intmax_t>(yUB));
|
|
}
|
|
ub[j] = xUB;
|
|
} else {
|
|
ub[j] = (x.rank() ? x : y).GetDimension(j).Extent();
|
|
}
|
|
elements *= ub[j];
|
|
}
|
|
x.GetLowerBounds(xAt);
|
|
y.GetLowerBounds(yAt);
|
|
result.Establish(
|
|
TypeCategory::Logical, 1, nullptr, rank, ub, CFI_attribute_allocatable);
|
|
for (int j{0}; j < rank; ++j) {
|
|
result.GetDimension(j).SetBounds(1, ub[j]);
|
|
}
|
|
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
|
terminator.Crash("Compare: could not allocate storage for result");
|
|
}
|
|
std::size_t xChars{x.ElementBytes() >> shift<CHAR>};
|
|
std::size_t yChars{y.ElementBytes() >> shift<char>};
|
|
for (SubscriptValue resultAt{0}; elements-- > 0;
|
|
++resultAt, x.IncrementSubscripts(xAt), y.IncrementSubscripts(yAt)) {
|
|
*result.OffsetElement<char>(resultAt) = CharacterScalarCompare<CHAR>(
|
|
x.Element<CHAR>(xAt), y.Element<CHAR>(yAt), xChars, yChars);
|
|
}
|
|
}
|
|
|
|
template <typename CHAR, bool ADJUSTR>
|
|
static RT_API_ATTRS void Adjust(CHAR *to, const CHAR *from, std::size_t chars) {
|
|
if constexpr (ADJUSTR) {
|
|
std::size_t j{chars}, k{chars};
|
|
for (; k > 0 && from[k - 1] == ' '; --k) {
|
|
}
|
|
while (k > 0) {
|
|
to[--j] = from[--k];
|
|
}
|
|
while (j > 0) {
|
|
to[--j] = ' ';
|
|
}
|
|
} else { // ADJUSTL
|
|
std::size_t j{0}, k{0};
|
|
for (; k < chars && from[k] == ' '; ++k) {
|
|
}
|
|
while (k < chars) {
|
|
to[j++] = from[k++];
|
|
}
|
|
while (j < chars) {
|
|
to[j++] = ' ';
|
|
}
|
|
}
|
|
}
|
|
|
|
template <typename CHAR, bool ADJUSTR>
|
|
static RT_API_ATTRS void AdjustLRHelper(Descriptor &result,
|
|
const Descriptor &string, const Terminator &terminator) {
|
|
int rank{string.rank()};
|
|
SubscriptValue ub[maxRank], stringAt[maxRank];
|
|
SubscriptValue elements{1};
|
|
for (int j{0}; j < rank; ++j) {
|
|
ub[j] = string.GetDimension(j).Extent();
|
|
elements *= ub[j];
|
|
stringAt[j] = 1;
|
|
}
|
|
string.GetLowerBounds(stringAt);
|
|
std::size_t elementBytes{string.ElementBytes()};
|
|
result.Establish(string.type(), elementBytes, nullptr, rank, ub,
|
|
CFI_attribute_allocatable);
|
|
for (int j{0}; j < rank; ++j) {
|
|
result.GetDimension(j).SetBounds(1, ub[j]);
|
|
}
|
|
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
|
terminator.Crash("ADJUSTL/R: could not allocate storage for result");
|
|
}
|
|
for (SubscriptValue resultAt{0}; elements-- > 0;
|
|
resultAt += elementBytes, string.IncrementSubscripts(stringAt)) {
|
|
Adjust<CHAR, ADJUSTR>(result.OffsetElement<CHAR>(resultAt),
|
|
string.Element<const CHAR>(stringAt), elementBytes >> shift<CHAR>);
|
|
}
|
|
}
|
|
|
|
template <bool ADJUSTR>
|
|
RT_API_ATTRS void AdjustLR(Descriptor &result, const Descriptor &string,
|
|
const char *sourceFile, int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
switch (string.raw().type) {
|
|
case CFI_type_char:
|
|
AdjustLRHelper<char, ADJUSTR>(result, string, terminator);
|
|
break;
|
|
case CFI_type_char16_t:
|
|
AdjustLRHelper<char16_t, ADJUSTR>(result, string, terminator);
|
|
break;
|
|
case CFI_type_char32_t:
|
|
AdjustLRHelper<char32_t, ADJUSTR>(result, string, terminator);
|
|
break;
|
|
default:
|
|
terminator.Crash("ADJUSTL/R: bad string type code %d",
|
|
static_cast<int>(string.raw().type));
|
|
}
|
|
}
|
|
|
|
template <typename CHAR>
|
|
inline RT_API_ATTRS std::size_t LenTrim(const CHAR *x, std::size_t chars) {
|
|
while (chars > 0 && x[chars - 1] == ' ') {
|
|
--chars;
|
|
}
|
|
return chars;
|
|
}
|
|
|
|
template <typename INT, typename CHAR>
|
|
static RT_API_ATTRS void LenTrim(Descriptor &result, const Descriptor &string,
|
|
const Terminator &terminator) {
|
|
int rank{string.rank()};
|
|
SubscriptValue ub[maxRank], stringAt[maxRank];
|
|
SubscriptValue elements{1};
|
|
for (int j{0}; j < rank; ++j) {
|
|
ub[j] = string.GetDimension(j).Extent();
|
|
elements *= ub[j];
|
|
}
|
|
string.GetLowerBounds(stringAt);
|
|
result.Establish(TypeCategory::Integer, sizeof(INT), nullptr, rank, ub,
|
|
CFI_attribute_allocatable);
|
|
for (int j{0}; j < rank; ++j) {
|
|
result.GetDimension(j).SetBounds(1, ub[j]);
|
|
}
|
|
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
|
terminator.Crash("LEN_TRIM: could not allocate storage for result");
|
|
}
|
|
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
|
|
for (SubscriptValue resultAt{0}; elements-- > 0;
|
|
resultAt += sizeof(INT), string.IncrementSubscripts(stringAt)) {
|
|
*result.OffsetElement<INT>(resultAt) =
|
|
LenTrim(string.Element<CHAR>(stringAt), stringElementChars);
|
|
}
|
|
}
|
|
|
|
template <typename CHAR>
|
|
static RT_API_ATTRS void LenTrimKind(Descriptor &result,
|
|
const Descriptor &string, int kind, const Terminator &terminator) {
|
|
switch (kind) {
|
|
case 1:
|
|
LenTrim<CppTypeFor<TypeCategory::Integer, 1>, CHAR>(
|
|
result, string, terminator);
|
|
break;
|
|
case 2:
|
|
LenTrim<CppTypeFor<TypeCategory::Integer, 2>, CHAR>(
|
|
result, string, terminator);
|
|
break;
|
|
case 4:
|
|
LenTrim<CppTypeFor<TypeCategory::Integer, 4>, CHAR>(
|
|
result, string, terminator);
|
|
break;
|
|
case 8:
|
|
LenTrim<CppTypeFor<TypeCategory::Integer, 8>, CHAR>(
|
|
result, string, terminator);
|
|
break;
|
|
case 16:
|
|
LenTrim<CppTypeFor<TypeCategory::Integer, 16>, CHAR>(
|
|
result, string, terminator);
|
|
break;
|
|
default:
|
|
terminator.Crash(
|
|
"not yet implemented: CHARACTER(KIND=%d) in LEN_TRIM intrinsic", kind);
|
|
}
|
|
}
|
|
|
|
// INDEX implementation
|
|
template <typename CHAR>
|
|
inline RT_API_ATTRS std::size_t Index(const CHAR *x, std::size_t xLen,
|
|
const CHAR *want, std::size_t wantLen, bool back) {
|
|
if (xLen < wantLen) {
|
|
return 0;
|
|
}
|
|
if (xLen == 0) {
|
|
return 1; // wantLen is also 0, so trivial match
|
|
}
|
|
if (back) {
|
|
// If wantLen==0, returns xLen + 1 per standard (and all other compilers)
|
|
std::size_t at{xLen - wantLen + 1};
|
|
for (; at > 0; --at) {
|
|
std::size_t j{1};
|
|
for (; j <= wantLen; ++j) {
|
|
if (x[at + j - 2] != want[j - 1]) {
|
|
break;
|
|
}
|
|
}
|
|
if (j > wantLen) {
|
|
return at;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
if (wantLen == 1) {
|
|
// Trivial case for single character lookup.
|
|
// We can use simple forward search.
|
|
CHAR ch{want[0]};
|
|
if constexpr (std::is_same_v<CHAR, char>) {
|
|
if (auto pos{reinterpret_cast<const CHAR *>(
|
|
Fortran::runtime::memchr(x, ch, xLen))}) {
|
|
return pos - x + 1;
|
|
}
|
|
} else {
|
|
for (std::size_t at{0}; at < xLen; ++at) {
|
|
if (x[at] == ch) {
|
|
return at + 1;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
// Non-trivial forward substring search: use a simplified form of
|
|
// Boyer-Moore substring searching.
|
|
for (std::size_t at{1}; at + wantLen - 1 <= xLen;) {
|
|
// Compare x(at:at+wantLen-1) with want(1:wantLen).
|
|
// The comparison proceeds from the ends of the substrings forward
|
|
// so that we can skip ahead by multiple positions on a miss.
|
|
std::size_t j{wantLen};
|
|
CHAR ch;
|
|
for (; j > 0; --j) {
|
|
ch = x[at + j - 2];
|
|
if (ch != want[j - 1]) {
|
|
break;
|
|
}
|
|
}
|
|
if (j == 0) {
|
|
return at; // found a match
|
|
}
|
|
// Suppose we have at==2:
|
|
// "THAT FORTRAN THAT I RAN" <- the string (x) in which we search
|
|
// "THAT I RAN" <- the string (want) for which we search
|
|
// ^------------------ j==7, ch=='T'
|
|
// We can shift ahead 3 positions to at==5 to align the 'T's:
|
|
// "THAT FORTRAN THAT I RAN"
|
|
// "THAT I RAN"
|
|
std::size_t shift{1};
|
|
for (; shift < j; ++shift) {
|
|
if (want[j - shift - 1] == ch) {
|
|
break;
|
|
}
|
|
}
|
|
at += shift;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// SCAN and VERIFY implementation help. These intrinsic functions
|
|
// do pretty much the same thing, so they're templatized with a
|
|
// distinguishing flag.
|
|
|
|
enum class CharFunc { Index, Scan, Verify };
|
|
|
|
template <typename CHAR, CharFunc FUNC>
|
|
inline RT_API_ATTRS std::size_t ScanVerify(const CHAR *x, std::size_t xLen,
|
|
const CHAR *set, std::size_t setLen, bool back) {
|
|
std::size_t at{back ? xLen : 1};
|
|
int increment{back ? -1 : 1};
|
|
for (; xLen-- > 0; at += increment) {
|
|
CHAR ch{x[at - 1]};
|
|
bool inSet{false};
|
|
// TODO: If set is sorted, could use binary search
|
|
for (std::size_t j{0}; j < setLen; ++j) {
|
|
if (set[j] == ch) {
|
|
inSet = true;
|
|
break;
|
|
}
|
|
}
|
|
if (inSet != (FUNC == CharFunc::Verify)) {
|
|
return at;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Specialization for one-byte characters
|
|
template <bool IS_VERIFY = false>
|
|
inline RT_API_ATTRS std::size_t ScanVerify(const char *x, std::size_t xLen,
|
|
const char *set, std::size_t setLen, bool back) {
|
|
std::size_t at{back ? xLen : 1};
|
|
int increment{back ? -1 : 1};
|
|
if (xLen > 0) {
|
|
std::uint64_t bitSet[256 / 64]{0};
|
|
std::uint64_t one{1};
|
|
for (std::size_t j{0}; j < setLen; ++j) {
|
|
unsigned setCh{static_cast<unsigned char>(set[j])};
|
|
bitSet[setCh / 64] |= one << (setCh % 64);
|
|
}
|
|
for (; xLen-- > 0; at += increment) {
|
|
unsigned ch{static_cast<unsigned char>(x[at - 1])};
|
|
bool inSet{((bitSet[ch / 64] >> (ch % 64)) & 1) != 0};
|
|
if (inSet != IS_VERIFY) {
|
|
return at;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
template <typename INT, typename CHAR, CharFunc FUNC>
|
|
static RT_API_ATTRS void GeneralCharFunc(Descriptor &result,
|
|
const Descriptor &string, const Descriptor &arg, const Descriptor *back,
|
|
const Terminator &terminator) {
|
|
int rank{string.rank() ? string.rank()
|
|
: arg.rank() ? arg.rank()
|
|
: back ? back->rank()
|
|
: 0};
|
|
SubscriptValue ub[maxRank], stringAt[maxRank], argAt[maxRank],
|
|
backAt[maxRank];
|
|
SubscriptValue elements{1};
|
|
for (int j{0}; j < rank; ++j) {
|
|
ub[j] = string.rank() ? string.GetDimension(j).Extent()
|
|
: arg.rank() ? arg.GetDimension(j).Extent()
|
|
: back ? back->GetDimension(j).Extent()
|
|
: 1;
|
|
elements *= ub[j];
|
|
}
|
|
string.GetLowerBounds(stringAt);
|
|
arg.GetLowerBounds(argAt);
|
|
if (back) {
|
|
back->GetLowerBounds(backAt);
|
|
}
|
|
result.Establish(TypeCategory::Integer, sizeof(INT), nullptr, rank, ub,
|
|
CFI_attribute_allocatable);
|
|
for (int j{0}; j < rank; ++j) {
|
|
result.GetDimension(j).SetBounds(1, ub[j]);
|
|
}
|
|
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
|
terminator.Crash(
|
|
"INDEX/SCAN/VERIFY: could not allocate storage for result");
|
|
}
|
|
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
|
|
std::size_t argElementChars{arg.ElementBytes() >> shift<CHAR>};
|
|
for (SubscriptValue resultAt{0}; elements-- > 0; resultAt += sizeof(INT),
|
|
string.IncrementSubscripts(stringAt), arg.IncrementSubscripts(argAt),
|
|
back && back->IncrementSubscripts(backAt)) {
|
|
if constexpr (FUNC == CharFunc::Index) {
|
|
*result.OffsetElement<INT>(resultAt) =
|
|
Index<CHAR>(string.Element<CHAR>(stringAt), stringElementChars,
|
|
arg.Element<CHAR>(argAt), argElementChars,
|
|
back && IsLogicalElementTrue(*back, backAt));
|
|
} else if constexpr (FUNC == CharFunc::Scan) {
|
|
*result.OffsetElement<INT>(resultAt) =
|
|
ScanVerify<CHAR, CharFunc::Scan>(string.Element<CHAR>(stringAt),
|
|
stringElementChars, arg.Element<CHAR>(argAt), argElementChars,
|
|
back && IsLogicalElementTrue(*back, backAt));
|
|
} else if constexpr (FUNC == CharFunc::Verify) {
|
|
*result.OffsetElement<INT>(resultAt) =
|
|
ScanVerify<CHAR, CharFunc::Verify>(string.Element<CHAR>(stringAt),
|
|
stringElementChars, arg.Element<CHAR>(argAt), argElementChars,
|
|
back && IsLogicalElementTrue(*back, backAt));
|
|
} else {
|
|
static_assert(FUNC == CharFunc::Index || FUNC == CharFunc::Scan ||
|
|
FUNC == CharFunc::Verify);
|
|
}
|
|
}
|
|
}
|
|
|
|
template <typename CHAR, CharFunc FUNC>
|
|
static RT_API_ATTRS void GeneralCharFuncKind(Descriptor &result,
|
|
const Descriptor &string, const Descriptor &arg, const Descriptor *back,
|
|
int kind, const Terminator &terminator) {
|
|
switch (kind) {
|
|
case 1:
|
|
GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 1>, CHAR, FUNC>(
|
|
result, string, arg, back, terminator);
|
|
break;
|
|
case 2:
|
|
GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 2>, CHAR, FUNC>(
|
|
result, string, arg, back, terminator);
|
|
break;
|
|
case 4:
|
|
GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 4>, CHAR, FUNC>(
|
|
result, string, arg, back, terminator);
|
|
break;
|
|
case 8:
|
|
GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 8>, CHAR, FUNC>(
|
|
result, string, arg, back, terminator);
|
|
break;
|
|
case 16:
|
|
GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 16>, CHAR, FUNC>(
|
|
result, string, arg, back, terminator);
|
|
break;
|
|
default:
|
|
terminator.Crash("not yet implemented: CHARACTER(KIND=%d) in "
|
|
"INDEX/SCAN/VERIFY intrinsic",
|
|
kind);
|
|
}
|
|
}
|
|
|
|
template <typename CHAR, bool ISMIN>
|
|
static RT_API_ATTRS void MaxMinHelper(Descriptor &accumulator,
|
|
const Descriptor &x, const Terminator &terminator) {
|
|
RUNTIME_CHECK(terminator,
|
|
accumulator.rank() == 0 || x.rank() == 0 ||
|
|
accumulator.rank() == x.rank());
|
|
SubscriptValue ub[maxRank], xAt[maxRank];
|
|
SubscriptValue elements{1};
|
|
std::size_t accumChars{accumulator.ElementBytes() >> shift<CHAR>};
|
|
std::size_t xChars{x.ElementBytes() >> shift<CHAR>};
|
|
std::size_t chars{std::max(accumChars, xChars)};
|
|
bool reallocate{accumulator.raw().base_addr == nullptr ||
|
|
accumChars != chars || (accumulator.rank() == 0 && x.rank() > 0)};
|
|
int rank{std::max(accumulator.rank(), x.rank())};
|
|
for (int j{0}; j < rank; ++j) {
|
|
if (x.rank() > 0) {
|
|
ub[j] = x.GetDimension(j).Extent();
|
|
if (accumulator.rank() > 0) {
|
|
SubscriptValue accumExt{accumulator.GetDimension(j).Extent()};
|
|
if (accumExt != ub[j]) {
|
|
terminator.Crash("Character MAX/MIN: operands are not "
|
|
"conforming on dimension %d (%jd != %jd)",
|
|
j + 1, static_cast<std::intmax_t>(accumExt),
|
|
static_cast<std::intmax_t>(ub[j]));
|
|
}
|
|
}
|
|
} else {
|
|
ub[j] = accumulator.GetDimension(j).Extent();
|
|
}
|
|
elements *= ub[j];
|
|
}
|
|
x.GetLowerBounds(xAt);
|
|
void *old{nullptr};
|
|
const CHAR *accumData{accumulator.OffsetElement<CHAR>()};
|
|
if (reallocate) {
|
|
old = accumulator.raw().base_addr;
|
|
accumulator.set_base_addr(nullptr);
|
|
accumulator.raw().elem_len = chars << shift<CHAR>;
|
|
for (int j{0}; j < rank; ++j) {
|
|
accumulator.GetDimension(j).SetBounds(1, ub[j]);
|
|
}
|
|
RUNTIME_CHECK(
|
|
terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
|
|
}
|
|
for (CHAR *result{accumulator.OffsetElement<CHAR>()}; elements-- > 0;
|
|
accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) {
|
|
const CHAR *xData{x.Element<CHAR>(xAt)};
|
|
int cmp{CharacterScalarCompare(accumData, xData, accumChars, xChars)};
|
|
if constexpr (ISMIN) {
|
|
cmp = -cmp;
|
|
}
|
|
if (cmp < 0) {
|
|
CopyAndPad(result, xData, chars, xChars);
|
|
} else if (result != accumData) {
|
|
CopyAndPad(result, accumData, chars, accumChars);
|
|
}
|
|
}
|
|
FreeMemory(old);
|
|
}
|
|
|
|
template <bool ISMIN>
|
|
static RT_API_ATTRS void MaxMin(Descriptor &accumulator, const Descriptor &x,
|
|
const char *sourceFile, int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
RUNTIME_CHECK(terminator, accumulator.raw().type == x.raw().type);
|
|
switch (accumulator.raw().type) {
|
|
case CFI_type_char:
|
|
MaxMinHelper<char, ISMIN>(accumulator, x, terminator);
|
|
break;
|
|
case CFI_type_char16_t:
|
|
MaxMinHelper<char16_t, ISMIN>(accumulator, x, terminator);
|
|
break;
|
|
case CFI_type_char32_t:
|
|
MaxMinHelper<char32_t, ISMIN>(accumulator, x, terminator);
|
|
break;
|
|
default:
|
|
terminator.Crash(
|
|
"Character MAX/MIN: result does not have a character type");
|
|
}
|
|
}
|
|
|
|
template <typename CHAR>
|
|
static inline RT_API_ATTRS bool TokenizeIsInSet(
|
|
CHAR ch, const CHAR *set, std::size_t setChars) {
|
|
for (std::size_t j{0}; j < setChars; ++j) {
|
|
if (set[j] == ch) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Pad the token with spaces.
|
|
template <typename CHAR>
|
|
static inline RT_API_ATTRS void TokenizeFillBlanks(
|
|
CHAR *to, std::size_t chars) {
|
|
if (chars == 0) {
|
|
return;
|
|
}
|
|
if constexpr (std::is_same_v<CHAR, char>) {
|
|
runtime::memset(to, ' ', chars);
|
|
} else {
|
|
for (std::size_t j{0}; j < chars; ++j) {
|
|
to[j] = static_cast<CHAR>(' ');
|
|
}
|
|
}
|
|
}
|
|
|
|
struct TokenizeAnalysis {
|
|
std::size_t tokenCount{0};
|
|
std::size_t maxTokenLen{0}; // in characters
|
|
};
|
|
|
|
template <typename CHAR>
|
|
static RT_API_ATTRS TokenizeAnalysis AnalyzeTokenize(const CHAR *str,
|
|
std::size_t strChars, const CHAR *set, std::size_t setChars) {
|
|
TokenizeAnalysis analysis;
|
|
|
|
// Empty STRING should return one empty token, per Fortran standard.
|
|
if (strChars == 0) {
|
|
analysis.tokenCount = 1;
|
|
analysis.maxTokenLen = 0;
|
|
return analysis;
|
|
}
|
|
if (setChars == 0) {
|
|
analysis.tokenCount = 1;
|
|
analysis.maxTokenLen = strChars;
|
|
return analysis;
|
|
}
|
|
|
|
// Split STRING at each delimiter character. This produces empty tokens
|
|
// when delimiters are consecutive or when STRING starts/ends with a
|
|
// delimiter.
|
|
std::size_t tokenStart{0};
|
|
for (std::size_t pos{0}; pos < strChars; ++pos) {
|
|
if (TokenizeIsInSet(str[pos], set, setChars)) {
|
|
analysis.maxTokenLen = std::max(analysis.maxTokenLen, pos - tokenStart);
|
|
analysis.tokenCount++;
|
|
tokenStart = pos + 1;
|
|
}
|
|
}
|
|
analysis.maxTokenLen = std::max(analysis.maxTokenLen, strChars - tokenStart);
|
|
analysis.tokenCount++;
|
|
|
|
return analysis;
|
|
}
|
|
// Allocates and populates the result arrays for TOKENIZE Form 1.
|
|
template <typename CHAR>
|
|
static RT_API_ATTRS void TokenizeFillForm1(Descriptor &tokens,
|
|
Descriptor *separator, const Descriptor &string, const CHAR *str,
|
|
std::size_t strChars, const CHAR *set, std::size_t setChars,
|
|
const TokenizeAnalysis &analysis, Terminator &terminator) {
|
|
|
|
// (Re)allocate TOKENS.
|
|
if (tokens.IsAllocated()) {
|
|
tokens.Deallocate();
|
|
}
|
|
SubscriptValue tokensExtent[1]{
|
|
static_cast<SubscriptValue>(analysis.tokenCount)};
|
|
std::size_t tokenElemBytes{
|
|
analysis.tokenCount == 0 ? 0 : analysis.maxTokenLen * sizeof(CHAR)};
|
|
tokens.Establish(string.type(), tokenElemBytes, nullptr, 1, tokensExtent,
|
|
CFI_attribute_allocatable);
|
|
tokens.GetDimension(0).SetBounds(1, tokensExtent[0]);
|
|
if (tokens.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
|
terminator.Crash("TOKENIZE: could not allocate TOKENS array");
|
|
}
|
|
|
|
// (Re)allocate SEPARATOR if present.
|
|
std::size_t sepCount{analysis.tokenCount > 0 ? analysis.tokenCount - 1 : 0};
|
|
std::size_t sepElemBytes{sizeof(CHAR)};
|
|
if (separator) {
|
|
if (separator->IsAllocated()) {
|
|
separator->Deallocate();
|
|
}
|
|
SubscriptValue sepExtent[1]{static_cast<SubscriptValue>(sepCount)};
|
|
separator->Establish(string.type(), sepElemBytes, nullptr, 1, sepExtent,
|
|
CFI_attribute_allocatable);
|
|
separator->GetDimension(0).SetBounds(1, sepExtent[0]);
|
|
if (separator->Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
|
terminator.Crash("TOKENIZE: could not allocate SEPARATOR array");
|
|
}
|
|
}
|
|
|
|
if (analysis.tokenCount == 0) {
|
|
return;
|
|
}
|
|
|
|
// Populate tokens and separators.
|
|
if (setChars == 0) {
|
|
// One token (possibly empty) equal to STRING.
|
|
if (tokenElemBytes > 0) {
|
|
CHAR *tokDest{tokens.OffsetElement<CHAR>(0)};
|
|
if (strChars > 0) {
|
|
runtime::memcpy(tokDest, str, strChars * sizeof(CHAR));
|
|
}
|
|
TokenizeFillBlanks(tokDest + strChars, analysis.maxTokenLen - strChars);
|
|
}
|
|
return;
|
|
}
|
|
|
|
std::size_t tokenIndex{0};
|
|
std::size_t sepIndex{0};
|
|
|
|
auto storeToken = [&](std::size_t tokenStart, std::size_t tokenEnd) {
|
|
std::size_t tokenLen{tokenEnd - tokenStart};
|
|
if (tokenElemBytes > 0) {
|
|
// Each element is stored in a fixed-size slot of `tokenElemBytes`.
|
|
CHAR *tokDest{tokens.OffsetElement<CHAR>(tokenIndex * tokenElemBytes)};
|
|
if (tokenLen > 0) {
|
|
runtime::memcpy(tokDest, str + tokenStart, tokenLen * sizeof(CHAR));
|
|
}
|
|
TokenizeFillBlanks(tokDest + tokenLen, analysis.maxTokenLen - tokenLen);
|
|
}
|
|
++tokenIndex;
|
|
};
|
|
|
|
// Split at each delimiter character, producing empty tokens at boundaries
|
|
// and between consecutive delimiters.
|
|
std::size_t tokenStart{0};
|
|
for (std::size_t pos{0}; pos < strChars; ++pos) {
|
|
if (TokenizeIsInSet(str[pos], set, setChars)) {
|
|
storeToken(tokenStart, pos);
|
|
if (separator) {
|
|
CHAR *sepDest{separator->OffsetElement<CHAR>(sepIndex * sepElemBytes)};
|
|
sepDest[0] = str[pos];
|
|
++sepIndex;
|
|
}
|
|
tokenStart = pos + 1;
|
|
}
|
|
}
|
|
storeToken(tokenStart, strChars);
|
|
}
|
|
|
|
template <int KIND>
|
|
static RT_API_ATTRS void TokenizeStoreIntAt(
|
|
const Descriptor &result, std::size_t at, std::int64_t value) {
|
|
StoreIntegerAt<KIND>{}(result, at, value);
|
|
}
|
|
|
|
using TokenizeStoreIntFn = void (*)(
|
|
const Descriptor &, std::size_t, std::int64_t);
|
|
|
|
static RT_API_ATTRS TokenizeStoreIntFn GetTokenizeStoreIntFn(
|
|
int kind, Terminator &terminator, const char *which) {
|
|
switch (kind) {
|
|
case 1:
|
|
return &TokenizeStoreIntAt<1>;
|
|
case 2:
|
|
return &TokenizeStoreIntAt<2>;
|
|
case 4:
|
|
return &TokenizeStoreIntAt<4>;
|
|
case 8:
|
|
return &TokenizeStoreIntAt<8>;
|
|
case 16:
|
|
return &TokenizeStoreIntAt<16>;
|
|
default:
|
|
terminator.Crash(
|
|
"TOKENIZE: unsupported INTEGER kind=%d for %s", kind, which);
|
|
}
|
|
}
|
|
|
|
template <typename CHAR>
|
|
static RT_API_ATTRS void TokenizeFillPositions(Descriptor &first,
|
|
Descriptor &last, const CHAR *str, std::size_t strChars, const CHAR *set,
|
|
std::size_t setChars, TokenizeStoreIntFn storeFirst,
|
|
TokenizeStoreIntFn storeLast, Terminator &terminator) {
|
|
|
|
// Empty STRING should return one empty token, per Fortran standard.
|
|
if (strChars == 0) {
|
|
storeFirst(first, 0, 1);
|
|
storeLast(last, 0, 0);
|
|
return;
|
|
}
|
|
if (setChars == 0) {
|
|
storeFirst(first, 0, 1);
|
|
storeLast(last, 0, static_cast<std::int64_t>(strChars));
|
|
return;
|
|
}
|
|
|
|
std::size_t tokenIndex{0};
|
|
std::size_t tokenStart{0};
|
|
for (std::size_t pos{0}; pos < strChars; ++pos) {
|
|
if (TokenizeIsInSet(str[pos], set, setChars)) {
|
|
storeFirst(first, tokenIndex, static_cast<std::int64_t>(tokenStart + 1));
|
|
storeLast(last, tokenIndex, static_cast<std::int64_t>(pos));
|
|
++tokenIndex;
|
|
tokenStart = pos + 1;
|
|
}
|
|
}
|
|
storeFirst(first, tokenIndex, static_cast<std::int64_t>(tokenStart + 1));
|
|
storeLast(last, tokenIndex, static_cast<std::int64_t>(strChars));
|
|
++tokenIndex;
|
|
|
|
// Sanity check: we should have filled exactly the allocated extent.
|
|
if (tokenIndex != static_cast<std::size_t>(first.GetDimension(0).Extent())) {
|
|
terminator.Crash("TOKENIZE: internal error populating FIRST/LAST");
|
|
}
|
|
}
|
|
|
|
// Tokenize Form 1 implementation.
|
|
static RT_API_ATTRS void TokenizeImpl(Descriptor &tokens, Descriptor *separator,
|
|
const Descriptor &string, const Descriptor &set, Terminator &terminator) {
|
|
RUNTIME_CHECK(terminator, string.rank() == 0);
|
|
RUNTIME_CHECK(terminator, set.rank() == 0);
|
|
RUNTIME_CHECK(terminator, string.raw().type == set.raw().type);
|
|
RUNTIME_CHECK(terminator, tokens.rank() == 1);
|
|
RUNTIME_CHECK(terminator, tokens.IsAllocatable());
|
|
if (separator) {
|
|
RUNTIME_CHECK(terminator, separator->rank() == 1);
|
|
RUNTIME_CHECK(terminator, separator->IsAllocatable());
|
|
}
|
|
|
|
switch (string.raw().type) {
|
|
case CFI_type_char: {
|
|
std::size_t strBytes{string.ElementBytes()};
|
|
std::size_t setBytes{set.ElementBytes()};
|
|
std::size_t strChars{strBytes};
|
|
std::size_t setChars{setBytes};
|
|
const char *str{
|
|
strBytes == 0 ? nullptr : string.OffsetElement<const char>()};
|
|
const char *setPtr{
|
|
setBytes == 0 ? nullptr : set.OffsetElement<const char>()};
|
|
auto analysis{AnalyzeTokenize(str, strChars, setPtr, setChars)};
|
|
TokenizeFillForm1(tokens, separator, string, str, strChars, setPtr,
|
|
setChars, analysis, terminator);
|
|
break;
|
|
}
|
|
case CFI_type_char16_t: {
|
|
std::size_t strBytes{string.ElementBytes()};
|
|
std::size_t setBytes{set.ElementBytes()};
|
|
std::size_t strChars{strBytes >> 1};
|
|
std::size_t setChars{setBytes >> 1};
|
|
const char16_t *str{
|
|
strBytes == 0 ? nullptr : string.OffsetElement<const char16_t>()};
|
|
const char16_t *setPtr{
|
|
setBytes == 0 ? nullptr : set.OffsetElement<const char16_t>()};
|
|
auto analysis{AnalyzeTokenize(str, strChars, setPtr, setChars)};
|
|
TokenizeFillForm1(tokens, separator, string, str, strChars, setPtr,
|
|
setChars, analysis, terminator);
|
|
break;
|
|
}
|
|
case CFI_type_char32_t: {
|
|
std::size_t strBytes{string.ElementBytes()};
|
|
std::size_t setBytes{set.ElementBytes()};
|
|
std::size_t strChars{strBytes >> 2};
|
|
std::size_t setChars{setBytes >> 2};
|
|
const char32_t *str{
|
|
strBytes == 0 ? nullptr : string.OffsetElement<const char32_t>()};
|
|
const char32_t *setPtr{
|
|
setBytes == 0 ? nullptr : set.OffsetElement<const char32_t>()};
|
|
auto analysis{AnalyzeTokenize(str, strChars, setPtr, setChars)};
|
|
TokenizeFillForm1(tokens, separator, string, str, strChars, setPtr,
|
|
setChars, analysis, terminator);
|
|
break;
|
|
}
|
|
default:
|
|
terminator.Crash("TOKENIZE: bad string type code %d",
|
|
static_cast<int>(string.raw().type));
|
|
}
|
|
}
|
|
|
|
// Tokenize Form 2 implementation.
|
|
static RT_API_ATTRS void TokenizePositionsImpl(Descriptor &first,
|
|
Descriptor &last, const Descriptor &string, const Descriptor &set,
|
|
Terminator &terminator) {
|
|
RUNTIME_CHECK(terminator, string.rank() == 0);
|
|
RUNTIME_CHECK(terminator, set.rank() == 0);
|
|
RUNTIME_CHECK(terminator, string.raw().type == set.raw().type);
|
|
RUNTIME_CHECK(terminator, first.rank() == 1);
|
|
RUNTIME_CHECK(terminator, last.rank() == 1);
|
|
RUNTIME_CHECK(terminator, first.IsAllocatable());
|
|
RUNTIME_CHECK(terminator, last.IsAllocatable());
|
|
|
|
auto firstCK{first.type().GetCategoryAndKind()};
|
|
auto lastCK{last.type().GetCategoryAndKind()};
|
|
if (!firstCK || firstCK->first != TypeCategory::Integer) {
|
|
terminator.Crash("TOKENIZE: FIRST is not an INTEGER array");
|
|
}
|
|
if (!lastCK || lastCK->first != TypeCategory::Integer) {
|
|
terminator.Crash("TOKENIZE: LAST is not an INTEGER array");
|
|
}
|
|
int firstKind{firstCK->second};
|
|
int lastKind{lastCK->second};
|
|
auto storeFirst{GetTokenizeStoreIntFn(firstKind, terminator, "FIRST")};
|
|
auto storeLast{GetTokenizeStoreIntFn(lastKind, terminator, "LAST")};
|
|
|
|
// Count tokens.
|
|
std::size_t tokenCount{0};
|
|
switch (string.raw().type) {
|
|
case CFI_type_char: {
|
|
std::size_t strBytes{string.ElementBytes()};
|
|
std::size_t setBytes{set.ElementBytes()};
|
|
std::size_t strChars{strBytes};
|
|
std::size_t setChars{setBytes};
|
|
const char *str{
|
|
strBytes == 0 ? nullptr : string.OffsetElement<const char>()};
|
|
const char *setPtr{
|
|
setBytes == 0 ? nullptr : set.OffsetElement<const char>()};
|
|
tokenCount = AnalyzeTokenize(str, strChars, setPtr, setChars).tokenCount;
|
|
break;
|
|
}
|
|
case CFI_type_char16_t: {
|
|
std::size_t strBytes{string.ElementBytes()};
|
|
std::size_t setBytes{set.ElementBytes()};
|
|
std::size_t strChars{strBytes >> 1};
|
|
std::size_t setChars{setBytes >> 1};
|
|
const char16_t *str{
|
|
strBytes == 0 ? nullptr : string.OffsetElement<const char16_t>()};
|
|
const char16_t *setPtr{
|
|
setBytes == 0 ? nullptr : set.OffsetElement<const char16_t>()};
|
|
tokenCount = AnalyzeTokenize(str, strChars, setPtr, setChars).tokenCount;
|
|
break;
|
|
}
|
|
case CFI_type_char32_t: {
|
|
std::size_t strBytes{string.ElementBytes()};
|
|
std::size_t setBytes{set.ElementBytes()};
|
|
std::size_t strChars{strBytes >> 2};
|
|
std::size_t setChars{setBytes >> 2};
|
|
const char32_t *str{
|
|
strBytes == 0 ? nullptr : string.OffsetElement<const char32_t>()};
|
|
const char32_t *setPtr{
|
|
setBytes == 0 ? nullptr : set.OffsetElement<const char32_t>()};
|
|
tokenCount = AnalyzeTokenize(str, strChars, setPtr, setChars).tokenCount;
|
|
break;
|
|
}
|
|
default:
|
|
terminator.Crash("TOKENIZE: bad string type code %d",
|
|
static_cast<int>(string.raw().type));
|
|
}
|
|
|
|
// (Re)allocate FIRST/LAST.
|
|
if (first.IsAllocated()) {
|
|
first.Deallocate();
|
|
}
|
|
if (last.IsAllocated()) {
|
|
last.Deallocate();
|
|
}
|
|
SubscriptValue extent[1]{static_cast<SubscriptValue>(tokenCount)};
|
|
first.Establish(TypeCategory::Integer, firstKind, nullptr, 1, extent,
|
|
CFI_attribute_allocatable);
|
|
first.GetDimension(0).SetBounds(1, extent[0]);
|
|
last.Establish(TypeCategory::Integer, lastKind, nullptr, 1, extent,
|
|
CFI_attribute_allocatable);
|
|
last.GetDimension(0).SetBounds(1, extent[0]);
|
|
if (first.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
|
terminator.Crash("TOKENIZE: could not allocate FIRST array");
|
|
}
|
|
if (last.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
|
terminator.Crash("TOKENIZE: could not allocate LAST array");
|
|
}
|
|
|
|
if (tokenCount == 0) {
|
|
return;
|
|
}
|
|
|
|
// Populate FIRST/LAST.
|
|
switch (string.raw().type) {
|
|
case CFI_type_char: {
|
|
std::size_t strBytes{string.ElementBytes()};
|
|
std::size_t setBytes{set.ElementBytes()};
|
|
std::size_t strChars{strBytes};
|
|
std::size_t setChars{setBytes};
|
|
const char *str{
|
|
strBytes == 0 ? nullptr : string.OffsetElement<const char>()};
|
|
const char *setPtr{
|
|
setBytes == 0 ? nullptr : set.OffsetElement<const char>()};
|
|
TokenizeFillPositions(first, last, str, strChars, setPtr, setChars,
|
|
storeFirst, storeLast, terminator);
|
|
break;
|
|
}
|
|
case CFI_type_char16_t: {
|
|
std::size_t strBytes{string.ElementBytes()};
|
|
std::size_t setBytes{set.ElementBytes()};
|
|
std::size_t strChars{strBytes >> 1};
|
|
std::size_t setChars{setBytes >> 1};
|
|
const char16_t *str{
|
|
strBytes == 0 ? nullptr : string.OffsetElement<const char16_t>()};
|
|
const char16_t *setPtr{
|
|
setBytes == 0 ? nullptr : set.OffsetElement<const char16_t>()};
|
|
TokenizeFillPositions(first, last, str, strChars, setPtr, setChars,
|
|
storeFirst, storeLast, terminator);
|
|
break;
|
|
}
|
|
case CFI_type_char32_t: {
|
|
std::size_t strBytes{string.ElementBytes()};
|
|
std::size_t setBytes{set.ElementBytes()};
|
|
std::size_t strChars{strBytes >> 2};
|
|
std::size_t setChars{setBytes >> 2};
|
|
const char32_t *str{
|
|
strBytes == 0 ? nullptr : string.OffsetElement<const char32_t>()};
|
|
const char32_t *setPtr{
|
|
setBytes == 0 ? nullptr : set.OffsetElement<const char32_t>()};
|
|
TokenizeFillPositions(first, last, str, strChars, setPtr, setChars,
|
|
storeFirst, storeLast, terminator);
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
// SPLIT — implemented in terms of SCAN.
|
|
// When BACK is false, returns the position of the leftmost character in SET
|
|
// at a position > POS, or LEN(STRING)+1 if none.
|
|
// When BACK is true, returns the position of the rightmost character in SET
|
|
// at a position < POS, or 0 if none.
|
|
template <typename CHAR>
|
|
static RT_API_ATTRS std::size_t SplitImpl(const CHAR *string,
|
|
std::size_t stringLen, const CHAR *set, std::size_t setLen, std::size_t pos,
|
|
bool back) {
|
|
if (back) {
|
|
std::size_t scanLen{pos > 1 ? pos - 1 : std::size_t{0}};
|
|
if (scanLen > stringLen) {
|
|
scanLen = stringLen;
|
|
}
|
|
if constexpr (sizeof(CHAR) == 1) {
|
|
return ScanVerify<false>(string, scanLen, set, setLen, true);
|
|
} else {
|
|
return ScanVerify<CHAR, CharFunc::Scan>(
|
|
string, scanLen, set, setLen, true);
|
|
}
|
|
} else {
|
|
if (pos >= stringLen) {
|
|
return stringLen + 1;
|
|
}
|
|
std::size_t npos;
|
|
if constexpr (sizeof(CHAR) == 1) {
|
|
npos =
|
|
ScanVerify<false>(string + pos, stringLen - pos, set, setLen, false);
|
|
} else {
|
|
npos = ScanVerify<CHAR, CharFunc::Scan>(
|
|
string + pos, stringLen - pos, set, setLen, false);
|
|
}
|
|
return npos != 0 ? pos + npos : stringLen + 1;
|
|
}
|
|
}
|
|
|
|
extern "C" {
|
|
RT_EXT_API_GROUP_BEGIN
|
|
|
|
void RTDEF(CharacterConcatenate)(Descriptor &accumulator,
|
|
const Descriptor &from, const char *sourceFile, int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
RUNTIME_CHECK(terminator,
|
|
accumulator.rank() == 0 || from.rank() == 0 ||
|
|
accumulator.rank() == from.rank());
|
|
int rank{std::max(accumulator.rank(), from.rank())};
|
|
SubscriptValue ub[maxRank], fromAt[maxRank];
|
|
SubscriptValue elements{1};
|
|
for (int j{0}; j < rank; ++j) {
|
|
if (accumulator.rank() > 0 && from.rank() > 0) {
|
|
ub[j] = accumulator.GetDimension(j).Extent();
|
|
SubscriptValue fromUB{from.GetDimension(j).Extent()};
|
|
if (ub[j] != fromUB) {
|
|
terminator.Crash("Character array concatenation: operands are not "
|
|
"conforming on dimension %d (%jd != %jd)",
|
|
j + 1, static_cast<std::intmax_t>(ub[j]),
|
|
static_cast<std::intmax_t>(fromUB));
|
|
}
|
|
} else {
|
|
ub[j] =
|
|
(accumulator.rank() ? accumulator : from).GetDimension(j).Extent();
|
|
}
|
|
elements *= ub[j];
|
|
}
|
|
std::size_t oldBytes{accumulator.ElementBytes()};
|
|
void *old{accumulator.raw().base_addr};
|
|
accumulator.set_base_addr(nullptr);
|
|
std::size_t fromBytes{from.ElementBytes()};
|
|
accumulator.raw().elem_len += fromBytes;
|
|
std::size_t newBytes{accumulator.ElementBytes()};
|
|
for (int j{0}; j < rank; ++j) {
|
|
accumulator.GetDimension(j).SetBounds(1, ub[j]);
|
|
}
|
|
if (accumulator.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
|
terminator.Crash(
|
|
"CharacterConcatenate: could not allocate storage for result");
|
|
}
|
|
const char *p{static_cast<const char *>(old)};
|
|
char *to{static_cast<char *>(accumulator.raw().base_addr)};
|
|
from.GetLowerBounds(fromAt);
|
|
for (; elements-- > 0;
|
|
to += newBytes, p += oldBytes, from.IncrementSubscripts(fromAt)) {
|
|
runtime::memcpy(to, p, oldBytes);
|
|
runtime::memcpy(to + oldBytes, from.Element<char>(fromAt), fromBytes);
|
|
}
|
|
FreeMemory(old);
|
|
}
|
|
|
|
void RTDEF(CharacterConcatenateScalar1)(
|
|
Descriptor &accumulator, const char *from, std::size_t chars) {
|
|
Terminator terminator{__FILE__, __LINE__};
|
|
RUNTIME_CHECK(terminator, accumulator.rank() == 0);
|
|
void *old{accumulator.raw().base_addr};
|
|
accumulator.set_base_addr(nullptr);
|
|
std::size_t oldLen{accumulator.ElementBytes()};
|
|
accumulator.raw().elem_len += chars;
|
|
RUNTIME_CHECK(
|
|
terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
|
|
std::memcpy(accumulator.OffsetElement<char>(oldLen), from, chars);
|
|
FreeMemory(old);
|
|
}
|
|
|
|
int RTDEF(CharacterCompareScalar)(const Descriptor &x, const Descriptor &y) {
|
|
Terminator terminator{__FILE__, __LINE__};
|
|
RUNTIME_CHECK(terminator, x.rank() == 0);
|
|
RUNTIME_CHECK(terminator, y.rank() == 0);
|
|
RUNTIME_CHECK(terminator, x.raw().type == y.raw().type);
|
|
switch (x.raw().type) {
|
|
case CFI_type_char:
|
|
return CharacterScalarCompare<char>(x.OffsetElement<char>(),
|
|
y.OffsetElement<char>(), x.ElementBytes(), y.ElementBytes());
|
|
case CFI_type_char16_t:
|
|
return CharacterScalarCompare<char16_t>(x.OffsetElement<char16_t>(),
|
|
y.OffsetElement<char16_t>(), x.ElementBytes() >> 1,
|
|
y.ElementBytes() >> 1);
|
|
case CFI_type_char32_t:
|
|
return CharacterScalarCompare<char32_t>(x.OffsetElement<char32_t>(),
|
|
y.OffsetElement<char32_t>(), x.ElementBytes() >> 2,
|
|
y.ElementBytes() >> 2);
|
|
default:
|
|
terminator.Crash("CharacterCompareScalar: bad string type code %d",
|
|
static_cast<int>(x.raw().type));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int RTDEF(CharacterCompareScalar1)(
|
|
const char *x, const char *y, std::size_t xChars, std::size_t yChars) {
|
|
return CharacterScalarCompare(x, y, xChars, yChars);
|
|
}
|
|
|
|
int RTDEF(CharacterCompareScalar2)(const char16_t *x, const char16_t *y,
|
|
std::size_t xChars, std::size_t yChars) {
|
|
return CharacterScalarCompare(x, y, xChars, yChars);
|
|
}
|
|
|
|
int RTDEF(CharacterCompareScalar4)(const char32_t *x, const char32_t *y,
|
|
std::size_t xChars, std::size_t yChars) {
|
|
return CharacterScalarCompare(x, y, xChars, yChars);
|
|
}
|
|
|
|
void RTDEF(CharacterCompare)(
|
|
Descriptor &result, const Descriptor &x, const Descriptor &y) {
|
|
Terminator terminator{__FILE__, __LINE__};
|
|
RUNTIME_CHECK(terminator, x.raw().type == y.raw().type);
|
|
switch (x.raw().type) {
|
|
case CFI_type_char:
|
|
Compare<char>(result, x, y, terminator);
|
|
break;
|
|
case CFI_type_char16_t:
|
|
Compare<char16_t>(result, x, y, terminator);
|
|
break;
|
|
case CFI_type_char32_t:
|
|
Compare<char32_t>(result, x, y, terminator);
|
|
break;
|
|
default:
|
|
terminator.Crash("CharacterCompareScalar: bad string type code %d",
|
|
static_cast<int>(x.raw().type));
|
|
}
|
|
}
|
|
|
|
std::size_t RTDEF(CharacterAppend1)(char *lhs, std::size_t lhsBytes,
|
|
std::size_t offset, const char *rhs, std::size_t rhsBytes) {
|
|
if (auto n{std::min(lhsBytes - offset, rhsBytes)}) {
|
|
runtime::memcpy(lhs + offset, rhs, n);
|
|
offset += n;
|
|
}
|
|
return offset;
|
|
}
|
|
|
|
void RTDEF(CharacterPad1)(char *lhs, std::size_t bytes, std::size_t offset) {
|
|
if (bytes > offset) {
|
|
runtime::memset(lhs + offset, ' ', bytes - offset);
|
|
}
|
|
}
|
|
|
|
// Intrinsic function entry points
|
|
|
|
void RTDEF(Adjustl)(Descriptor &result, const Descriptor &string,
|
|
const char *sourceFile, int sourceLine) {
|
|
AdjustLR<false>(result, string, sourceFile, sourceLine);
|
|
}
|
|
|
|
void RTDEF(Adjustr)(Descriptor &result, const Descriptor &string,
|
|
const char *sourceFile, int sourceLine) {
|
|
AdjustLR<true>(result, string, sourceFile, sourceLine);
|
|
}
|
|
|
|
std::size_t RTDEF(Index1)(const char *x, std::size_t xLen, const char *set,
|
|
std::size_t setLen, bool back) {
|
|
return Index<char>(x, xLen, set, setLen, back);
|
|
}
|
|
std::size_t RTDEF(Index2)(const char16_t *x, std::size_t xLen,
|
|
const char16_t *set, std::size_t setLen, bool back) {
|
|
return Index<char16_t>(x, xLen, set, setLen, back);
|
|
}
|
|
std::size_t RTDEF(Index4)(const char32_t *x, std::size_t xLen,
|
|
const char32_t *set, std::size_t setLen, bool back) {
|
|
return Index<char32_t>(x, xLen, set, setLen, back);
|
|
}
|
|
|
|
void RTDEF(Index)(Descriptor &result, const Descriptor &string,
|
|
const Descriptor &substring, const Descriptor *back, int kind,
|
|
const char *sourceFile, int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
switch (string.raw().type) {
|
|
case CFI_type_char:
|
|
GeneralCharFuncKind<char, CharFunc::Index>(
|
|
result, string, substring, back, kind, terminator);
|
|
break;
|
|
case CFI_type_char16_t:
|
|
GeneralCharFuncKind<char16_t, CharFunc::Index>(
|
|
result, string, substring, back, kind, terminator);
|
|
break;
|
|
case CFI_type_char32_t:
|
|
GeneralCharFuncKind<char32_t, CharFunc::Index>(
|
|
result, string, substring, back, kind, terminator);
|
|
break;
|
|
default:
|
|
terminator.Crash(
|
|
"INDEX: bad string type code %d", static_cast<int>(string.raw().type));
|
|
}
|
|
}
|
|
|
|
std::size_t RTDEF(LenTrim1)(const char *x, std::size_t chars) {
|
|
return LenTrim(x, chars);
|
|
}
|
|
std::size_t RTDEF(LenTrim2)(const char16_t *x, std::size_t chars) {
|
|
return LenTrim(x, chars);
|
|
}
|
|
std::size_t RTDEF(LenTrim4)(const char32_t *x, std::size_t chars) {
|
|
return LenTrim(x, chars);
|
|
}
|
|
|
|
void RTDEF(LenTrim)(Descriptor &result, const Descriptor &string, int kind,
|
|
const char *sourceFile, int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
switch (string.raw().type) {
|
|
case CFI_type_char:
|
|
LenTrimKind<char>(result, string, kind, terminator);
|
|
break;
|
|
case CFI_type_char16_t:
|
|
LenTrimKind<char16_t>(result, string, kind, terminator);
|
|
break;
|
|
case CFI_type_char32_t:
|
|
LenTrimKind<char32_t>(result, string, kind, terminator);
|
|
break;
|
|
default:
|
|
terminator.Crash("LEN_TRIM: bad string type code %d",
|
|
static_cast<int>(string.raw().type));
|
|
}
|
|
}
|
|
|
|
std::size_t RTDEF(Scan1)(const char *x, std::size_t xLen, const char *set,
|
|
std::size_t setLen, bool back) {
|
|
return ScanVerify<false>(x, xLen, set, setLen, back);
|
|
}
|
|
std::size_t RTDEF(Scan2)(const char16_t *x, std::size_t xLen,
|
|
const char16_t *set, std::size_t setLen, bool back) {
|
|
return ScanVerify<char16_t, CharFunc::Scan>(x, xLen, set, setLen, back);
|
|
}
|
|
std::size_t RTDEF(Scan4)(const char32_t *x, std::size_t xLen,
|
|
const char32_t *set, std::size_t setLen, bool back) {
|
|
return ScanVerify<char32_t, CharFunc::Scan>(x, xLen, set, setLen, back);
|
|
}
|
|
|
|
void RTDEF(Scan)(Descriptor &result, const Descriptor &string,
|
|
const Descriptor &set, const Descriptor *back, int kind,
|
|
const char *sourceFile, int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
switch (string.raw().type) {
|
|
case CFI_type_char:
|
|
GeneralCharFuncKind<char, CharFunc::Scan>(
|
|
result, string, set, back, kind, terminator);
|
|
break;
|
|
case CFI_type_char16_t:
|
|
GeneralCharFuncKind<char16_t, CharFunc::Scan>(
|
|
result, string, set, back, kind, terminator);
|
|
break;
|
|
case CFI_type_char32_t:
|
|
GeneralCharFuncKind<char32_t, CharFunc::Scan>(
|
|
result, string, set, back, kind, terminator);
|
|
break;
|
|
default:
|
|
terminator.Crash(
|
|
"SCAN: bad string type code %d", static_cast<int>(string.raw().type));
|
|
}
|
|
}
|
|
|
|
void RTDEF(Repeat)(Descriptor &result, const Descriptor &string,
|
|
std::int64_t ncopies, const char *sourceFile, int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
if (ncopies < 0) {
|
|
terminator.Crash(
|
|
"REPEAT has negative NCOPIES=%jd", static_cast<std::intmax_t>(ncopies));
|
|
}
|
|
std::size_t origBytes{string.ElementBytes()};
|
|
result.Establish(string.type(), origBytes * ncopies, nullptr, 0, nullptr,
|
|
CFI_attribute_allocatable);
|
|
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
|
terminator.Crash("REPEAT could not allocate storage for result");
|
|
}
|
|
const char *from{string.OffsetElement()};
|
|
for (char *to{result.OffsetElement()}; ncopies-- > 0; to += origBytes) {
|
|
runtime::memcpy(to, from, origBytes);
|
|
}
|
|
}
|
|
|
|
// F_C_STRING - Appends null terminator to create C-compatible string
|
|
// If asis is false, trailing blanks are trimmed first
|
|
void RTDEF(FCString)(Descriptor &result, const Descriptor &string, bool asis,
|
|
const char *sourceFile, int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
RUNTIME_CHECK(terminator, string.raw().type == CFI_type_char);
|
|
std::size_t chars{string.ElementBytes()};
|
|
if (!asis) {
|
|
chars = LenTrim(string.OffsetElement<const char>(), chars);
|
|
}
|
|
std::size_t resultBytes{chars + 1};
|
|
result.Establish(string.type(), resultBytes, nullptr, 0, nullptr,
|
|
CFI_attribute_allocatable);
|
|
RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncObject) == CFI_SUCCESS);
|
|
if (chars > 0) {
|
|
std::memcpy(result.OffsetElement(), string.OffsetElement(), chars);
|
|
}
|
|
*result.OffsetElement<char>(chars) = '\0';
|
|
}
|
|
|
|
void RTDEF(Trim)(Descriptor &result, const Descriptor &string,
|
|
const char *sourceFile, int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
std::size_t resultBytes{0};
|
|
switch (string.raw().type) {
|
|
case CFI_type_char:
|
|
resultBytes =
|
|
LenTrim(string.OffsetElement<const char>(), string.ElementBytes());
|
|
break;
|
|
case CFI_type_char16_t:
|
|
resultBytes = LenTrim(string.OffsetElement<const char16_t>(),
|
|
string.ElementBytes() >> 1)
|
|
<< 1;
|
|
break;
|
|
case CFI_type_char32_t:
|
|
resultBytes = LenTrim(string.OffsetElement<const char32_t>(),
|
|
string.ElementBytes() >> 2)
|
|
<< 2;
|
|
break;
|
|
default:
|
|
terminator.Crash(
|
|
"TRIM: bad string type code %d", static_cast<int>(string.raw().type));
|
|
}
|
|
result.Establish(string.type(), resultBytes, nullptr, 0, nullptr,
|
|
CFI_attribute_allocatable);
|
|
RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncObject) == CFI_SUCCESS);
|
|
std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes);
|
|
}
|
|
|
|
std::size_t RTDEF(Verify1)(const char *x, std::size_t xLen, const char *set,
|
|
std::size_t setLen, bool back) {
|
|
return ScanVerify<true>(x, xLen, set, setLen, back);
|
|
}
|
|
std::size_t RTDEF(Verify2)(const char16_t *x, std::size_t xLen,
|
|
const char16_t *set, std::size_t setLen, bool back) {
|
|
return ScanVerify<char16_t, CharFunc::Verify>(x, xLen, set, setLen, back);
|
|
}
|
|
std::size_t RTDEF(Verify4)(const char32_t *x, std::size_t xLen,
|
|
const char32_t *set, std::size_t setLen, bool back) {
|
|
return ScanVerify<char32_t, CharFunc::Verify>(x, xLen, set, setLen, back);
|
|
}
|
|
|
|
void RTDEF(Verify)(Descriptor &result, const Descriptor &string,
|
|
const Descriptor &set, const Descriptor *back, int kind,
|
|
const char *sourceFile, int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
switch (string.raw().type) {
|
|
case CFI_type_char:
|
|
GeneralCharFuncKind<char, CharFunc::Verify>(
|
|
result, string, set, back, kind, terminator);
|
|
break;
|
|
case CFI_type_char16_t:
|
|
GeneralCharFuncKind<char16_t, CharFunc::Verify>(
|
|
result, string, set, back, kind, terminator);
|
|
break;
|
|
case CFI_type_char32_t:
|
|
GeneralCharFuncKind<char32_t, CharFunc::Verify>(
|
|
result, string, set, back, kind, terminator);
|
|
break;
|
|
default:
|
|
terminator.Crash(
|
|
"VERIFY: bad string type code %d", static_cast<int>(string.raw().type));
|
|
}
|
|
}
|
|
|
|
void RTDEF(CharacterMax)(Descriptor &accumulator, const Descriptor &x,
|
|
const char *sourceFile, int sourceLine) {
|
|
MaxMin<false>(accumulator, x, sourceFile, sourceLine);
|
|
}
|
|
|
|
void RTDEF(CharacterMin)(Descriptor &accumulator, const Descriptor &x,
|
|
const char *sourceFile, int sourceLine) {
|
|
MaxMin<true>(accumulator, x, sourceFile, sourceLine);
|
|
}
|
|
|
|
// TOKENIZE Form 1 entry point
|
|
void RTDEF(Tokenize)(Descriptor &tokens, Descriptor *separator,
|
|
const Descriptor &string, const Descriptor &set, const char *sourceFile,
|
|
int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
TokenizeImpl(tokens, separator, string, set, terminator);
|
|
}
|
|
|
|
// TOKENIZE Form 2 entry point
|
|
void RTDEF(TokenizePositions)(Descriptor &first, Descriptor &last,
|
|
const Descriptor &string, const Descriptor &set, const char *sourceFile,
|
|
int sourceLine) {
|
|
Terminator terminator{sourceFile, sourceLine};
|
|
TokenizePositionsImpl(first, last, string, set, terminator);
|
|
}
|
|
|
|
std::size_t RTDEF(Split1)(const char *string, std::size_t stringLen,
|
|
const char *set, std::size_t setLen, std::size_t pos, bool back) {
|
|
return SplitImpl(string, stringLen, set, setLen, pos, back);
|
|
}
|
|
|
|
std::size_t RTDEF(Split2)(const char16_t *string, std::size_t stringLen,
|
|
const char16_t *set, std::size_t setLen, std::size_t pos, bool back) {
|
|
return SplitImpl(string, stringLen, set, setLen, pos, back);
|
|
}
|
|
|
|
std::size_t RTDEF(Split4)(const char32_t *string, std::size_t stringLen,
|
|
const char32_t *set, std::size_t setLen, std::size_t pos, bool back) {
|
|
return SplitImpl(string, stringLen, set, setLen, pos, back);
|
|
}
|
|
|
|
RT_EXT_API_GROUP_END
|
|
}
|
|
} // namespace Fortran::runtime
|