[flang] [flang-rt] Addition of the Fortran 2023 TOKENIZE intrinsic. (#181030)

This implements the TOKENIZE intrinsic per the Fortran 2023 Standard.

TOKENIZE is a more complicated addition to the flang intrinsics, as it
is the first subroutine that has multiple unique footprints. Intrinsic
functions have already addressed this challenge, however subroutines and
functions are processed slightly differently and the function code was
not a good 1:1 solution for the subroutines. To solve this the function
code was used as an example to create error buffering within the
intrinsics Process and select the most appropriate error message for a
given subroutine footprint.

A simple FIR compile test was added to show the proper compilation of
each case. A thorough negative path test has also been added, ensuring
that all possible errors are reported as expected.

Testing prior to commit:

= check-flang ==========================================
```
Testing Time: 139.51s

Total Discovered Tests: 4153
  Unsupported      :   77 (1.85%)
  Passed           : 4065 (97.88%)
  Expectedly Failed:   11 (0.26%)


FLANG Container Test completed 2 minutes (160 s).

Total Time: 2 minutes (160 s)
Completed : Wed Feb 11 04:05:50 PM CST 2026
```

= check-flang-rt ==========================================
```
Testing Time: 1.55s

Total Discovered Tests: 258
  Passed: 258 (100.00%)


FLANG Container Test completed 0 minutes (55 s).

Total Time: 0 minutes (56 s)
Completed : Wed Feb 11 04:08:32 PM CST 2026
```

= llvm-test-suite ==========================================
```
Testing Time: 1886.64s

Total Discovered Tests: 6926
  Passed: 6926 (100.00%)


CCE SLES Container debug compile completed 31 minutes (1895 s).
CCE SLES Container debug install completed in 0 minutes (0 s).

Total Time: 31 minutes (1895 s)
Completed : Wed Feb 11 05:46:52 PM CST 2026
```

Additionally, (FYI) an executable test has been written and will be
added to the llvm-test-suite under a separate PR.

---------

Co-authored-by: Kevin Wyatt <kwyatt@hpe.com>
This commit is contained in:
kwyatt-ext 2026-02-27 12:43:18 -06:00 committed by GitHub
parent 6301243a5d
commit ca0e7d31d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 1200 additions and 1 deletions

View File

@ -571,6 +571,427 @@ static RT_API_ATTRS void MaxMin(Descriptor &accumulator, const Descriptor &x,
}
}
template <typename CHAR>
static inline RT_API_ATTRS bool TokenizeIsInSet(
CHAR ch, const CHAR *set, std::size_t setChars) {
for (std::size_t j{0}; j < setChars; ++j) {
if (set[j] == ch) {
return true;
}
}
return false;
}
// Pad the token with spaces.
template <typename CHAR>
static inline RT_API_ATTRS void TokenizeFillBlanks(
CHAR *to, std::size_t chars) {
if (chars == 0) {
return;
}
if constexpr (std::is_same_v<CHAR, char>) {
runtime::memset(to, ' ', chars);
} else {
for (std::size_t j{0}; j < chars; ++j) {
to[j] = static_cast<CHAR>(' ');
}
}
}
struct TokenizeAnalysis {
std::size_t tokenCount{0};
std::size_t maxTokenLen{0}; // in characters
};
template <typename CHAR>
static RT_API_ATTRS TokenizeAnalysis AnalyzeTokenize(const CHAR *str,
std::size_t strChars, const CHAR *set, std::size_t setChars) {
TokenizeAnalysis analysis;
// Empty STRING should return one empty token, per Fortran standard.
if (strChars == 0) {
analysis.tokenCount = 1;
analysis.maxTokenLen = 0;
return analysis;
}
if (setChars == 0) {
analysis.tokenCount = 1;
analysis.maxTokenLen = strChars;
return analysis;
}
// Split STRING at each delimiter character. This produces empty tokens
// when delimiters are consecutive or when STRING starts/ends with a
// delimiter.
std::size_t tokenStart{0};
for (std::size_t pos{0}; pos < strChars; ++pos) {
if (TokenizeIsInSet(str[pos], set, setChars)) {
analysis.maxTokenLen = std::max(analysis.maxTokenLen, pos - tokenStart);
analysis.tokenCount++;
tokenStart = pos + 1;
}
}
analysis.maxTokenLen = std::max(analysis.maxTokenLen, strChars - tokenStart);
analysis.tokenCount++;
return analysis;
}
// Allocates and populates the result arrays for TOKENIZE Form 1.
template <typename CHAR>
static RT_API_ATTRS void TokenizeFillForm1(Descriptor &tokens,
Descriptor *separator, const Descriptor &string, const CHAR *str,
std::size_t strChars, const CHAR *set, std::size_t setChars,
const TokenizeAnalysis &analysis, Terminator &terminator) {
// (Re)allocate TOKENS.
if (tokens.IsAllocated()) {
tokens.Deallocate();
}
SubscriptValue tokensExtent[1]{
static_cast<SubscriptValue>(analysis.tokenCount)};
std::size_t tokenElemBytes{
analysis.tokenCount == 0 ? 0 : analysis.maxTokenLen * sizeof(CHAR)};
tokens.Establish(string.type(), tokenElemBytes, nullptr, 1, tokensExtent,
CFI_attribute_allocatable);
tokens.GetDimension(0).SetBounds(1, tokensExtent[0]);
if (tokens.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("TOKENIZE: could not allocate TOKENS array");
}
// (Re)allocate SEPARATOR if present.
std::size_t sepCount{analysis.tokenCount > 0 ? analysis.tokenCount - 1 : 0};
std::size_t sepElemBytes{sizeof(CHAR)};
if (separator) {
if (separator->IsAllocated()) {
separator->Deallocate();
}
SubscriptValue sepExtent[1]{static_cast<SubscriptValue>(sepCount)};
separator->Establish(string.type(), sepElemBytes, nullptr, 1, sepExtent,
CFI_attribute_allocatable);
separator->GetDimension(0).SetBounds(1, sepExtent[0]);
if (separator->Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("TOKENIZE: could not allocate SEPARATOR array");
}
}
if (analysis.tokenCount == 0) {
return;
}
// Populate tokens and separators.
if (setChars == 0) {
// One token (possibly empty) equal to STRING.
if (tokenElemBytes > 0) {
CHAR *tokDest{tokens.OffsetElement<CHAR>(0)};
if (strChars > 0) {
runtime::memcpy(tokDest, str, strChars * sizeof(CHAR));
}
TokenizeFillBlanks(tokDest + strChars, analysis.maxTokenLen - strChars);
}
return;
}
std::size_t tokenIndex{0};
std::size_t sepIndex{0};
auto storeToken = [&](std::size_t tokenStart, std::size_t tokenEnd) {
std::size_t tokenLen{tokenEnd - tokenStart};
if (tokenElemBytes > 0) {
// Each element is stored in a fixed-size slot of `tokenElemBytes`.
CHAR *tokDest{tokens.OffsetElement<CHAR>(tokenIndex * tokenElemBytes)};
if (tokenLen > 0) {
runtime::memcpy(tokDest, str + tokenStart, tokenLen * sizeof(CHAR));
}
TokenizeFillBlanks(tokDest + tokenLen, analysis.maxTokenLen - tokenLen);
}
++tokenIndex;
};
// Split at each delimiter character, producing empty tokens at boundaries
// and between consecutive delimiters.
std::size_t tokenStart{0};
for (std::size_t pos{0}; pos < strChars; ++pos) {
if (TokenizeIsInSet(str[pos], set, setChars)) {
storeToken(tokenStart, pos);
if (separator) {
CHAR *sepDest{separator->OffsetElement<CHAR>(sepIndex * sepElemBytes)};
sepDest[0] = str[pos];
++sepIndex;
}
tokenStart = pos + 1;
}
}
storeToken(tokenStart, strChars);
}
template <int KIND>
static RT_API_ATTRS void TokenizeStoreIntAt(
const Descriptor &result, std::size_t at, std::int64_t value) {
StoreIntegerAt<KIND>{}(result, at, value);
}
using TokenizeStoreIntFn = void (*)(
const Descriptor &, std::size_t, std::int64_t);
static RT_API_ATTRS TokenizeStoreIntFn GetTokenizeStoreIntFn(
int kind, Terminator &terminator, const char *which) {
switch (kind) {
case 1:
return &TokenizeStoreIntAt<1>;
case 2:
return &TokenizeStoreIntAt<2>;
case 4:
return &TokenizeStoreIntAt<4>;
case 8:
return &TokenizeStoreIntAt<8>;
case 16:
return &TokenizeStoreIntAt<16>;
default:
terminator.Crash(
"TOKENIZE: unsupported INTEGER kind=%d for %s", kind, which);
}
}
template <typename CHAR>
static RT_API_ATTRS void TokenizeFillPositions(Descriptor &first,
Descriptor &last, const CHAR *str, std::size_t strChars, const CHAR *set,
std::size_t setChars, TokenizeStoreIntFn storeFirst,
TokenizeStoreIntFn storeLast, Terminator &terminator) {
// Empty STRING should return one empty token, per Fortran standard.
if (strChars == 0) {
storeFirst(first, 0, 1);
storeLast(last, 0, 0);
return;
}
if (setChars == 0) {
storeFirst(first, 0, 1);
storeLast(last, 0, static_cast<std::int64_t>(strChars));
return;
}
std::size_t tokenIndex{0};
std::size_t tokenStart{0};
for (std::size_t pos{0}; pos < strChars; ++pos) {
if (TokenizeIsInSet(str[pos], set, setChars)) {
storeFirst(first, tokenIndex, static_cast<std::int64_t>(tokenStart + 1));
storeLast(last, tokenIndex, static_cast<std::int64_t>(pos));
++tokenIndex;
tokenStart = pos + 1;
}
}
storeFirst(first, tokenIndex, static_cast<std::int64_t>(tokenStart + 1));
storeLast(last, tokenIndex, static_cast<std::int64_t>(strChars));
++tokenIndex;
// Sanity check: we should have filled exactly the allocated extent.
if (tokenIndex != static_cast<std::size_t>(first.GetDimension(0).Extent())) {
terminator.Crash("TOKENIZE: internal error populating FIRST/LAST");
}
}
// Tokenize Form 1 implementation.
static RT_API_ATTRS void TokenizeImpl(Descriptor &tokens, Descriptor *separator,
const Descriptor &string, const Descriptor &set, Terminator &terminator) {
RUNTIME_CHECK(terminator, string.rank() == 0);
RUNTIME_CHECK(terminator, set.rank() == 0);
RUNTIME_CHECK(terminator, string.raw().type == set.raw().type);
RUNTIME_CHECK(terminator, tokens.rank() == 1);
RUNTIME_CHECK(terminator, tokens.IsAllocatable());
if (separator) {
RUNTIME_CHECK(terminator, separator->rank() == 1);
RUNTIME_CHECK(terminator, separator->IsAllocatable());
}
switch (string.raw().type) {
case CFI_type_char: {
std::size_t strBytes{string.ElementBytes()};
std::size_t setBytes{set.ElementBytes()};
std::size_t strChars{strBytes};
std::size_t setChars{setBytes};
const char *str{
strBytes == 0 ? nullptr : string.OffsetElement<const char>()};
const char *setPtr{
setBytes == 0 ? nullptr : set.OffsetElement<const char>()};
auto analysis{AnalyzeTokenize(str, strChars, setPtr, setChars)};
TokenizeFillForm1(tokens, separator, string, str, strChars, setPtr,
setChars, analysis, terminator);
break;
}
case CFI_type_char16_t: {
std::size_t strBytes{string.ElementBytes()};
std::size_t setBytes{set.ElementBytes()};
std::size_t strChars{strBytes >> 1};
std::size_t setChars{setBytes >> 1};
const char16_t *str{
strBytes == 0 ? nullptr : string.OffsetElement<const char16_t>()};
const char16_t *setPtr{
setBytes == 0 ? nullptr : set.OffsetElement<const char16_t>()};
auto analysis{AnalyzeTokenize(str, strChars, setPtr, setChars)};
TokenizeFillForm1(tokens, separator, string, str, strChars, setPtr,
setChars, analysis, terminator);
break;
}
case CFI_type_char32_t: {
std::size_t strBytes{string.ElementBytes()};
std::size_t setBytes{set.ElementBytes()};
std::size_t strChars{strBytes >> 2};
std::size_t setChars{setBytes >> 2};
const char32_t *str{
strBytes == 0 ? nullptr : string.OffsetElement<const char32_t>()};
const char32_t *setPtr{
setBytes == 0 ? nullptr : set.OffsetElement<const char32_t>()};
auto analysis{AnalyzeTokenize(str, strChars, setPtr, setChars)};
TokenizeFillForm1(tokens, separator, string, str, strChars, setPtr,
setChars, analysis, terminator);
break;
}
default:
terminator.Crash("TOKENIZE: bad string type code %d",
static_cast<int>(string.raw().type));
}
}
// Tokenize Form 2 implementation.
static RT_API_ATTRS void TokenizePositionsImpl(Descriptor &first,
Descriptor &last, const Descriptor &string, const Descriptor &set,
Terminator &terminator) {
RUNTIME_CHECK(terminator, string.rank() == 0);
RUNTIME_CHECK(terminator, set.rank() == 0);
RUNTIME_CHECK(terminator, string.raw().type == set.raw().type);
RUNTIME_CHECK(terminator, first.rank() == 1);
RUNTIME_CHECK(terminator, last.rank() == 1);
RUNTIME_CHECK(terminator, first.IsAllocatable());
RUNTIME_CHECK(terminator, last.IsAllocatable());
auto firstCK{first.type().GetCategoryAndKind()};
auto lastCK{last.type().GetCategoryAndKind()};
if (!firstCK || firstCK->first != TypeCategory::Integer) {
terminator.Crash("TOKENIZE: FIRST is not an INTEGER array");
}
if (!lastCK || lastCK->first != TypeCategory::Integer) {
terminator.Crash("TOKENIZE: LAST is not an INTEGER array");
}
int firstKind{firstCK->second};
int lastKind{lastCK->second};
auto storeFirst{GetTokenizeStoreIntFn(firstKind, terminator, "FIRST")};
auto storeLast{GetTokenizeStoreIntFn(lastKind, terminator, "LAST")};
// Count tokens.
std::size_t tokenCount{0};
switch (string.raw().type) {
case CFI_type_char: {
std::size_t strBytes{string.ElementBytes()};
std::size_t setBytes{set.ElementBytes()};
std::size_t strChars{strBytes};
std::size_t setChars{setBytes};
const char *str{
strBytes == 0 ? nullptr : string.OffsetElement<const char>()};
const char *setPtr{
setBytes == 0 ? nullptr : set.OffsetElement<const char>()};
tokenCount = AnalyzeTokenize(str, strChars, setPtr, setChars).tokenCount;
break;
}
case CFI_type_char16_t: {
std::size_t strBytes{string.ElementBytes()};
std::size_t setBytes{set.ElementBytes()};
std::size_t strChars{strBytes >> 1};
std::size_t setChars{setBytes >> 1};
const char16_t *str{
strBytes == 0 ? nullptr : string.OffsetElement<const char16_t>()};
const char16_t *setPtr{
setBytes == 0 ? nullptr : set.OffsetElement<const char16_t>()};
tokenCount = AnalyzeTokenize(str, strChars, setPtr, setChars).tokenCount;
break;
}
case CFI_type_char32_t: {
std::size_t strBytes{string.ElementBytes()};
std::size_t setBytes{set.ElementBytes()};
std::size_t strChars{strBytes >> 2};
std::size_t setChars{setBytes >> 2};
const char32_t *str{
strBytes == 0 ? nullptr : string.OffsetElement<const char32_t>()};
const char32_t *setPtr{
setBytes == 0 ? nullptr : set.OffsetElement<const char32_t>()};
tokenCount = AnalyzeTokenize(str, strChars, setPtr, setChars).tokenCount;
break;
}
default:
terminator.Crash("TOKENIZE: bad string type code %d",
static_cast<int>(string.raw().type));
}
// (Re)allocate FIRST/LAST.
if (first.IsAllocated()) {
first.Deallocate();
}
if (last.IsAllocated()) {
last.Deallocate();
}
SubscriptValue extent[1]{static_cast<SubscriptValue>(tokenCount)};
first.Establish(TypeCategory::Integer, firstKind, nullptr, 1, extent,
CFI_attribute_allocatable);
first.GetDimension(0).SetBounds(1, extent[0]);
last.Establish(TypeCategory::Integer, lastKind, nullptr, 1, extent,
CFI_attribute_allocatable);
last.GetDimension(0).SetBounds(1, extent[0]);
if (first.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("TOKENIZE: could not allocate FIRST array");
}
if (last.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("TOKENIZE: could not allocate LAST array");
}
if (tokenCount == 0) {
return;
}
// Populate FIRST/LAST.
switch (string.raw().type) {
case CFI_type_char: {
std::size_t strBytes{string.ElementBytes()};
std::size_t setBytes{set.ElementBytes()};
std::size_t strChars{strBytes};
std::size_t setChars{setBytes};
const char *str{
strBytes == 0 ? nullptr : string.OffsetElement<const char>()};
const char *setPtr{
setBytes == 0 ? nullptr : set.OffsetElement<const char>()};
TokenizeFillPositions(first, last, str, strChars, setPtr, setChars,
storeFirst, storeLast, terminator);
break;
}
case CFI_type_char16_t: {
std::size_t strBytes{string.ElementBytes()};
std::size_t setBytes{set.ElementBytes()};
std::size_t strChars{strBytes >> 1};
std::size_t setChars{setBytes >> 1};
const char16_t *str{
strBytes == 0 ? nullptr : string.OffsetElement<const char16_t>()};
const char16_t *setPtr{
setBytes == 0 ? nullptr : set.OffsetElement<const char16_t>()};
TokenizeFillPositions(first, last, str, strChars, setPtr, setChars,
storeFirst, storeLast, terminator);
break;
}
case CFI_type_char32_t: {
std::size_t strBytes{string.ElementBytes()};
std::size_t setBytes{set.ElementBytes()};
std::size_t strChars{strBytes >> 2};
std::size_t setChars{setBytes >> 2};
const char32_t *str{
strBytes == 0 ? nullptr : string.OffsetElement<const char32_t>()};
const char32_t *setPtr{
setBytes == 0 ? nullptr : set.OffsetElement<const char32_t>()};
TokenizeFillPositions(first, last, str, strChars, setPtr, setChars,
storeFirst, storeLast, terminator);
break;
}
default:
break;
}
}
extern "C" {
RT_EXT_API_GROUP_BEGIN
@ -938,6 +1359,22 @@ void RTDEF(CharacterMin)(Descriptor &accumulator, const Descriptor &x,
MaxMin<true>(accumulator, x, sourceFile, sourceLine);
}
// TOKENIZE Form 1 entry point
void RTDEF(Tokenize)(Descriptor &tokens, Descriptor *separator,
const Descriptor &string, const Descriptor &set, const char *sourceFile,
int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
TokenizeImpl(tokens, separator, string, set, terminator);
}
// TOKENIZE Form 2 entry point
void RTDEF(TokenizePositions)(Descriptor &first, Descriptor &last,
const Descriptor &string, const Descriptor &set, const char *sourceFile,
int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
TokenizePositionsImpl(first, last, string, set, terminator);
}
RT_EXT_API_GROUP_END
}
} // namespace Fortran::runtime

View File

@ -431,6 +431,214 @@ TYPED_TEST(RepeatTests, Repeat) {
}
}
// Test TOKENIZE() - Form 1 and Form 2
// Helper to create a scalar character descriptor from a raw C string.
template <typename CHAR>
OwningPtr<Descriptor> CreateScalarDescriptor(const char *raw) {
std::size_t len{std::strlen(raw)};
OwningPtr<Descriptor> desc{Descriptor::Create(
sizeof(CHAR), len, nullptr, 0, nullptr, CFI_attribute_other)};
if (desc->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}
std::basic_string<CHAR> converted{raw, raw + len};
std::copy(converted.begin(), converted.end(), desc->OffsetElement<CHAR>(0));
return desc;
}
// Helper to create an unallocated allocatable character descriptor (rank 1,
// deferred length) for TOKENS or SEPARATOR output.
template <typename CHAR> StaticDescriptor<1> CreateAllocatableCharDescriptor() {
StaticDescriptor<1> staticDesc;
Descriptor &desc{staticDesc.descriptor()};
desc.Establish(static_cast<int>(sizeof(CHAR)), static_cast<SubscriptValue>(0),
nullptr, 1, nullptr, CFI_attribute_allocatable);
desc.GetDimension(0).SetBounds(1, 0);
return staticDesc;
}
// Helper to create an unallocated allocatable integer descriptor (rank 1)
// for FIRST or LAST output.
static StaticDescriptor<1> CreateAllocatableIntDescriptor() {
StaticDescriptor<1> staticDesc;
Descriptor &desc{staticDesc.descriptor()};
desc.Establish(
TypeCategory::Integer, 4, nullptr, 1, nullptr, CFI_attribute_allocatable);
desc.GetDimension(0).SetBounds(1, 0);
return staticDesc;
}
// Helper to extract a token string from the TOKENS descriptor.
template <typename CHAR>
std::basic_string<CHAR> GetToken(Descriptor &tokens, std::size_t index) {
std::size_t elemBytes{tokens.ElementBytes()};
std::size_t charLen{elemBytes / sizeof(CHAR)};
const CHAR *data{tokens.OffsetElement<CHAR>(index * elemBytes)};
return std::basic_string<CHAR>(data, charLen);
}
template <typename CHAR> struct TokenizeTests : public ::testing::Test {};
TYPED_TEST_SUITE(TokenizeTests, CharacterTypes, );
// Form 1: basic tokenization
TYPED_TEST(TokenizeTests, Form1Basic) {
auto string{CreateScalarDescriptor<TypeParam>("first,second,third")};
auto set{CreateScalarDescriptor<TypeParam>(",")};
ASSERT_NE(string, nullptr);
ASSERT_NE(set, nullptr);
auto tokensStatic{CreateAllocatableCharDescriptor<TypeParam>()};
Descriptor &tokens{tokensStatic.descriptor()};
RTNAME(Tokenize)(tokens, nullptr, *string, *set);
// Expect 3 tokens: "first", "second", "third"
ASSERT_TRUE(tokens.IsAllocated());
EXPECT_EQ(tokens.GetDimension(0).Extent(), 3);
// Longest token is "second" (6 chars)
EXPECT_EQ(tokens.ElementBytes(), 6u * sizeof(TypeParam));
// Tokens are blank-padded to max length
std::basic_string<TypeParam> t0{GetToken<TypeParam>(tokens, 0)};
std::basic_string<TypeParam> t1{GetToken<TypeParam>(tokens, 1)};
std::basic_string<TypeParam> t2{GetToken<TypeParam>(tokens, 2)};
std::basic_string<TypeParam> e0{'f', 'i', 'r', 's', 't', ' '};
std::basic_string<TypeParam> e1{'s', 'e', 'c', 'o', 'n', 'd'};
std::basic_string<TypeParam> e2{'t', 'h', 'i', 'r', 'd', ' '};
EXPECT_EQ(t0, e0);
EXPECT_EQ(t1, e1);
EXPECT_EQ(t2, e2);
tokens.Deallocate();
}
// Form 1: empty string produces one zero-length token
TYPED_TEST(TokenizeTests, Form1EmptyString) {
auto string{CreateScalarDescriptor<TypeParam>("")};
auto set{CreateScalarDescriptor<TypeParam>(",")};
ASSERT_NE(string, nullptr);
ASSERT_NE(set, nullptr);
auto tokensStatic{CreateAllocatableCharDescriptor<TypeParam>()};
Descriptor &tokens{tokensStatic.descriptor()};
RTNAME(Tokenize)(tokens, nullptr, *string, *set);
ASSERT_TRUE(tokens.IsAllocated());
EXPECT_EQ(tokens.GetDimension(0).Extent(), 1) << "empty string = 1 token";
EXPECT_EQ(tokens.ElementBytes(), 0u) << "token length should be 0";
EXPECT_EQ(tokens.GetDimension(0).LowerBound(), 1);
tokens.Deallocate();
}
// Form 1: consecutive delimiters produce empty tokens
TYPED_TEST(TokenizeTests, Form1ConsecutiveDelimiters) {
auto string{CreateScalarDescriptor<TypeParam>("a,,b")};
auto set{CreateScalarDescriptor<TypeParam>(",")};
ASSERT_NE(string, nullptr);
ASSERT_NE(set, nullptr);
auto tokensStatic{CreateAllocatableCharDescriptor<TypeParam>()};
Descriptor &tokens{tokensStatic.descriptor()};
RTNAME(Tokenize)(tokens, nullptr, *string, *set);
ASSERT_TRUE(tokens.IsAllocated());
// Expect 3 tokens: "a", "", "b"
EXPECT_EQ(tokens.GetDimension(0).Extent(), 3);
tokens.Deallocate();
}
// Form 1: with SEPARATOR output
TYPED_TEST(TokenizeTests, Form1WithSeparator) {
auto string{CreateScalarDescriptor<TypeParam>("a,b;c")};
auto set{CreateScalarDescriptor<TypeParam>(",;")};
ASSERT_NE(string, nullptr);
ASSERT_NE(set, nullptr);
auto tokensStatic{CreateAllocatableCharDescriptor<TypeParam>()};
Descriptor &tokens{tokensStatic.descriptor()};
auto sepStatic{CreateAllocatableCharDescriptor<TypeParam>()};
Descriptor &separator{sepStatic.descriptor()};
RTNAME(Tokenize)(tokens, &separator, *string, *set);
// Expect 3 tokens: "a", "b", "c"
ASSERT_TRUE(tokens.IsAllocated());
EXPECT_EQ(tokens.GetDimension(0).Extent(), 3);
ASSERT_TRUE(separator.IsAllocated());
// Expect 2 separators: ',' then ';'
EXPECT_EQ(separator.GetDimension(0).Extent(), 2);
EXPECT_EQ(separator.ElementBytes(), sizeof(TypeParam));
// Check separator values: ',' then ';'
const TypeParam *sep0{separator.OffsetElement<TypeParam>(0)};
const TypeParam *sep1{
separator.OffsetElement<TypeParam>(separator.ElementBytes())};
EXPECT_EQ(*sep0, static_cast<TypeParam>(','));
EXPECT_EQ(*sep1, static_cast<TypeParam>(';'));
tokens.Deallocate();
separator.Deallocate();
}
// Form 2: basic position output
TYPED_TEST(TokenizeTests, Form2Basic) {
// From the standard example: "first,second,,fourth"
auto string{CreateScalarDescriptor<TypeParam>("first,second,,fourth")};
auto set{CreateScalarDescriptor<TypeParam>(",;")};
ASSERT_NE(string, nullptr);
ASSERT_NE(set, nullptr);
auto firstStatic{CreateAllocatableIntDescriptor()};
Descriptor &first{firstStatic.descriptor()};
auto lastStatic{CreateAllocatableIntDescriptor()};
Descriptor &last{lastStatic.descriptor()};
RTNAME(TokenizePositions)(first, last, *string, *set);
ASSERT_TRUE(first.IsAllocated());
ASSERT_TRUE(last.IsAllocated());
EXPECT_EQ(first.GetDimension(0).Extent(), 4);
EXPECT_EQ(last.GetDimension(0).Extent(), 4);
// Expect: FIRST = [1, 7, 14, 15], LAST = [5, 12, 13, 20]
EXPECT_EQ(*first.OffsetElement<std::int32_t>(0 * sizeof(std::int32_t)), 1);
EXPECT_EQ(*first.OffsetElement<std::int32_t>(1 * sizeof(std::int32_t)), 7);
EXPECT_EQ(*first.OffsetElement<std::int32_t>(2 * sizeof(std::int32_t)), 14);
EXPECT_EQ(*first.OffsetElement<std::int32_t>(3 * sizeof(std::int32_t)), 15);
EXPECT_EQ(*last.OffsetElement<std::int32_t>(0 * sizeof(std::int32_t)), 5);
EXPECT_EQ(*last.OffsetElement<std::int32_t>(1 * sizeof(std::int32_t)), 12);
EXPECT_EQ(*last.OffsetElement<std::int32_t>(2 * sizeof(std::int32_t)), 13);
EXPECT_EQ(*last.OffsetElement<std::int32_t>(3 * sizeof(std::int32_t)), 20);
first.Deallocate();
last.Deallocate();
}
// Form 2: empty string produces one token with FIRST=1, LAST=0
TYPED_TEST(TokenizeTests, Form2EmptyString) {
auto string{CreateScalarDescriptor<TypeParam>("")};
auto set{CreateScalarDescriptor<TypeParam>(",")};
ASSERT_NE(string, nullptr);
ASSERT_NE(set, nullptr);
auto firstStatic{CreateAllocatableIntDescriptor()};
Descriptor &first{firstStatic.descriptor()};
auto lastStatic{CreateAllocatableIntDescriptor()};
Descriptor &last{lastStatic.descriptor()};
RTNAME(TokenizePositions)(first, last, *string, *set);
ASSERT_TRUE(first.IsAllocated());
ASSERT_TRUE(last.IsAllocated());
EXPECT_EQ(first.GetDimension(0).Extent(), 1) << "empty string = 1 token";
EXPECT_EQ(last.GetDimension(0).Extent(), 1);
// Expect FIRST(1)=1, LAST(1)=0
EXPECT_EQ(*first.OffsetElement<std::int32_t>(0), 1) << "FIRST(1) = 1";
EXPECT_EQ(*last.OffsetElement<std::int32_t>(0), 0) << "LAST(1) = 0";
first.Deallocate();
last.Deallocate();
}
// Test F_C_STRING()
TEST(CharacterTests, FCString) {
// Test 1: Default behavior (trim trailing blanks)

View File

@ -447,6 +447,7 @@ struct IntrinsicLibrary {
fir::ExtendedValue genTeamNumber(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genTokenize(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genTrailz(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genTransfer(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);

View File

@ -142,6 +142,22 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
mlir::Value setBase, mlir::Value setLen,
mlir::Value back);
/// Generate call to TOKENIZE runtime (Form 1).
/// Splits \p stringBox into tokens based on separator characters in \p setBox.
/// \p tokensBox must be an unallocated allocatable array that receives the
/// token substrings. \p separatorBox is optional and receives separator chars.
void genTokenize(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value tokensBox, mlir::Value separatorBox,
mlir::Value stringBox, mlir::Value setBox);
/// Generate call to TOKENIZE runtime (Form 2).
/// Returns token positions rather than substrings.
/// \p firstBox and \p lastBox must be unallocated allocatable integer arrays
/// that receive the starting and ending positions of each token.
void genTokenizePositions(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value firstBox, mlir::Value lastBox,
mlir::Value stringBox, mlir::Value setBox);
} // namespace fir::runtime
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CHARACTER_H

View File

@ -130,6 +130,13 @@ std::size_t RTDECL(Verify4)(const char32_t *, std::size_t, const char32_t *set,
void RTDECL(Verify)(Descriptor &result, const Descriptor &string,
const Descriptor &set, const Descriptor *back /*can be null*/, int kind,
const char *sourceFile = nullptr, int sourceLine = 0);
void RTDECL(Tokenize)(Descriptor &tokens, Descriptor *separator,
const Descriptor &string, const Descriptor &set,
const char *sourceFile = nullptr, int sourceLine = 0);
void RTDECL(TokenizePositions)(Descriptor &first, Descriptor &last,
const Descriptor &string, const Descriptor &set,
const char *sourceFile = nullptr, int sourceLine = 0);
}
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_CHARACTER_H_

View File

@ -22,6 +22,7 @@
#include "flang/Support/Fortran.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <climits>
#include <cmath>
#include <map>
#include <string>
@ -1749,6 +1750,26 @@ static const IntrinsicInterface intrinsicSubroutine[]{
{{"seconds", AnyInt, Rank::scalar, Optionality::required,
common::Intent::In}},
{}, Rank::elemental, IntrinsicClass::impureSubroutine},
{"tokenize",
{{"string", SameCharNoLen, Rank::scalar, Optionality::required,
common::Intent::In},
{"set", SameCharNoLen, Rank::scalar, Optionality::required,
common::Intent::In},
{"tokens", SameCharNoLen, Rank::vector, Optionality::required,
common::Intent::Out},
{"separator", SameCharNoLen, Rank::vector, Optionality::optional,
common::Intent::Out}},
{}, Rank::elemental, IntrinsicClass::pureSubroutine},
{"tokenize",
{{"string", SameCharNoLen, Rank::scalar, Optionality::required,
common::Intent::In},
{"set", SameCharNoLen, Rank::scalar, Optionality::required,
common::Intent::In},
{"first", AnyInt, Rank::vector, Optionality::required,
common::Intent::Out},
{"last", AnyInt, Rank::vector, Optionality::required,
common::Intent::Out}},
{}, Rank::elemental, IntrinsicClass::pureSubroutine},
{"unlink",
{{"path", DefaultChar, Rank::scalar, Optionality::required,
common::Intent::In},
@ -3704,6 +3725,24 @@ static bool ApplySpecificChecks(SpecificCall &call, FoldingContext &context) {
arg ? arg->sourceLocation() : context.messages().at(),
"Argument of LOC() must be an object or procedure"_err_en_US);
}
} else if (name == "tokenize") {
// Both forms of TOKENIZE have at least 4 dummy arguments, and the last two
// must be allocatable.
const auto &dummies{
call.specificIntrinsic.characteristics.value().dummyArguments};
for (int i{2}; i < 4; ++i) {
const auto &arg{call.arguments[i]};
if (arg) {
if (const auto *expr{arg->UnwrapExpr()}) {
if (!IsAllocatableDesignator(*expr)) {
ok = false;
context.messages().Say(arg->sourceLocation(),
"'%s=' argument to 'tokenize' must be ALLOCATABLE"_err_en_US,
dummies[i].name);
}
}
}
}
}
return ok;
}
@ -3766,15 +3805,81 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::Probe(
}
}
// Find the specific subroutine and match the actual arguments against its
// dummy argument patterns. If there are multiple specific subroutines with
// the same name, try them in order. If one matches, clear out the errors.
// If none match, keep the messages from the form whose dummy argument
// types and keywords best match the actual arguments supplied.
if (call.isSubroutineCall) {
const std::string &name{ResolveAlias(call.name)};
auto subrRange{subroutines_.equal_range(name)};
parser::Messages subrErrors;
int bestScore{INT_MIN};
parser::Messages localBuffer;
parser::Messages *finalBuffer{context.messages().messages()};
parser::ContextualMessages localMessages{
context.messages().at(), finalBuffer ? &localBuffer : nullptr};
FoldingContext localContext{context, localMessages};
for (auto iter{subrRange.first}; iter != subrRange.second; ++iter) {
if (auto specificCall{iter->second->Match(
call, defaults_, arguments, context, builtinsScope_)}) {
call, defaults_, arguments, localContext, builtinsScope_)}) {
if (finalBuffer) {
finalBuffer->Annex(std::move(localBuffer));
}
ApplySpecificChecks(*specificCall, context);
return specificCall;
}
// Match failed. Compute a score reflecting how well the actual
// arguments correspond to this form's dummy arguments: count the
// number of positional arguments whose type category matches the
// corresponding dummy's expected categories, plus the number of
// keyword arguments whose keyword name matches a dummy in this form,
// minus the number of required dummies that cannot be satisfied by
// the number of arguments provided. Keep the error messages from
// the form with the highest score, preferring an earlier form on
// ties.
const auto *iface{iter->second};
int dummyCount{iface->CountArguments()};
int numRequired{0};
for (int j{0}; j < dummyCount; ++j) {
if (iface->dummy[j].optionality == Optionality::required) {
++numRequired;
}
}
int score{0};
int positionalIndex{0};
for (const auto &arg : arguments) {
if (arg) {
if (auto kw{arg->keyword()}) {
for (int k{0}; k < dummyCount; ++k) {
if (kw == iface->dummy[k].keyword) {
++score;
break;
}
}
} else {
if (positionalIndex < dummyCount) {
if (auto type{arg->GetType()}) {
if (iface->dummy[positionalIndex].typePattern.categorySet.test(
type->category())) {
++score;
}
}
}
++positionalIndex;
}
}
}
score -= std::max(0, numRequired - static_cast<int>(arguments.size()));
if (score > bestScore) {
bestScore = score;
subrErrors = std::move(localBuffer);
} else {
localBuffer.clear();
}
}
if (finalBuffer) {
finalBuffer->Annex(std::move(subrErrors));
}
if (IsIntrinsicFunction(call.name) && !IsDualIntrinsic(call.name)) {
context.messages().Say(

View File

@ -797,6 +797,13 @@ static constexpr IntrinsicHandler handlers[]{
{"team", asBox, handleDynamicOptional}}},
/*isElemental=*/false},
{"time", &I::genTime, {}, /*isElemental=*/false},
{"tokenize",
&I::genTokenize,
{{{"string", asAddr},
{"set", asAddr},
{"out1", asInquired},
{"out2", asInquired, handleDynamicOptional}}},
/*isElemental=*/false},
{"trailz", &I::genTrailz},
{"transfer",
&I::genTransfer,
@ -8529,6 +8536,101 @@ void IntrinsicLibrary::genSleep(llvm::ArrayRef<fir::ExtendedValue> args) {
fir::runtime::genSleep(builder, loc, fir::getBase(args[0]));
}
// TOKENIZE
void IntrinsicLibrary::genTokenize(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 4 && "TOKENIZE requires 3 or 4 arguments");
const fir::ExtendedValue &string = args[0];
const fir::ExtendedValue &set = args[1];
// Distinguish forms by the element type of the third argument. For form 1,
// TOKENS is CHARACTER. For form 2, FIRST is INTEGER.
mlir::Type thirdArgEleTy = fir::getElementTypeOf(args[2]);
bool isForm1 = fir::isa_char(thirdArgEleTy);
bool isForm2 = fir::isa_integer(thirdArgEleTy);
assert((isForm1 || isForm2) &&
"TOKENIZE third argument must be CHARACTER or INTEGER");
mlir::Value stringBox = builder.createBox(loc, string);
mlir::Value setBox = builder.createBox(loc, set);
mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType());
mlir::Type boxNoneRefTy = fir::ReferenceType::get(boxNoneTy);
// A lambda to return the address of the descriptor storage to pass to the
// runtime. For MutableBoxValue, this also handles any required syncing
// before/after the runtime call.
auto getBoxStorageAddr =
[&](const fir::ExtendedValue &exv, llvm::StringRef what,
const fir::MutableBoxValue **mutableBoxOut) -> mlir::Value {
if (const auto *mb = exv.getBoxOf<fir::MutableBoxValue>()) {
if (mutableBoxOut)
*mutableBoxOut = mb;
mlir::Value addr = fir::factory::getMutableIRBox(builder, loc, *mb);
return builder.createConvert(loc, boxNoneRefTy, addr);
}
if (const auto *bv = exv.getBoxOf<fir::BoxValue>()) {
mlir::Value addr = bv->getAddr();
if (auto boxTy = fir::dyn_cast_ptrEleTy(addr.getType())) {
if (mlir::isa<fir::BaseBoxType>(boxTy))
return builder.createConvert(loc, boxNoneRefTy, addr);
}
fir::emitFatalError(loc, llvm::Twine("TOKENIZE: ") + what +
" must be a descriptor address");
}
fir::emitFatalError(loc, llvm::Twine("TOKENIZE: ") + what +
" not lowered as a boxed entity");
};
if (isForm1) {
// Form 1: TOKENIZE(STRING, SET, TOKENS [, SEPARATOR])
const fir::ExtendedValue &tokens = args[2];
const fir::MutableBoxValue *tokensMutableBox{nullptr};
mlir::Value tokensBoxAddr =
getBoxStorageAddr(tokens, "TOKENS", &tokensMutableBox);
// Handle optional SEPARATOR argument
mlir::Value separatorBoxAddr;
const fir::MutableBoxValue *separatorMutableBox{nullptr};
if (!isStaticallyAbsent(args[3])) {
const fir::ExtendedValue &separator = args[3];
separatorBoxAddr =
getBoxStorageAddr(separator, "SEPARATOR", &separatorMutableBox);
} else {
separatorBoxAddr = builder.createNullConstant(loc, boxNoneRefTy);
}
// Call the Form 1 runtime function
fir::runtime::genTokenize(builder, loc, tokensBoxAddr, separatorBoxAddr,
stringBox, setBox);
if (tokensMutableBox)
fir::factory::syncMutableBoxFromIRBox(builder, loc, *tokensMutableBox);
if (separatorMutableBox)
fir::factory::syncMutableBoxFromIRBox(builder, loc, *separatorMutableBox);
} else {
// Form 2: TOKENIZE(STRING, SET, FIRST, LAST)
const fir::ExtendedValue &first = args[2];
const fir::ExtendedValue &last = args[3];
const fir::MutableBoxValue *firstMutableBox{nullptr};
const fir::MutableBoxValue *lastMutableBox{nullptr};
mlir::Value firstBoxAddr =
getBoxStorageAddr(first, "FIRST", &firstMutableBox);
mlir::Value lastBoxAddr = getBoxStorageAddr(last, "LAST", &lastMutableBox);
// Call the Form 2 runtime function
fir::runtime::genTokenizePositions(builder, loc, firstBoxAddr, lastBoxAddr,
stringBox, setBox);
if (firstMutableBox)
fir::factory::syncMutableBoxFromIRBox(builder, loc, *firstMutableBox);
if (lastMutableBox)
fir::factory::syncMutableBoxFromIRBox(builder, loc, *lastMutableBox);
}
}
// TRANSFER
fir::ExtendedValue
IntrinsicLibrary::genTransfer(mlir::Type resultType,

View File

@ -285,6 +285,35 @@ void fir::runtime::genVerifyDescriptor(fir::FirOpBuilder &builder,
kind);
}
void fir::runtime::genTokenize(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value tokensBox, mlir::Value separatorBox,
mlir::Value stringBox, mlir::Value setBox) {
auto func = fir::runtime::getRuntimeFunc<mkRTKey(Tokenize)>(loc, builder);
auto fTy = func.getFunctionType();
auto sourceFile = fir::factory::locationToFilename(builder, loc);
auto sourceLine =
fir::factory::locationToLineNo(builder, loc, fTy.getInput(5));
auto args =
fir::runtime::createArguments(builder, loc, fTy, tokensBox, separatorBox,
stringBox, setBox, sourceFile, sourceLine);
fir::CallOp::create(builder, loc, func, args);
}
void fir::runtime::genTokenizePositions(
fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value firstBox,
mlir::Value lastBox, mlir::Value stringBox, mlir::Value setBox) {
auto func =
fir::runtime::getRuntimeFunc<mkRTKey(TokenizePositions)>(loc, builder);
auto fTy = func.getFunctionType();
auto sourceFile = fir::factory::locationToFilename(builder, loc);
auto sourceLine =
fir::factory::locationToLineNo(builder, loc, fTy.getInput(5));
auto args =
fir::runtime::createArguments(builder, loc, fTy, firstBox, lastBox,
stringBox, setBox, sourceFile, sourceLine);
fir::CallOp::create(builder, loc, func, args);
}
mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder,
mlir::Location loc, int kind,
mlir::Value stringBase,

View File

@ -0,0 +1,27 @@
! RUN: %flang_fc1 -emit-hlfir %s -o - | FileCheck %s
! CHECK-LABEL: tok_form1
subroutine tok_form1()
implicit none
character(:), allocatable :: tokens(:)
call tokenize("a,b", ",", tokens)
! CHECK-DAG: %[[TOKENS:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>
! CHECK-DAG: %[[TOKENS_DECL:.*]]:2 = hlfir.declare %[[TOKENS]] {fortran_attrs = #fir.var_attrs<allocatable>
! CHECK-DAG: %[[TOKENS_NONE:.*]] = fir.convert %[[TOKENS_DECL]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>) -> !fir.ref<!fir.box<none>>
! CHECK-DAG: %[[SEP_NONE:.*]] = fir.zero_bits !fir.ref<!fir.box<none>>
! CHECK: fir.call @_FortranATokenize(%[[TOKENS_NONE]], %[[SEP_NONE]],
end subroutine tok_form1
! CHECK-LABEL: tok_form2
subroutine tok_form2()
implicit none
integer, allocatable :: first(:), last(:)
call tokenize("a,,b", ",", first, last)
! CHECK-DAG: %[[FIRST:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
! CHECK-DAG: %[[LAST:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
! CHECK-DAG: %[[FIRST_DECL:.*]]:2 = hlfir.declare %[[FIRST]] {fortran_attrs = #fir.var_attrs<allocatable>
! CHECK-DAG: %[[LAST_DECL:.*]]:2 = hlfir.declare %[[LAST]] {fortran_attrs = #fir.var_attrs<allocatable>
! CHECK-DAG: %[[FIRST_NONE:.*]] = fir.convert %[[FIRST_DECL]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
! CHECK-DAG: %[[LAST_NONE:.*]] = fir.convert %[[LAST_DECL]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: fir.call @_FortranATokenizePositions(%[[FIRST_NONE]], %[[LAST_NONE]],
end subroutine tok_form2

View File

@ -0,0 +1,267 @@
! RUN: %python %S/test_errors.py %s %flang_fc1
! Check for semantic errors in tokenize() subroutine calls
! Based on Fortran 2023 standard requirements
program test_tokenize_errors
implicit none
! Valid declarations for reference
character(:), allocatable :: tokens(:), separator(:)
integer, allocatable :: first(:), last(:)
character(20) :: string
character(5) :: set
! Invalid declarations for testing
integer :: int_scalar, int_array(10)
real :: real_scalar
character(10) :: fixed_tokens(10), fixed_separator(10)
integer :: fixed_first(10), fixed_last(10)
character(10) :: string_array(5)
character(5) :: set_array(5)
character(len=10, kind=2) :: wide_string, wide_set
character(len=10, kind=2), allocatable :: wide_tokens(:)
real, allocatable :: real_first(:), real_last(:)
logical, allocatable :: logical_array(:)
integer, allocatable :: first_2d(:,:), last_2d(:,:)
character(:), allocatable :: tokens_2d(:,:)
type t_coarray
integer, allocatable :: a(:)
end type
type(t_coarray) :: coa[*]
integer, allocatable :: coindexed_first(:)[:]
integer, allocatable :: coindexed_last(:)[:]
character(:), allocatable :: coindexed_tokens(:)[:]
!========================================================================
! Test Form 1: TOKENIZE(STRING, SET, TOKENS [, SEPARATOR])
!========================================================================
! Valid call (reference)
call tokenize("hello world", " ", tokens)
call tokenize(string, set, tokens, separator)
!========================================================================
! Form 1: Wrong types for STRING argument
!========================================================================
!ERROR: Actual argument for 'string=' has bad type 'INTEGER(4)'
call tokenize(int_scalar, set, tokens)
!ERROR: Actual argument for 'string=' has bad type 'REAL(4)'
call tokenize(real_scalar, set, tokens)
!========================================================================
! Form 1: Wrong rank for STRING (must be scalar)
!========================================================================
! Fails
!ERROR: 'string=' argument has unacceptable rank 1
call tokenize(string_array, set, tokens)
!========================================================================
! Form 1: Wrong types for SET argument
!========================================================================
!ERROR: Actual argument for 'set=' has bad type 'INTEGER(4)'
call tokenize(string, int_scalar, tokens)
!ERROR: Actual argument for 'set=' has bad type 'REAL(4)'
call tokenize(string, real_scalar, tokens)
!========================================================================
! Form 1: Wrong rank for SET (must be scalar)
!========================================================================
! Fails
!ERROR: 'set=' argument has unacceptable rank 1
call tokenize(string, set_array, tokens)
!========================================================================
! Form 1: Wrong types for TOKENS argument
!========================================================================
! Fails
!ERROR: Actual argument for 'tokens=' has bad type 'INTEGER(4)'
call tokenize(string, set, int_array)
! Fails
!ERROR: Actual argument for 'tokens=' has bad type 'REAL(4)'
call tokenize(string, set, real_first)
! Fails
!ERROR: Actual argument for 'tokens=' has bad type 'LOGICAL(4)'
call tokenize(string, set, logical_array)
!========================================================================
! Form 1: Wrong rank for TOKENS (must be rank-1 array)
!========================================================================
! Fails
!ERROR: 'tokens=' argument has unacceptable rank 0
call tokenize(string, set, string)
! Fails
!ERROR: 'tokens=' argument has unacceptable rank 2
call tokenize(string, set, tokens_2d)
!========================================================================
! Form 1: TOKENS must be allocatable
!========================================================================
!ERROR: 'tokens=' argument to 'tokenize' must be ALLOCATABLE
call tokenize(string, set, fixed_tokens)
!========================================================================
! Form 1: Wrong types for optional SEPARATOR argument
!========================================================================
! Fails
!ERROR: Actual argument for 'separator=' has bad type 'INTEGER(4)'
call tokenize(string, set, tokens, int_array)
! Fails
!ERROR: Actual argument for 'separator=' has bad type 'REAL(4)'
call tokenize(string, set, tokens, real_first)
!========================================================================
! Form 1: Wrong rank for SEPARATOR (must be rank-1 array)
!========================================================================
! Fails
!ERROR: 'separator=' argument has unacceptable rank 0
call tokenize(string, set, tokens, set)
!========================================================================
! Form 1: SEPARATOR must be allocatable
!========================================================================
!ERROR: 'separator=' argument to 'tokenize' must be ALLOCATABLE
call tokenize(string, set, tokens, fixed_separator)
!========================================================================
! Form 1: Character kind mismatches
!========================================================================
! Fails
! wide_string (kind=2) becomes sameArg; set (kind=1) fails sameKind check
!ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=1,LEN=5_8)'
call tokenize(wide_string, set, tokens)
! Fails
!ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=2,LEN=10_8)'
call tokenize(string, wide_set, tokens)
! Fails
!ERROR: Actual argument for 'tokens=' has bad type or kind 'CHARACTER(KIND=2,LEN=10_8)'
call tokenize(string, set, wide_tokens)
!========================================================================
! Test Form 2: TOKENIZE(STRING, SET, FIRST, LAST)
!========================================================================
! Valid call (reference)
call tokenize("hello world", " ", first, last)
!========================================================================
! Form 2: Wrong types for STRING argument (same as Form 1)
!========================================================================
!ERROR: Actual argument for 'string=' has bad type 'INTEGER(4)'
call tokenize(int_scalar, set, first, last)
!========================================================================
! Form 2: Wrong types for SET argument (same as Form 1)
!========================================================================
!ERROR: Actual argument for 'set=' has bad type 'INTEGER(4)'
call tokenize(string, int_scalar, first, last)
!========================================================================
! Form 2: Wrong types for FIRST argument
!========================================================================
!ERROR: Actual argument for 'first=' has bad type 'REAL(4)'
call tokenize(string, set, real_first, last)
!ERROR: Actual argument for 'first=' has bad type 'LOGICAL(4)'
call tokenize(string, set, logical_array, last)
!========================================================================
! Form 2: Wrong rank for FIRST (must be rank-1 array)
!========================================================================
!ERROR: 'first=' argument has unacceptable rank 0
call tokenize(string, set, int_scalar, last)
!ERROR: 'first=' argument has unacceptable rank 2
call tokenize(string, set, first_2d, last)
!========================================================================
! Form 2: FIRST must be allocatable
!========================================================================
!ERROR: 'first=' argument to 'tokenize' must be ALLOCATABLE
call tokenize(string, set, fixed_first, last)
!========================================================================
! Form 2: Wrong types for LAST argument
!========================================================================
!ERROR: Actual argument for 'last=' has bad type 'REAL(4)'
call tokenize(string, set, first, real_first)
!========================================================================
! Form 2: Wrong rank for LAST (must be rank-1 array)
!========================================================================
!ERROR: 'last=' argument has unacceptable rank 0
call tokenize(string, set, first, int_scalar)
!ERROR: 'last=' argument has unacceptable rank 2
call tokenize(string, set, first, last_2d)
!========================================================================
! Form 2: LAST must be allocatable
!========================================================================
!ERROR: 'last=' argument to 'tokenize' must be ALLOCATABLE
call tokenize(string, set, first, fixed_last)
!========================================================================
! Argument count errors
!========================================================================
!ERROR: missing mandatory 'set=' argument
call tokenize(string)
! Fails
!ERROR: missing mandatory 'tokens=' argument
call tokenize(string, set)
!ERROR: too many actual arguments for intrinsic 'tokenize'
call tokenize(string, set, tokens, separator, first)
!========================================================================
! Coindexed object restrictions (if applicable)
!========================================================================
! Note: Coarray tests depend on whether the standard allows coindexed
! objects for TOKENIZE. Uncomment if compiler version enforces this.
! !ERROR: 'first=' argument to 'tokenize' may not be a coindexed object
! call tokenize(string, set, coindexed_first[1], last)
! !ERROR: 'last=' argument to 'tokenize' may not be a coindexed object
! call tokenize(string, set, first, coindexed_last[1])
! !ERROR: 'tokens=' argument to 'tokenize' may not be a coindexed object
! call tokenize(string, set, coindexed_tokens[1])
!========================================================================
! Keyword argument errors
!========================================================================
!ERROR: unknown keyword argument to intrinsic 'tokenize'
call tokenize(string, set, tokens, invalid_keyword=separator)
! Fails
!ERROR: Actual argument for 'tokens=' has bad type 'INTEGER(4)'
call tokenize(string=string, set=set, tokens=first, separator=separator)
!========================================================================
! Type/kind inconsistency between STRING, SET, TOKENS, SEPARATOR
!========================================================================
! All character arguments must have the same kind (but can have different lengths)
! This is implicitly handled by SameCharNoLen in the intrinsic definition
! Fails
!ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=2,LEN=10_8)'
call tokenize(string=string, set=wide_set, tokens=tokens)
end program test_tokenize_errors