llvm-project/llvm/unittests/Support/ConvertEBCDICTest.cpp
Abhina Sree a9ee8e4a45
Create an EncodingConverter class with both iconv and icu support. (#138893)
This patch adds a wrapper class called EncodingConverter for
ConverterEBCDIC. This class is then extended to support the ICU library
or iconv library. The ICU library currently takes priority over the
iconv library.

Relevant RFCs:

https://discourse.llvm.org/t/rfc-adding-a-charset-converter-to-the-llvm-support-library/69795

https://discourse.llvm.org/t/rfc-enabling-fexec-charset-support-to-llvm-and-clang-reposting/71512

Stacked PR to enable fexec-charset that depends on this:
https://github.com/llvm/llvm-project/pull/138895

See old PR for review and commit history:
https://github.com/llvm/llvm-project/pull/74516
2025-05-20 14:02:22 -04:00

98 lines
3.2 KiB
C++

//===- ConvertEBCDICTest.cpp - EBCDIC/UTF-8 conversion tests --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===--------------------------------------------------------------------------===//
#include "llvm/Support/ConvertEBCDIC.h"
#include "llvm/ADT/SmallString.h"
#include "gtest/gtest.h"
using namespace llvm;
namespace {
// Test fixtures. Each pair holds the same text twice: once in ASCII/UTF-8 and
// once as the EBCDIC byte sequence the converter is expected to produce.

// "Hello World!" followed by a line feed (0x0A in the ASCII form, 0x15 in the
// EBCDIC form).
constexpr char HelloA[] =
    "\x48\x65\x6C\x6C\x6F\x20\x57\x6F\x72\x6C\x64\x21\x0a";
constexpr char HelloE[] =
    "\xC8\x85\x93\x93\x96\x40\xE6\x96\x99\x93\x84\x5A\x15";

// The 26 upper-case letters followed by the 26 lower-case letters:
// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".
constexpr char ABCStrA[] =
    "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52"
    "\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A"
    "\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A";
constexpr char ABCStrE[] =
    "\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9"
    "\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91"
    "\x92\x93\x94\x95\x96\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9";

// Symbols and accented Latin letters, "¡¢£AÄÅÆEÈÉÊaàáâãäeèéêë". The UTF-8 form
// mixes one- and two-byte sequences; the EBCDIC form is one byte per character.
constexpr char AccentUTF[] =
    "\xc2\xa1\xc2\xa2\xc2\xa3\x41\xc3\x84\xc3\x85\xc3\x86\x45\xc3\x88\xc3\x89"
    "\xc3\x8a\x61\xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3\xc3\xa4\x65\xc3\xa8\xc3\xa9"
    "\xc3\xaa\xc3\xab";
constexpr char AccentE[] = "\xaa\x4a\xb1\xc1\x63\x67\x9e\xc5\x74\x71\x72"
                           "\x81\x44\x45\x42\x46\x43\x85\x54\x51\x52\x53";

// UTF-8 encoding of the Cyrillic letter ya; used as input that the conversion
// to EBCDIC is expected to reject.
constexpr char CyrillicUTF[] = "\xd0\xaf";
// Exercises ConverterEBCDIC::convertToEBCDIC: three inputs must convert
// without error to their expected EBCDIC byte strings, and one input must be
// rejected with illegal_byte_sequence.
TEST(ConverterEBCDIC, convertToEBCDIC) {
  // Output buffer shared by every case; emptied after each one.
  SmallString<64> Converted;

  // Plain greeting terminated by a line feed.
  std::error_code Status = ConverterEBCDIC::convertToEBCDIC(HelloA, Converted);
  EXPECT_TRUE(!Status);
  EXPECT_STREQ(HelloE, Converted.c_str());
  Converted.clear();

  // Full upper- and lower-case alphabet.
  Status = ConverterEBCDIC::convertToEBCDIC(ABCStrA, Converted);
  EXPECT_TRUE(!Status);
  EXPECT_STREQ(ABCStrE, Converted.c_str());
  Converted.clear();

  // Accented characters that are multi-byte in UTF-8.
  Status = ConverterEBCDIC::convertToEBCDIC(AccentUTF, Converted);
  EXPECT_TRUE(!Status);
  EXPECT_STREQ(AccentE, Converted.c_str());
  Converted.clear();

  // The Cyrillic letter is not representable in 1047, so the conversion must
  // report an error instead of producing output.
  Status = ConverterEBCDIC::convertToEBCDIC(CyrillicUTF, Converted);
  EXPECT_EQ(Status, std::errc::illegal_byte_sequence);
  Converted.clear();
}
// Exercises ConverterEBCDIC::convertToUTF8: each EBCDIC fixture must convert
// back to its ASCII/UTF-8 counterpart.
TEST(ConverterEBCDIC, convertFromEBCDIC) {
  // Output buffer shared by every case; emptied after each one.
  SmallString<64> Converted;

  // Plain greeting terminated by a line feed.
  ConverterEBCDIC::convertToUTF8(HelloE, Converted);
  EXPECT_STREQ(HelloA, Converted.c_str());
  Converted.clear();

  // Full upper- and lower-case alphabet.
  ConverterEBCDIC::convertToUTF8(ABCStrE, Converted);
  EXPECT_STREQ(ABCStrA, Converted.c_str());
  Converted.clear();

  // Accented characters, which expand to two bytes each in UTF-8.
  ConverterEBCDIC::convertToUTF8(AccentE, Converted);
  EXPECT_STREQ(AccentUTF, Converted.c_str());
  Converted.clear();
}
} // namespace