Test failure messages are far more informative when a test is written using ASSERT_/EXPECT_<operator>(A, B) rather than ASSERT_/EXPECT_TRUE(A <operator> B). This commit updates all of llvm/unittests/Support to use these macros where possible. The conversion has not been possible in: - llvm/unittests/Support/FSUniqueIDTest.cpp - because no operators beyond ==, != and < are overloaded. - llvm/unittests/Support/BranchProbabilityTest.cpp - where the unchanged tests exercise the operator overloads themselves. There are other cases in which this conversion would not be valid, but none of them apply to these tests: they do not use NULL (they use nullptr), and they do not use const char* (they use std::string or StringRef). Reviewed By: mubashar_ Differential Revision: https://reviews.llvm.org/D117319
108 lines
4.0 KiB
C++
108 lines
4.0 KiB
C++
//===- unittests/Support/UnicodeTest.cpp - Unicode.h tests ----------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Support/Unicode.h"
|
|
#include "llvm/Support/ConvertUTF.h"
|
|
#include "gtest/gtest.h"
|
|
|
|
namespace llvm {
|
|
namespace sys {
|
|
namespace unicode {
|
|
namespace {
|
|
|
|
// Drives columnWidthUTF8 through a fixed table of (expected result, UTF-8
// input) pairs. Entries are checked in table order; every mismatch is reported
// (non-fatally) together with the index of the offending entry.
TEST(Unicode, columnWidthUTF8) {
  struct WidthCase {
    int Expected;     // Value columnWidthUTF8 must return for Text.
    const char *Text; // NUL-terminated UTF-8 input.
  };

  static const WidthCase Cases[] = {
      // Empty input and single ASCII characters.
      {0, ""},
      {1, " "},
      {1, "a"},
      {1, "~"},

      // A run of ASCII characters.
      {6, "abcdef"},

      // Inputs for which -1 is expected: control characters (alone or at the
      // end of otherwise ordinary text) and ZERO WIDTH SPACE.
      {-1, "\x01"},
      {-1, "\t"},
      {-1, "aaaaaaaaaa\x01"},
      {-1, "\342\200\213"}, // 200B ZERO WIDTH SPACE

      // 00AD SOFT HYPHEN: most terminals show it as a space or a dash, while
      // some text editors only show it at a line break or treat it as an
      // invisible line-break hint. The terminal-oriented interpretation is
      // the one expected here.
      {1, "\302\255"},

      // One code point of each expected width: 0, 1 and 2 columns.
      {0, "\314\200"},     // 0300 COMBINING GRAVE ACCENT
      {1, "\340\270\201"}, // 0E01 THAI CHARACTER KO KAI
      {2, "\344\270\200"}, // CJK UNIFIED IDEOGRAPH-4E00

      // Widths add up across multi-character strings.
      {4, "\344\270\200\344\270\200"},
      {3, "q\344\270\200"},
      {3, "\314\200\340\270\201\344\270\200"},

      // Invalid UTF-8: columnWidthUTF8 should error out (-2 expected).
      {-2, "\344"},
      {-2, "\344\270"},
      {-2, "\344\270\033"},
      {-2, "\344\270\300"},
      {-2, "\377\366\355"},

      // The same invalid sequences following valid ASCII text.
      {-2, "qwer\344"},
      {-2, "qwer\344\270"},
      {-2, "qwer\344\270\033"},
      {-2, "qwer\344\270\300"},
      {-2, "qwer\377\366\355"},

      // UTF-8 sequences longer than 4 bytes correspond to unallocated Unicode
      // characters.
      {-2, "\370\200\200\200\200"},     // U+200000
      {-2, "\374\200\200\200\200\200"}, // U+4000000
  };

  for (const WidthCase &Case : Cases) {
    EXPECT_EQ(Case.Expected, columnWidthUTF8(Case.Text))
        << "table entry " << (&Case - &Cases[0]);
  }
}
|
|
|
|
// Checks isPrintable against a fixed table of code points, then verifies for
// every 7-bit value that columnWidthUTF8 returning 1 for the one-character
// string coincides with isPrintable being true for the decoded code point.
TEST(Unicode, isPrintable) {
  struct PrintableCase {
    int CodePoint;  // Value passed to isPrintable.
    bool Printable; // Expected result.
  };

  static const PrintableCase Cases[] = {
      {0, false}, // <control-0000>-<control-001F>
      {0x01, false},
      {0x1F, false},
      {' ', true},
      {'A', true},
      {'~', true},
      {0x7F, false}, // <control-007F>..<control-009F>
      {0x90, false},
      {0x9F, false},

      {0xAC, true},
      {0xAD, true}, // SOFT HYPHEN is displayed on most terminals as either a
                    // space or a dash.
      {0xAE, true},

      {0x0377, true},  // GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
      {0x0378, false}, // <reserved-0378>..<reserved-0379>

      {0x0600, false}, // ARABIC NUMBER SIGN

      {0x1FFFF, false}, // <reserved-1F774>..<noncharacter-1FFFF>
      {0x20000, true},  // CJK UNIFIED IDEOGRAPH-20000

      {0x10FFFF, false}, // noncharacter
  };

  for (const PrintableCase &Case : Cases) {
    EXPECT_EQ(Case.Printable, isPrintable(Case.CodePoint))
        << "code point " << Case.CodePoint;
  }

  // Test the validity of a fast path in columnWidthUTF8: for each 7-bit value,
  // decode the single byte to UTF-32 and compare the two classifications.
  for (unsigned char Ch = 0; Ch < 128; ++Ch) {
    const UTF8 Utf8Buf[2] = {Ch, 0};
    UTF32 Utf32Buf[1];

    // The converter advances these cursors, so the buffers themselves are
    // addressed through separate pointers.
    const UTF8 *Src = &Utf8Buf[0];
    UTF32 *Dst = &Utf32Buf[0];
    auto Result =
        ConvertUTF8toUTF32(&Src, Src + 1, &Dst, Dst + 1, strictConversion);
    EXPECT_EQ(Result, conversionOK);

    const bool HasUnitWidth =
        columnWidthUTF8(reinterpret_cast<const char *>(Utf8Buf)) == 1;
    const bool Printable = isPrintable(Utf32Buf[0]);
    EXPECT_EQ(HasUnitWidth, Printable);
  }
}
|
|
|
|
} // namespace
|
|
} // namespace unicode
|
|
} // namespace sys
|
|
} // namespace llvm
|