Archibald Elliott 38ac4093d9 [NFCI][Support] Avoid ASSERT_/EXPECT_TRUE(A <op> B)
The error messages in tests are far better when a test fails if the test
is written using ASSERT_/EXPECT_<operator>(A, B) rather than
ASSERT_/EXPECT_TRUE(A <operator> B).

This commit updates all of llvm/unittests/Support to use these macros
where possible.

This change has not been possible in:
- llvm/unittests/Support/FSUniqueIDTest.cpp - due to not overloading
  operators beyond ==, != and <.
- llvm/unittests/Support/BranchProbabilityTest.cpp - where the unchanged
  tests are of the operator overloads themselves.

There are other possibilities of this conversion not being valid, which
have not applied in these tests, as they do not use NULL (they use
nullptr), and they do not use const char* (they use std::string or
StringRef).

Reviewed By: mubashar_

Differential Revision: https://reviews.llvm.org/D117319
2022-01-21 13:15:04 +00:00

108 lines
4.0 KiB
C++

//===- unittests/Support/UnicodeTest.cpp - Unicode.h tests ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/Unicode.h"
#include "llvm/Support/ConvertUTF.h"
#include "gtest/gtest.h"
namespace llvm {
namespace sys {
namespace unicode {
namespace {
TEST(Unicode, columnWidthUTF8) {
  // One row per checked input: the width columnWidthUTF8 is expected to report
  // and the UTF-8 encoded text handed to it.
  struct WidthCase {
    int ExpectedWidth;
    const char *Text;
  };

  static const WidthCase Table[] = {
      // Empty input and plain printable ASCII.
      {0, ""},
      {1, " "},
      {1, "a"},
      {1, "~"},
      {6, "abcdef"},

      // Inputs for which -1 is expected.
      {-1, "\x01"},
      {-1, "\t"},
      {-1, "aaaaaaaaaa\x01"},
      {-1, "\342\200\213"}, // 200B ZERO WIDTH SPACE

      // 00AD SOFT HYPHEN is displayed on most terminals as a space or a dash.
      // Some text editors display it only when a line is broken at it, some
      // use it as a line-break hint, but don't display. We choose
      // terminal-oriented interpretation.
      {1, "\302\255"},

      {0, "\314\200"},     // 0300 COMBINING GRAVE ACCENT
      {1, "\340\270\201"}, // 0E01 THAI CHARACTER KO KAI
      {2, "\344\270\200"}, // CJK UNIFIED IDEOGRAPH-4E00
      {4, "\344\270\200\344\270\200"},
      {3, "q\344\270\200"},
      {3, "\314\200\340\270\201\344\270\200"},

      // Invalid UTF-8 strings, columnWidthUTF8 should error out.
      {-2, "\344"},
      {-2, "\344\270"},
      {-2, "\344\270\033"},
      {-2, "\344\270\300"},
      {-2, "\377\366\355"},
      {-2, "qwer\344"},
      {-2, "qwer\344\270"},
      {-2, "qwer\344\270\033"},
      {-2, "qwer\344\270\300"},
      {-2, "qwer\377\366\355"},

      // UTF-8 sequences longer than 4 bytes correspond to unallocated Unicode
      // characters.
      {-2, "\370\200\200\200\200"},     // U+200000
      {-2, "\374\200\200\200\200\200"}, // U+4000000
  };

  // The row index is streamed into the failure message so that a failing case
  // can be located in the table above.
  unsigned Row = 0;
  for (const WidthCase &Case : Table) {
    EXPECT_EQ(Case.ExpectedWidth, columnWidthUTF8(Case.Text))
        << "table row #" << Row;
    ++Row;
  }
}
// Checks isPrintable() on a selection of code points, then cross-checks it
// against columnWidthUTF8() for every 7-bit code unit.
TEST(Unicode, isPrintable) {
  EXPECT_FALSE(isPrintable(0)); // <control-0000>-<control-001F>
  EXPECT_FALSE(isPrintable(0x01));
  EXPECT_FALSE(isPrintable(0x1F));
  EXPECT_TRUE(isPrintable(' '));
  EXPECT_TRUE(isPrintable('A'));
  EXPECT_TRUE(isPrintable('~'));
  EXPECT_FALSE(isPrintable(0x7F)); // <control-007F>..<control-009F>
  EXPECT_FALSE(isPrintable(0x90));
  EXPECT_FALSE(isPrintable(0x9F));

  EXPECT_TRUE(isPrintable(0xAC));
  EXPECT_TRUE(isPrintable(0xAD)); // SOFT HYPHEN is displayed on most terminals
                                  // as either a space or a dash.
  EXPECT_TRUE(isPrintable(0xAE));

  EXPECT_TRUE(isPrintable(0x0377));  // GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
  EXPECT_FALSE(isPrintable(0x0378)); // <reserved-0378>..<reserved-0379>

  EXPECT_FALSE(isPrintable(0x0600)); // ARABIC NUMBER SIGN

  EXPECT_FALSE(isPrintable(0x1FFFF)); // <reserved-1F774>..<noncharacter-1FFFF>
  EXPECT_TRUE(isPrintable(0x20000));  // CJK UNIFIED IDEOGRAPH-20000

  EXPECT_FALSE(isPrintable(0x10FFFF)); // noncharacter

  // Test the validity of a fast path in columnWidthUTF8: for each code unit
  // below 128, the one-byte string must have width 1 exactly when the decoded
  // code point is reported as printable.
  for (unsigned char c = 0; c < 128; ++c) {
    const UTF8 buf8[2] = {c, 0}; // NUL-terminated one-byte string.
    const UTF8 *Target8 = &buf8[0];
    UTF32 buf32[1];
    UTF32 *Target32 = &buf32[0];
    auto status = ConvertUTF8toUTF32(&Target8, Target8 + 1, &Target32,
                                     Target32 + 1, strictConversion);
    // buf32 is not initialized above, so it must not be read unless the
    // conversion reported success; use a fatal assertion to stop here instead
    // of comparing against an indeterminate value.
    ASSERT_EQ(conversionOK, status);
    EXPECT_EQ(columnWidthUTF8(reinterpret_cast<const char *>(buf8)) == 1,
              static_cast<bool>(isPrintable(buf32[0])));
  }
}
} // namespace
} // namespace unicode
} // namespace sys
} // namespace llvm