#include #include #include #include struct TestCase { char *input; size_t size; size_t read; uint32_t result; }; BH_UNIT_TEST(Case) { size_t i, j; for (i = 0; i < 0x110000; i++) { j = BH_UnicodeLower(i); if (j == i) { j = BH_UnicodeUpper(i); j = BH_UnicodeLower(j); } else j = BH_UnicodeUpper(j); /* Some exceptions */ if (i == 0x130 && j == 0x49) continue; else if (i == 0x131 && j == 0x69) continue; else if (i == 0x1C5 && j == 0x1C4) continue; else if (i == 0x1C8 && j == 0x1C7) continue; else if (i == 0x1CB && j == 0x1CA) continue; BH_VERIFY(i == j); } return 0; } BH_UNIT_TEST(Utf8) { const struct TestCase *current; const struct TestCase cases[] = { /* Normal cases */ {"\x00", 1, 1, 0}, {"\xC2\x80", 2, 2, 0x80}, {"\xE0\xA0\x80", 3, 3, 0x800}, {"\xF0\x90\x80\x80", 4, 4, 0x10000}, {"\x7F", 1, 1, 0x7F}, {"\xDF\xBF", 2, 2, 0x7FF}, {"\xEF\xBF\xBF", 3, 3, 0xFFFF}, {"\xED\x9F\xBF", 3, 3, 0xD7FF}, {"\xEE\x80\x80", 3, 3, 0xE000}, {"\xEF\xBF\xBD", 3, 3, 0xFFFD}, {"H", 1, 1, 'H'}, {"\xCE\xBA", 2, 2, 0x3BA}, /* Lonely start characters */ {"\xC0 ", 2, 1, -1}, {"\xC1 ", 2, 1, -1}, {"\xC2 ", 2, 1, -1}, {"\xC3 ", 2, 1, -1}, {"\xC4 ", 2, 1, -1}, /* Malformed sequences */ {"\x80", 1, -1, -1}, {"\xBF", 1, -1, -1}, {"\xFE", 1, -1, -1}, {"\xFF", 1, -1, -1}, /* Overlong sequences */ {"\xC0\xAF", 2, 1, -1}, /* UTF-16 surrogate pairs */ {"\xED\xA0\x80", 3, -1, -1}, {"\xED\xAE\x80", 3, -1, -1}, {"\xED\xB0\x80", 3, -1, -1}, {"\xED\xBF\xBF", 3, -1, -1}, {NULL, 0, 0, 0} }; char buffer[8]; uint32_t unit; size_t i, outSize, inSize; /* Encode and decode all character in a valid UTF-8 range */ for (i = 0; i < 0x110000; i++) { inSize = BH_UnicodeEncodeUtf8(i, buffer); /* Check for surrogate pairs */ if (i > 0xD7FF && i < 0xE000) { BH_VERIFY(inSize == 0); continue; } BH_VERIFY(inSize > 0); outSize = BH_UnicodeDecodeUtf8(buffer, inSize, &unit); BH_VERIFY(inSize == outSize); BH_VERIFY(unit == i); } /* Test special cases */ for (current = cases; current->input; current++) { i = BH_UnicodeDecodeUtf8(current->input, current->size, &unit); if (current->read == (size_t)-1 && i) i = -1; if (i != current->read || unit != current->result) { printf("\tcase %d\n", (int)(current - cases)); BH_VERIFY(i == current->read); BH_VERIFY(unit == current->result); } } return 0; } BH_UNIT_TEST(Utf16) { char buffer[8]; uint32_t unit; size_t i, outSize, inSize; /* Encode and decode all character in a valid UTF-8 range */ for (i = 0; i < 0x110000; i++) { /* Check for little endian */ inSize = BH_UnicodeEncodeUtf16LE(i, buffer); if (i > 0xD7FF && i < 0xE000) { BH_VERIFY(inSize == 0); continue; } BH_VERIFY(inSize > 0); outSize = BH_UnicodeDecodeUtf16LE(buffer, inSize, &unit); BH_VERIFY(inSize == outSize); BH_VERIFY(unit == i); /* Check for big endian */ inSize = BH_UnicodeEncodeUtf16BE(i, buffer); if (i > 0xD7FF && i < 0xE000) { BH_VERIFY(inSize == 0); continue; } BH_VERIFY(inSize > 0); outSize = BH_UnicodeDecodeUtf16BE(buffer, inSize, &unit); BH_VERIFY(inSize == outSize); BH_VERIFY(unit == i); } return 0; } BH_UNIT_TEST(Utf32) { char buffer[8]; uint32_t unit; size_t i, outSize, inSize; /* Encode and decode all character in a valid UTF-8 range */ for (i = 0; i < 0x110000; i++) { /* Check for little endian */ inSize = BH_UnicodeEncodeUtf32LE(i, buffer); if (i > 0xD7FF && i < 0xE000) { BH_VERIFY(inSize == 0); continue; } BH_VERIFY(inSize > 0); outSize = BH_UnicodeDecodeUtf32LE(buffer, inSize, &unit); BH_VERIFY(inSize == outSize); BH_VERIFY(unit == i); /* Check for big endian */ inSize = BH_UnicodeEncodeUtf32BE(i, buffer); if (i > 0xD7FF && i < 0xE000) { BH_VERIFY(inSize == 0); continue; } BH_VERIFY(inSize > 0); outSize = BH_UnicodeDecodeUtf32BE(buffer, inSize, &unit); BH_VERIFY(inSize == outSize); BH_VERIFY(unit == i); } return 0; } int main(int argc, char **argv) { BH_UNUSED(argc); BH_UNUSED(argv); BH_UNIT_ADD(Case); BH_UNIT_ADD(Utf8); BH_UNIT_ADD(Utf16); BH_UNIT_ADD(Utf32); return BH_UnitRun(); }