diff options
| author | Mikhail Romanko <me@blankhex.com> | 2025-04-05 12:48:29 +0300 |
|---|---|---|
| committer | Mikhail Romanko <me@blankhex.com> | 2025-04-05 13:56:00 +0300 |
| commit | 6aee5a83aa009c5e2cd5be5278c0b3b1fdb1325d (patch) | |
| tree | 4920c7efbfc2b6627e6a97ba48f4054709b4e3af | |
| parent | f9ebeabb18397f0ec6eba6223c556c70183c3fef (diff) | |
| download | bhlib-6aee5a83aa009c5e2cd5be5278c0b3b1fdb1325d.tar.gz | |
Refactor string functions, add unicode support
Refactored existing functions. Added Unicode support and UTF-8, UTF-16,
and UTF-32 encoding/decoding.
| -rw-r--r-- | CMakeLists.txt | 1 | ||||
| -rw-r--r-- | include/BH/Algo.h | 6 | ||||
| -rw-r--r-- | include/BH/String.h | 190 | ||||
| -rw-r--r-- | src/String/Float.c | 122 | ||||
| -rw-r--r-- | src/String/Inline/Int.h (renamed from src/String/FromInt.inl) | 0 | ||||
| -rw-r--r-- | src/String/Inline/Mpi.h (renamed from src/String/BInt.h) | 218 | ||||
| -rw-r--r-- | src/String/Inline/Signed.h (renamed from src/String/ToIntS.inl) | 0 | ||||
| -rw-r--r-- | src/String/Inline/Unicode.h | 438 | ||||
| -rw-r--r-- | src/String/Inline/Unsigned.h (renamed from src/String/ToIntU.inl) | 0 | ||||
| -rw-r--r-- | src/String/Int.c | 32 | ||||
| -rw-r--r-- | src/String/Unicode.c | 280 |
11 files changed, 1098 insertions, 189 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index abecdf3..6efa99e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,6 +94,7 @@ set(BH_SOURCE src/Util.c src/String/Int.c src/String/Float.c + src/String/Unicode.c ) set(BH_HEADER diff --git a/include/BH/Algo.h b/include/BH/Algo.h index 4cb940c..2164c15 100644 --- a/include/BH/Algo.h +++ b/include/BH/Algo.h @@ -8,9 +8,9 @@ /** * Exchanges values between \a src and \a dest elements of sizze \a size. * - * \param dest Destination element pointer - * \param src Source element pointer - * \param size Element size in bytes + * \param dest Destination element pointer + * \param src Source element pointer + * \param size Element size in bytes */ void BH_Swap(void *dest, void *src, diff --git a/include/BH/String.h b/include/BH/String.h index 1484889..ba855fd 100644 --- a/include/BH/String.h +++ b/include/BH/String.h @@ -389,4 +389,194 @@ uint64_t BH_StringToInt64u(const char *string, int base); +/** + * Converts Unicode \a unit codepoint to lowercase. + * + * \param unit Codepoint + * + * \return On success, returns lowercased codepoint. + * \return On failure, returns codepoint without a change. + */ +uint32_t BH_UnicodeLower(uint32_t unit); + + +/** + * Converts Unicode \a unit codepoint to uppercase. + * + * \param unit Codepoint + * + * \return On success, returns uppercased codepoint. + * \return On failure, returns codepoint without a change. + */ +uint32_t BH_UnicodeUpper(uint32_t unit); + + +/** + * Decodes a UTF-8 sequence from a \a string (with the given \a size), storing + * the result in a \a unit and returning the number of bytes read. + * + * Invalid UTF-8 sequences result in a -1 codepoint. + * + * \param string String + * \param size Size + * \param unit Codepoint + * + * \return Returns the number of bytes read. + * \return Returns zero if the string contains only a portion of the sequence. 
+ */ +size_t BH_UnicodeDecodeUtf8(const char *string, + size_t size, + uint32_t *unit); + + +/** + * Encodes a Unicode \a unit to UTF-8 encoded bytes (stored in a \a string) + * and returns the number of bytes written. + * + * The string is assumed to have 4 bytes of space. + * + * \param unit Codepoint + * \param string String + * + * \return On success, returns the number of bytes written. + * \return On failure, returns zero. + */ +size_t BH_UnicodeEncodeUtf8(uint32_t unit, + char *string); + + +/** + * Decodes a UTF-16LE sequence from a \a string (with the given \a size), + * storing the result in a \a unit and returning the number of bytes read. + * + * Invalid UTF-16 sequences result in a -1 codepoint. + * + * \param string String + * \param size Size + * \param unit Codepoint + * + * \return Returns the number of bytes read. + * \return Returns zero if the string contains only a portion of the sequence. + */ +size_t BH_UnicodeDecodeUtf16LE(const char *string, + size_t size, + uint32_t *unit); + + +/** + * Decodes a UTF-16BE sequence from a \a string (with the given \a size), + * storing the result in a \a unit and returning the number of bytes read. + * + * Invalid UTF-16 sequences result in a -1 codepoint. + * + * \param string String + * \param size Size + * \param unit Codepoint + * + * \return Returns the number of bytes read. + * \return Returns zero if the string contains only a portion of the sequence. + */ +size_t BH_UnicodeDecodeUtf16BE(const char *string, + size_t size, + uint32_t *unit); + + +/** + * Encodes a Unicode \a unit to UTF-16LE encoded bytes (stored in a \a string) + * and returns the number of bytes written. + * + * The string is assumed to have 4 bytes of space. + * + * \param unit Codepoint + * \param string String + * + * \return On success, returns the number of bytes written. + * \return On failure, returns zero. 
+ */ +size_t BH_UnicodeEncodeUtf16LE(uint32_t unit, + char *string); + + +/** + * Encodes a Unicode \a unit to UTF-16BE encoded bytes (stored in a \a string) + * and returns the number of bytes written. + * + * The string is assumed to have 4 bytes of space. + * + * \param unit Codepoint + * \param string String + * + * \return On success, returns the number of bytes written. + * \return On failure, returns zero. + */ +size_t BH_UnicodeEncodeUtf16BE(uint32_t unit, + char *string); + + +/** + * Decodes a UTF-32LE symbol from a \a string (with the given \a size), + * storing the result in a \a unit and returning the number of bytes read. + * + * Invalid UTF-32 sequences result in a -1 codepoint. + * + * \param string String + * \param size Size + * \param unit Codepoint + * + * \return Returns the number of bytes read. + * \return Returns zero if the string contains only a portion of the sequence. + */ +size_t BH_UnicodeDecodeUtf32LE(const char *string, + size_t size, + uint32_t *unit); + + +/** + * Decodes a UTF-32BE symbol from a \a string (with the given \a size), + * storing the result in a \a unit and returning the number of bytes read. + * + * Invalid UTF-32 sequences result in a -1 codepoint. + * + * \param string String + * \param size Size + * \param unit Codepoint + * + * \return Returns the number of bytes read. + * \return Returns zero if the string contains only a portion of the sequence. + */ +size_t BH_UnicodeDecodeUtf32BE(const char *string, + size_t size, + uint32_t *unit); + + +/** + * Encodes a Unicode \a unit to UTF-32LE encoded bytes (stored in a \a string) + * and returns the number of bytes written. + * + * The string is assumed to have 4 bytes of space. + * + * \param unit Codepoint + * \param string String + * + * \return On success, returns the number of bytes written. + * \return On failure, returns zero. 
+ */ +size_t BH_UnicodeEncodeUtf32LE(uint32_t unit, + char *string); + +/** + * Encodes a Unicode \a unit to UTF-32BE encoded bytes (stored in a \a string) + * and returns the number of bytes written. + * + * The string is assumed to have 4 bytes of space. + * + * \param unit Codepoint + * \param string String + * + * \return On success, returns the number of bytes written. + * \return On failure, returns zero. + */ +size_t BH_UnicodeEncodeUtf32BE(uint32_t unit, + char *string); + #endif /* BH_STRING_H */ diff --git a/src/String/Float.c b/src/String/Float.c index 9c4e98c..a8ed44d 100644 --- a/src/String/Float.c +++ b/src/String/Float.c @@ -8,7 +8,7 @@ #include <string.h> -#include "BInt.h" +#include "Inline/Mpi.h" /* Common defines */ @@ -31,11 +31,11 @@ struct Buffer struct DragonState { - BInt r; - BInt s; - BInt mm; - BInt mp; - BInt tmp[5]; + Mpi r; + Mpi s; + Mpi mm; + Mpi mp; + Mpi tmp[5]; long k; int cutoff; }; @@ -55,21 +55,21 @@ static void dragonFixup(struct DragonState *state, /* Account for unqual gaps */ if (f == (((uint64_t)1) << 52)) { - BIntLsh(&state->mp, 1, &state->mp); - BIntLsh(&state->r, 1, &state->r); - BIntLsh(&state->s, 1, &state->s); + MpiLsh(&state->mp, 1, &state->mp); + MpiLsh(&state->r, 1, &state->r); + MpiLsh(&state->s, 1, &state->s); } state->k = 0; /* Burger/Dybvig approach */ #ifndef BH_TWEAK_SHORT_BINT - state->k = BIntClz((f >> 32) & BINT_MASK); - state->k += (state->k == 32) ? (BIntClz(f & BINT_MASK)) : (0); + state->k = MpiClz((f >> 32) & MPI_MASK); + state->k += (state->k == 32) ? (MpiClz(f & MPI_MASK)) : (0); #else - state->k = BIntClz((f >> 48) & BINT_MASK); - state->k += (state->k == 16) ? (BIntClz((f >> 32) & BINT_MASK)) : (0); - state->k += (state->k == 32) ? (BIntClz((f >> 16) & BINT_MASK)) : (0); - state->k += (state->k == 48) ? (BIntClz(f & BINT_MASK)) : (0); + state->k = MpiClz((f >> 48) & MPI_MASK); + state->k += (state->k == 16) ? (MpiClz((f >> 32) & MPI_MASK)) : (0); + state->k += (state->k == 32) ? 
(MpiClz((f >> 16) & MPI_MASK)) : (0); + state->k += (state->k == 48) ? (MpiClz(f & MPI_MASK)) : (0); #endif /* 77 / 256 is an approximation for Log(2) or 0.30102999 */ @@ -82,18 +82,18 @@ static void dragonFixup(struct DragonState *state, /* Scale numbers accordinaly */ if (state->k < 0) { - BIntPow10(&state->r, -state->k, &state->r, state->tmp); - BIntPow10(&state->mm, -state->k, &state->mm, state->tmp); - BIntPow10(&state->mp, -state->k, &state->mp, state->tmp); + MpiPow10(&state->r, -state->k, &state->r, state->tmp); + MpiPow10(&state->mm, -state->k, &state->mm, state->tmp); + MpiPow10(&state->mp, -state->k, &state->mp, state->tmp); } else if (state->k > 0) - BIntPow10(&state->s, state->k, &state->s, state->tmp); + MpiPow10(&state->s, state->k, &state->s, state->tmp); /* Scale S if we underestimated */ - if (BIntCompare(&state->r, &state->s) >= 0) + if (MpiCompare(&state->r, &state->s) >= 0) { state->k += 1; - BIntMulDigit(&state->s, 10, &state->s); + MpiMulDigit(&state->s, 10, &state->s); } /* Find cutoff */ @@ -122,8 +122,8 @@ static void dragonRound(struct DragonState *state, /* Check if rounding up required */ if (high == low) { - BIntLsh(&state->r, 1, &state->tmp[0]); - i = BIntCompare(&state->tmp[0], &state->s); + MpiLsh(&state->r, 1, &state->tmp[0]); + i = MpiCompare(&state->tmp[0], &state->s); if (i < 0) { low = 1; high = 0; } else if (i > 0) { low = 0; high = 1; } else low = (((s - '0') & 0x1) == 0); @@ -175,22 +175,22 @@ static void dragon(double value, /* Prepare dragon */ f = frexp(value, &e) * ((uint64_t)1 << 53); #ifndef BH_TWEAK_SHORT_BINT - state.r.data[0] = f & BINT_MASK; - state.r.data[1] = (f >> 32) & BINT_MASK; + state.r.data[0] = f & MPI_MASK; + state.r.data[1] = (f >> 32) & MPI_MASK; state.r.size = 2; #else - state.r.data[0] = f & BINT_MASK; - state.r.data[1] = (f >> 16) & BINT_MASK; - state.r.data[2] = (f >> 32) & BINT_MASK; - state.r.data[3] = (f >> 48) & BINT_MASK; + state.r.data[0] = f & MPI_MASK; + state.r.data[1] = (f >> 16) & 
MPI_MASK; + state.r.data[2] = (f >> 32) & MPI_MASK; + state.r.data[3] = (f >> 48) & MPI_MASK; state.r.size = 4; #endif - BIntTrim(&state.r); + MpiTrim(&state.r); - BIntLsh(&state.r, MAX(e - 53, 0), &state.r); - BIntLsh(&BInt1, MAX(0, -(e - 53)), &state.s); - BIntLsh(&BInt1, MAX(e - 53, 0), &state.mm); - BIntLsh(&BInt1, MAX(e - 53, 0), &state.mp); + MpiLsh(&state.r, MAX(e - 53, 0), &state.r); + MpiLsh(&BInt1, MAX(0, -(e - 53)), &state.s); + MpiLsh(&BInt1, MAX(e - 53, 0), &state.mm); + MpiLsh(&BInt1, MAX(e - 53, 0), &state.mp); dragonFixup(&state, precision, mode, f, e); /* Main digit generation loop */ @@ -198,8 +198,8 @@ static void dragon(double value, while(1) { state.k -= 1; - BIntMulDigit(&state.r, 10, &state.r); - BIntDiv(&state.r, &state.s, &state.tmp[0], &state.r, &state.tmp[1]); + MpiMulDigit(&state.r, 10, &state.r); + MpiDiv(&state.r, &state.s, &state.tmp[0], &state.r, &state.tmp[1]); s = '0'; if (state.tmp[0].size) @@ -208,13 +208,13 @@ static void dragon(double value, if (mode == NORMAL) { - BIntMulDigit(&state.mm, 10, &state.mm); - BIntMulDigit(&state.mp, 10, &state.mp); - BIntLsh(&state.r, 1, &state.tmp[1]); - BIntLsh(&state.s, 1, &state.tmp[2]); - BIntAdd(&state.tmp[1], &state.mp, &state.tmp[3]); - low = BIntCompare(&state.tmp[1], &state.mm) < 0; - high = BIntCompare(&state.tmp[3], &state.tmp[2]) > 0; + MpiMulDigit(&state.mm, 10, &state.mm); + MpiMulDigit(&state.mp, 10, &state.mp); + MpiLsh(&state.r, 1, &state.tmp[1]); + MpiLsh(&state.s, 1, &state.tmp[2]); + MpiAdd(&state.tmp[1], &state.mp, &state.tmp[3]); + low = MpiCompare(&state.tmp[1], &state.mm) < 0; + high = MpiCompare(&state.tmp[3], &state.tmp[2]) > 0; if (low || high || state.k == state.cutoff || buffer->size >= BUFSIZE) break; } @@ -617,7 +617,7 @@ double BH_StringToDouble(const char *string, size_t *size) { int type, e, sign, i, count, shift; - BInt r, s, tmp[5]; + Mpi r, s, tmp[5]; char buffer[20]; double result; uint64_t f; @@ -668,43 +668,43 @@ double BH_StringToDouble(const char *string, 
for (i = 0; i < count; i++) { tmp[0].data[0] = buffer[i] - '0'; - BIntMulDigit(&r, 10, &r); - BIntAdd(&r, &tmp[0], &r); + MpiMulDigit(&r, 10, &r); + MpiAdd(&r, &tmp[0], &r); } if (e >= 0) - BIntPow10(&r, e, &r, &tmp[0]); + MpiPow10(&r, e, &r, &tmp[0]); else - BIntPow10(&s, -e, &s, &tmp[0]); + MpiPow10(&s, -e, &s, &tmp[0]); /* Calculate required shift */ shift = -52; - if (BIntCompare(&r, &s) >= 0) + if (MpiCompare(&r, &s) >= 0) { - BIntDiv(&r, &s, &tmp[0], &tmp[1], &tmp[2]); - shift += BIntLog2(&tmp[0]); + MpiDiv(&r, &s, &tmp[0], &tmp[1], &tmp[2]); + shift += MpiLog2(&tmp[0]); } else { - BIntDiv(&s, &r, &tmp[0], &tmp[1], &tmp[2]); - shift += -(BIntLog2(&tmp[0]) + 1); + MpiDiv(&s, &r, &tmp[0], &tmp[1], &tmp[2]); + shift += -(MpiLog2(&tmp[0]) + 1); } /* Shift */ if (shift > 0) - BIntLsh(&s, shift, &s); + MpiLsh(&s, shift, &s); else if (shift < 0) - BIntLsh(&r, -shift, &r); + MpiLsh(&r, -shift, &r); /* Calculate final exponent and 53 bit integer */ - BIntDiv(&r, &s, &tmp[0], &tmp[1], &tmp[2]); - BIntRsh(&s, 1, &s); - if (BIntCompare(&tmp[1], &s) > 0 || (BIntCompare(&tmp[1], &s) == 0 && (tmp[0].data[0] & 0x1))) + MpiDiv(&r, &s, &tmp[0], &tmp[1], &tmp[2]); + MpiRsh(&s, 1, &s); + if (MpiCompare(&tmp[1], &s) > 0 || (MpiCompare(&tmp[1], &s) == 0 && (tmp[0].data[0] & 0x1))) { - BIntAdd(&tmp[0], &BInt1, &tmp[0]); - if (BIntCompare(&tmp[0], &BInt53) >= 0) + MpiAdd(&tmp[0], &BInt1, &tmp[0]); + if (MpiCompare(&tmp[0], &BInt53) >= 0) { - BIntRsh(&tmp[0], 1, &tmp[0]); + MpiRsh(&tmp[0], 1, &tmp[0]); shift++; } } diff --git a/src/String/FromInt.inl b/src/String/Inline/Int.h index 57eaee0..57eaee0 100644 --- a/src/String/FromInt.inl +++ b/src/String/Inline/Int.h diff --git a/src/String/BInt.h b/src/String/Inline/Mpi.h index 49f304b..efaa388 100644 --- a/src/String/BInt.h +++ b/src/String/Inline/Mpi.h @@ -1,23 +1,23 @@ /* Platform dependant definition */ #ifndef BH_TWEAK_SHORT_BINT -#define BINT_SIZE 40 -#define BINT_TYPE uint32_t -#define BINT_TTYPE uint64_t -#define BINT_BITS 32 
-#define BINT_MASK 0xFFFFFFFFul +#define MPI_SIZE 40 +#define MPI_TYPE uint32_t +#define MPI_TTYPE uint64_t +#define MPI_BITS 32 +#define MPI_MASK 0xFFFFFFFFul #else -#define BINT_SIZE 80 -#define BINT_TYPE uint16_t -#define BINT_TTYPE uint32_t -#define BINT_BITS 16 -#define BINT_MASK 0xFFFFu +#define MPI_SIZE 80 +#define MPI_TYPE uint16_t +#define MPI_TTYPE uint32_t +#define MPI_BITS 16 +#define MPI_MASK 0xFFFFu #endif -typedef struct BInt { +typedef struct Mpi { int size; - BINT_TYPE data[BINT_SIZE]; -} BInt; + MPI_TYPE data[MPI_SIZE]; +} Mpi; static const uint8_t clzLookup[256] = @@ -42,11 +42,11 @@ static const uint8_t clzLookup[256] = #ifndef BH_TWEAK_SHORT_BINT -static const BInt BInt1 = {1, {0x00000001ul}}; -static const BInt BInt53 = {2, {0x00000000ul, 0x00200000ul}}; +static const Mpi BInt1 = {1, {0x00000001ul}}; +static const Mpi BInt53 = {2, {0x00000000ul, 0x00200000ul}}; -static const BInt powLookup[] = +static const Mpi powLookup[] = { {1, {0x0000000Aul}}, {1, {0x00000064ul}}, @@ -68,7 +68,7 @@ static const BInt powLookup[] = }; -static int BIntClz(BINT_TYPE value) +static int MpiClz(MPI_TYPE value) { if (value & 0xFF000000ul) return clzLookup[(value >> 24) & 0xFF]; @@ -80,11 +80,11 @@ static int BIntClz(BINT_TYPE value) return 24 + clzLookup[value & 0xFF]; } #else -static const BInt BInt1 = {1, {0x0001u}}; -static const BInt BInt53 = {4, {0x0000u, 0x0000u, 0x0000u, 0x0020u}}; +static const Mpi BInt1 = {1, {0x0001u}}; +static const Mpi BInt53 = {4, {0x0000u, 0x0000u, 0x0000u, 0x0020u}}; -static const BInt powLookup[] = +static const Mpi powLookup[] = { {1, {0x000Au}}, {1, {0x0064u}}, @@ -110,7 +110,7 @@ static const BInt powLookup[] = }; -static int BIntClz(BINT_TYPE value) +static int MpiClz(MPI_TYPE value) { if (value & 0xFF00) return clzLookup[(value >> 8) & 0xFF]; @@ -120,18 +120,18 @@ static int BIntClz(BINT_TYPE value) #endif -static int BIntLog2(const BInt *in) +static int MpiLog2(const Mpi *in) { /* Preconditions */ assert(in != NULL); 
assert(in->size != 0); assert(in->data[in->size - 1] != 0); - return (BINT_BITS - 1) - BIntClz(in->data[in->size - 1]) + BINT_BITS * (in->size - 1); + return (MPI_BITS - 1) - MpiClz(in->data[in->size - 1]) + MPI_BITS * (in->size - 1); } -static void BIntTrim(BInt *in) +static void MpiTrim(Mpi *in) { /* Preconditions */ assert(in != NULL); @@ -141,8 +141,8 @@ static void BIntTrim(BInt *in) } -static int BIntCompare(const BInt *a, - const BInt *b) +static int MpiCompare(const Mpi *a, + const Mpi *b) { int i; @@ -167,17 +167,17 @@ static int BIntCompare(const BInt *a, } -static void BIntAdd(const BInt *a, - const BInt *b, - BInt *out) +static void MpiAdd(const Mpi *a, + const Mpi *b, + Mpi *out) { - BINT_TTYPE carry; + MPI_TTYPE carry; int i; /* Preconditions */ assert(a != NULL && b != NULL && out != NULL); - assert(a->size + 1 <= BINT_SIZE); - assert(b->size + 1 <= BINT_SIZE); + assert(a->size + 1 <= MPI_SIZE); + assert(b->size + 1 <= MPI_SIZE); /* Addition loop */ carry = 0; @@ -188,8 +188,8 @@ static void BIntAdd(const BInt *a, if (i < b->size) carry += b->data[i]; - out->data[i] = carry & BINT_MASK; - carry = (carry >> BINT_BITS); + out->data[i] = carry & MPI_MASK; + carry = (carry >> MPI_BITS); } /* Handle new digit */ @@ -200,16 +200,16 @@ static void BIntAdd(const BInt *a, } -static void BIntSub(const BInt *a, - const BInt *b, - BInt *out) +static void MpiSub(const Mpi *a, + const Mpi *b, + Mpi *out) { - BINT_TTYPE carry; + MPI_TTYPE carry; int i; /* Preconditions */ assert(a != NULL && b != NULL && out != NULL); - assert(BIntCompare(a, b) >= 0); + assert(MpiCompare(a, b) >= 0); /* Main subtraction loop */ carry = 0; @@ -220,27 +220,27 @@ static void BIntSub(const BInt *a, if (i < b->size) carry -= b->data[i]; - out->data[i] = carry & BINT_MASK; - carry = carry >> BINT_BITS; - carry |= (carry << BINT_BITS); + out->data[i] = carry & MPI_MASK; + carry = carry >> MPI_BITS; + carry |= (carry << MPI_BITS); } /* Trim leading zeros */ out->size = a->size; - 
BIntTrim(out); + MpiTrim(out); } -static void BIntMul(const BInt *a, - const BInt *b, - BInt *out) +static void MpiMul(const Mpi *a, + const Mpi *b, + Mpi *out) { - BINT_TTYPE carry; + MPI_TTYPE carry; int i, j; /* Preconditions */ assert(a != NULL && b != NULL && out != NULL); - assert(a->size + b->size <= BINT_SIZE); + assert(a->size + b->size <= MPI_SIZE); /* Zero out the result */ memset(out->data, 0, sizeof(out->data)); @@ -252,50 +252,50 @@ static void BIntMul(const BInt *a, for (j = 0; j < b->size; j++) { carry += out->data[i + j]; - carry += (BINT_TTYPE)a->data[i] * (BINT_TTYPE)b->data[j]; - out->data[i + j] = carry & BINT_MASK; - carry = (carry >> BINT_BITS); + carry += (MPI_TTYPE)a->data[i] * (MPI_TTYPE)b->data[j]; + out->data[i + j] = carry & MPI_MASK; + carry = (carry >> MPI_BITS); } out->data[i + j] += carry; } /* Trim leading zeros */ out->size = a->size + b->size; - BIntTrim(out); + MpiTrim(out); } -static void BIntMulDigit(const BInt *a, - BINT_TYPE b, - BInt *out) +static void MpiMulDigit(const Mpi *a, + MPI_TYPE b, + Mpi *out) { - BINT_TTYPE carry; + MPI_TTYPE carry; int i; /* Preconditions */ assert(a != NULL && out != NULL); - assert(a->size + 1 <= BINT_SIZE); + assert(a->size + 1 <= MPI_SIZE); /* Multiplication loop */ carry = 0; for (i = 0; i < a->size; i++) { - carry += (BINT_TTYPE)a->data[i] * b; - out->data[i] = carry & BINT_MASK; - carry = (carry >> BINT_BITS); + carry += (MPI_TTYPE)a->data[i] * b; + out->data[i] = carry & MPI_MASK; + carry = (carry >> MPI_BITS); } out->data[i] = carry; /* Trim leading zeros */ out->size = a->size + 1; - BIntTrim(out); + MpiTrim(out); } -static void BIntPow10(const BInt *in, - int exponent, - BInt *out, - BInt *tmp) +static void MpiPow10(const Mpi *in, + int exponent, + Mpi *out, + Mpi *tmp) { int i, current; @@ -309,26 +309,26 @@ static void BIntPow10(const BInt *in, if (!(exponent & 0x1)) continue; - BIntMul(&tmp[current], &powLookup[i], &tmp[1 - current]); + MpiMul(&tmp[current], &powLookup[i], &tmp[1 - 
current]); current = 1 - current; } *out = tmp[current]; } -static void BIntLsh(const BInt *in, - int amount, - BInt *out) +static void MpiLsh(const Mpi *in, + int amount, + Mpi *out) { int blocks, bits, i; - BINT_TYPE low, high; + MPI_TYPE low, high; /* Preconditions */ assert(in != NULL && out != NULL); - assert(amount >= 0 && in->size + (amount + BINT_BITS - 1) / BINT_BITS <= BINT_SIZE); + assert(amount >= 0 && in->size + (amount + MPI_BITS - 1) / MPI_BITS <= MPI_SIZE); - blocks = amount / BINT_BITS; - bits = amount % BINT_BITS; + blocks = amount / MPI_BITS; + bits = amount % MPI_BITS; if (!in->size) { out->size = 0; @@ -341,7 +341,7 @@ static void BIntLsh(const BInt *in, high = 0; for (i = in->size + blocks; i > blocks; i--) { - low = in->data[i - blocks - 1] >> (BINT_BITS - bits); + low = in->data[i - blocks - 1] >> (MPI_BITS - bits); out->data[i] = low | high; high = in->data[i - blocks - 1] << bits; } @@ -356,25 +356,25 @@ static void BIntLsh(const BInt *in, } /* Trim leading zeros and zero out lower blocks */ - BIntTrim(out); + MpiTrim(out); for (i = blocks; i; i--) out->data[i - 1] = 0; } -static void BIntRsh(const BInt *in, - int amount, - BInt *out) +static void MpiRsh(const Mpi *in, + int amount, + Mpi *out) { int blocks, bits, i; - BINT_TYPE low, high; + MPI_TYPE low, high; /* Preconditions */ assert(in != NULL && out != NULL); assert(amount >= 0); - blocks = amount / BINT_BITS; - bits = amount % BINT_BITS; + blocks = amount / MPI_BITS; + bits = amount % MPI_BITS; /* Zero size input or shift is bigger then input */ if (in->size == 0 || in->size <= blocks) @@ -390,7 +390,7 @@ static void BIntRsh(const BInt *in, high = 0; for (i = 0; i < in->size - blocks - 1; i++) { - high = in->data[i + blocks + 1] << (BINT_BITS - bits); + high = in->data[i + blocks + 1] << (MPI_BITS - bits); out->data[i] = low | high; low = in->data[i + blocks + 1] >> bits; } @@ -404,38 +404,38 @@ static void BIntRsh(const BInt *in, /* Trim leading zeros */ out->size = in->size - 
blocks; - BIntTrim(out); + MpiTrim(out); } -static BINT_TTYPE BIntGuess(const BInt *a, - const BInt *b) +static MPI_TTYPE MpiGuess(const Mpi *a, + const Mpi *b) { - BINT_TTYPE tmp; + MPI_TTYPE tmp; /* Preconditions */ assert(a != NULL && b != NULL); assert(a->size > 0 && b->size > 0); assert((a->size == b->size) || ((a->size != b->size) && a->size > 1)); - if (BIntCompare(a, b) < 0) + if (MpiCompare(a, b) < 0) return 0; tmp = a->data[a->size - 1]; if (a->size != b->size) - tmp = (tmp << BINT_BITS) | a->data[a->size - 2]; + tmp = (tmp << MPI_BITS) | a->data[a->size - 2]; return tmp / b->data[b->size - 1]; } -static void BIntDiv(const BInt *a, - const BInt *b, - BInt *q, - BInt *r, - BInt *tmp) +static void MpiDiv(const Mpi *a, + const Mpi *b, + Mpi *q, + Mpi *r, + Mpi *tmp) { - BINT_TTYPE digit; + MPI_TTYPE digit; int shift; /* Preconditions */ @@ -443,7 +443,7 @@ static void BIntDiv(const BInt *a, assert(b->size != 0); /* Handle case where a is less then b */ - if (BIntCompare(a, b) < 0) + if (MpiCompare(a, b) < 0) { *r = *a; q->size = 0; @@ -451,16 +451,16 @@ static void BIntDiv(const BInt *a, } /* Normilize input to reduce tries */ - shift = BIntClz(b->data[b->size - 1]); - BIntLsh(a, shift, &tmp[0]); - BIntLsh(b, shift, &tmp[1]); + shift = MpiClz(b->data[b->size - 1]); + MpiLsh(a, shift, &tmp[0]); + MpiLsh(b, shift, &tmp[1]); /* Prepare first step of the division */ q->size = 0; r->size = 0; - while (BIntCompare(r, &tmp[1]) < 0) + while (MpiCompare(r, &tmp[1]) < 0) { - BIntLsh(r, BINT_BITS, r); + MpiLsh(r, MPI_BITS, r); r->data[0] = tmp[0].data[--tmp[0].size]; r->size += !r->size; } @@ -468,19 +468,19 @@ static void BIntDiv(const BInt *a, while (1) { /* Make a guess and check */ - digit = BIntGuess(r, &tmp[1]); - while (digit > BINT_MASK) + digit = MpiGuess(r, &tmp[1]); + while (digit > MPI_MASK) digit--; - BIntMulDigit(&tmp[1], digit, &tmp[2]); - while (BIntCompare(r, &tmp[2]) < 0) + MpiMulDigit(&tmp[1], digit, &tmp[2]); + while (MpiCompare(r, &tmp[2]) < 0) { 
--digit; - BIntSub(&tmp[2], &tmp[1], &tmp[2]); + MpiSub(&tmp[2], &tmp[1], &tmp[2]); } /* Store digit in quotient */ - BIntSub(r, &tmp[2], r); - BIntLsh(q, BINT_BITS, q); + MpiSub(r, &tmp[2], r); + MpiLsh(q, MPI_BITS, q); q->data[0] = digit; q->size += !q->size; @@ -488,12 +488,12 @@ static void BIntDiv(const BInt *a, if (!tmp[0].size) break; - BIntLsh(r, BINT_BITS, r); + MpiLsh(r, MPI_BITS, r); r->data[0] = tmp[0].data[--tmp[0].size]; if (!r->size) r->size = 1; } /* Normilize remainder */ - BIntRsh(r, shift, r); + MpiRsh(r, shift, r); } diff --git a/src/String/ToIntS.inl b/src/String/Inline/Signed.h index f6ba991..f6ba991 100644 --- a/src/String/ToIntS.inl +++ b/src/String/Inline/Signed.h diff --git a/src/String/Inline/Unicode.h b/src/String/Inline/Unicode.h new file mode 100644 index 0000000..7b69392 --- /dev/null +++ b/src/String/Inline/Unicode.h @@ -0,0 +1,438 @@ +#define UTF8_OK 0 +#define UTF8_ERROR 96 +#define UTF16_NORMAL 0 +#define UTF16_LOWSUR 1 +#define UTF16_HIGHSUR 2 + + +struct CaseMap +{ + uint16_t from; + uint16_t to; +}; + + +static struct CaseMap lookupLower[] = +{ + {0x0041, 0x0061}, {0x0042, 0x0062}, {0x0043, 0x0063}, {0x0044, 0x0064}, + {0x0045, 0x0065}, {0x0046, 0x0066}, {0x0047, 0x0067}, {0x0048, 0x0068}, + {0x0049, 0x0069}, {0x004A, 0x006A}, {0x004B, 0x006B}, {0x004C, 0x006C}, + {0x004D, 0x006D}, {0x004E, 0x006E}, {0x004F, 0x006F}, {0x0050, 0x0070}, + {0x0051, 0x0071}, {0x0052, 0x0072}, {0x0053, 0x0073}, {0x0054, 0x0074}, + {0x0055, 0x0075}, {0x0056, 0x0076}, {0x0057, 0x0077}, {0x0058, 0x0078}, + {0x0059, 0x0079}, {0x005A, 0x007A}, {0x00C0, 0x00E0}, {0x00C1, 0x00E1}, + {0x00C2, 0x00E2}, {0x00C3, 0x00E3}, {0x00C4, 0x00E4}, {0x00C5, 0x00E5}, + {0x00C6, 0x00E6}, {0x00C7, 0x00E7}, {0x00C8, 0x00E8}, {0x00C9, 0x00E9}, + {0x00CA, 0x00EA}, {0x00CB, 0x00EB}, {0x00CC, 0x00EC}, {0x00CD, 0x00ED}, + {0x00CE, 0x00EE}, {0x00CF, 0x00EF}, {0x00D0, 0x00F0}, {0x00D1, 0x00F1}, + {0x00D2, 0x00F2}, {0x00D3, 0x00F3}, {0x00D4, 0x00F4}, {0x00D5, 0x00F5}, + {0x00D6, 
0x00F6}, {0x00D8, 0x00F8}, {0x00D9, 0x00F9}, {0x00DA, 0x00FA}, + {0x00DB, 0x00FB}, {0x00DC, 0x00FC}, {0x00DD, 0x00FD}, {0x00DE, 0x00FE}, + {0x0100, 0x0101}, {0x0102, 0x0103}, {0x0104, 0x0105}, {0x0106, 0x0107}, + {0x0108, 0x0109}, {0x010A, 0x010B}, {0x010C, 0x010D}, {0x010E, 0x010F}, + {0x0110, 0x0111}, {0x0112, 0x0113}, {0x0114, 0x0115}, {0x0116, 0x0117}, + {0x0118, 0x0119}, {0x011A, 0x011B}, {0x011C, 0x011D}, {0x011E, 0x011F}, + {0x0120, 0x0121}, {0x0122, 0x0123}, {0x0124, 0x0125}, {0x0126, 0x0127}, + {0x0128, 0x0129}, {0x012A, 0x012B}, {0x012C, 0x012D}, {0x012E, 0x012F}, + {0x0130, 0x0069}, {0x0132, 0x0133}, {0x0134, 0x0135}, {0x0136, 0x0137}, + {0x0139, 0x013A}, {0x013B, 0x013C}, {0x013D, 0x013E}, {0x013F, 0x0140}, + {0x0141, 0x0142}, {0x0143, 0x0144}, {0x0145, 0x0146}, {0x0147, 0x0148}, + {0x014A, 0x014B}, {0x014C, 0x014D}, {0x014E, 0x014F}, {0x0150, 0x0151}, + {0x0152, 0x0153}, {0x0154, 0x0155}, {0x0156, 0x0157}, {0x0158, 0x0159}, + {0x015A, 0x015B}, {0x015C, 0x015D}, {0x015E, 0x015F}, {0x0160, 0x0161}, + {0x0162, 0x0163}, {0x0164, 0x0165}, {0x0166, 0x0167}, {0x0168, 0x0169}, + {0x016A, 0x016B}, {0x016C, 0x016D}, {0x016E, 0x016F}, {0x0170, 0x0171}, + {0x0172, 0x0173}, {0x0174, 0x0175}, {0x0176, 0x0177}, {0x0178, 0x00FF}, + {0x0179, 0x017A}, {0x017B, 0x017C}, {0x017D, 0x017E}, {0x0181, 0x0253}, + {0x0182, 0x0183}, {0x0184, 0x0185}, {0x0186, 0x0254}, {0x0187, 0x0188}, + {0x018A, 0x0257}, {0x018B, 0x018C}, {0x018E, 0x0258}, {0x018F, 0x0259}, + {0x0190, 0x025B}, {0x0191, 0x0192}, {0x0193, 0x0260}, {0x0194, 0x0263}, + {0x0196, 0x0269}, {0x0197, 0x0268}, {0x0198, 0x0199}, {0x019C, 0x026f}, + {0x019D, 0x0272}, {0x019F, 0x0275}, {0x01A0, 0x01A1}, {0x01A2, 0x01A3}, + {0x01A4, 0x01A5}, {0x01A7, 0x01A8}, {0x01A9, 0x0283}, {0x01AC, 0x01AD}, + {0x01AE, 0x0288}, {0x01AF, 0x01B0}, {0x01B1, 0x028A}, {0x01B2, 0x028B}, + {0x01B3, 0x01B4}, {0x01B5, 0x01B6}, {0x01B7, 0x0292}, {0x01B8, 0x01B9}, + {0x01BC, 0x01BD}, {0x01C4, 0x01C6}, {0x01C5, 0x01C6}, {0x01C7, 0x01C9}, + {0x01C8, 
0x01C9}, {0x01CA, 0x01CC}, {0x01CB, 0x01CC}, {0x01CD, 0x01CE}, + {0x01CF, 0x01D0}, {0x01D1, 0x01D2}, {0x01D3, 0x01D4}, {0x01D5, 0x01D6}, + {0x01D7, 0x01D8}, {0x01D9, 0x01DA}, {0x01DB, 0x01DC}, {0x01DE, 0x01DF}, + {0x01E0, 0x01E1}, {0x01E2, 0x01E3}, {0x01E4, 0x01E5}, {0x01E6, 0x01E7}, + {0x01E8, 0x01E9}, {0x01EA, 0x01EB}, {0x01EC, 0x01ED}, {0x01EE, 0x01EF}, + {0x01F1, 0x01F3}, {0x01F4, 0x01F5}, {0x01FA, 0x01FB}, {0x01FC, 0x01FD}, + {0x01FE, 0x01FF}, {0x0200, 0x0201}, {0x0202, 0x0203}, {0x0204, 0x0205}, + {0x0206, 0x0207}, {0x0208, 0x0209}, {0x020A, 0x020B}, {0x020C, 0x020D}, + {0x020E, 0x020F}, {0x0210, 0x0211}, {0x0212, 0x0213}, {0x0214, 0x0215}, + {0x0216, 0x0217}, {0x0386, 0x03AC}, {0x0388, 0x03AD}, {0x0389, 0x03AE}, + {0x038A, 0x03AF}, {0x038C, 0x03CC}, {0x038E, 0x03CD}, {0x038F, 0x03CE}, + {0x0391, 0x03B1}, {0x0392, 0x03B2}, {0x0393, 0x03B3}, {0x0394, 0x03B4}, + {0x0395, 0x03B5}, {0x0396, 0x03B6}, {0x0397, 0x03B7}, {0x0398, 0x03B8}, + {0x0399, 0x03B9}, {0x039A, 0x03BA}, {0x039B, 0x03BB}, {0x039C, 0x03BC}, + {0x039D, 0x03BD}, {0x039E, 0x03BE}, {0x039F, 0x03BF}, {0x03A0, 0x03C0}, + {0x03A1, 0x03C1}, {0x03A3, 0x03C3}, {0x03A4, 0x03C4}, {0x03A5, 0x03C5}, + {0x03A6, 0x03C6}, {0x03A7, 0x03C7}, {0x03A8, 0x03C8}, {0x03A9, 0x03C9}, + {0x03AA, 0x03CA}, {0x03AB, 0x03CB}, {0x03E2, 0x03E3}, {0x03E4, 0x03E5}, + {0x03E6, 0x03E7}, {0x03E8, 0x03E9}, {0x03EA, 0x03EB}, {0x03EC, 0x03ED}, + {0x03EE, 0x03EF}, {0x0401, 0x0451}, {0x0402, 0x0452}, {0x0403, 0x0453}, + {0x0404, 0x0454}, {0x0405, 0x0455}, {0x0406, 0x0456}, {0x0407, 0x0457}, + {0x0408, 0x0458}, {0x0409, 0x0459}, {0x040A, 0x045A}, {0x040B, 0x045B}, + {0x040C, 0x045C}, {0x040E, 0x045E}, {0x040F, 0x045F}, {0x0410, 0x0430}, + {0x0411, 0x0431}, {0x0412, 0x0432}, {0x0413, 0x0433}, {0x0414, 0x0434}, + {0x0415, 0x0435}, {0x0416, 0x0436}, {0x0417, 0x0437}, {0x0418, 0x0438}, + {0x0419, 0x0439}, {0x041A, 0x043A}, {0x041B, 0x043B}, {0x041C, 0x043C}, + {0x041D, 0x043D}, {0x041E, 0x043E}, {0x041F, 0x043F}, {0x0420, 0x0440}, + {0x0421, 
0x0441}, {0x0422, 0x0442}, {0x0423, 0x0443}, {0x0424, 0x0444}, + {0x0425, 0x0445}, {0x0426, 0x0446}, {0x0427, 0x0447}, {0x0428, 0x0448}, + {0x0429, 0x0449}, {0x042A, 0x044A}, {0x042B, 0x044B}, {0x042C, 0x044C}, + {0x042D, 0x044D}, {0x042E, 0x044E}, {0x042F, 0x044F}, {0x0460, 0x0461}, + {0x0462, 0x0463}, {0x0464, 0x0465}, {0x0466, 0x0467}, {0x0468, 0x0469}, + {0x046A, 0x046B}, {0x046C, 0x046D}, {0x046E, 0x046F}, {0x0470, 0x0471}, + {0x0472, 0x0473}, {0x0474, 0x0475}, {0x0476, 0x0477}, {0x0478, 0x0479}, + {0x047A, 0x047B}, {0x047C, 0x047D}, {0x047E, 0x047F}, {0x0480, 0x0481}, + {0x0490, 0x0491}, {0x0492, 0x0493}, {0x0494, 0x0495}, {0x0496, 0x0497}, + {0x0498, 0x0499}, {0x049A, 0x049B}, {0x049C, 0x049D}, {0x049E, 0x049F}, + {0x04A0, 0x04A1}, {0x04A2, 0x04A3}, {0x04A4, 0x04A5}, {0x04A6, 0x04A7}, + {0x04A8, 0x04A9}, {0x04AA, 0x04AB}, {0x04AC, 0x04AD}, {0x04AE, 0x04AF}, + {0x04B0, 0x04B1}, {0x04B2, 0x04B3}, {0x04B4, 0x04B5}, {0x04B6, 0x04B7}, + {0x04B8, 0x04B9}, {0x04BA, 0x04BB}, {0x04BC, 0x04BD}, {0x04BE, 0x04BF}, + {0x04C1, 0x04C2}, {0x04C3, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, + {0x04D0, 0x04D1}, {0x04D2, 0x04D3}, {0x04D4, 0x04D5}, {0x04D6, 0x04D7}, + {0x04D8, 0x04D9}, {0x04DA, 0x04DB}, {0x04DC, 0x04DD}, {0x04DE, 0x04DF}, + {0x04E0, 0x04E1}, {0x04E2, 0x04E3}, {0x04E4, 0x04E5}, {0x04E6, 0x04E7}, + {0x04E8, 0x04E9}, {0x04EA, 0x04EB}, {0x04EE, 0x04EF}, {0x04F0, 0x04F1}, + {0x04F2, 0x04F3}, {0x04F4, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0561}, + {0x0532, 0x0562}, {0x0533, 0x0563}, {0x0534, 0x0564}, {0x0535, 0x0565}, + {0x0536, 0x0566}, {0x0537, 0x0567}, {0x0538, 0x0568}, {0x0539, 0x0569}, + {0x053A, 0x056A}, {0x053B, 0x056B}, {0x053C, 0x056C}, {0x053D, 0x056D}, + {0x053E, 0x056E}, {0x053F, 0x056F}, {0x0540, 0x0570}, {0x0541, 0x0571}, + {0x0542, 0x0572}, {0x0543, 0x0573}, {0x0544, 0x0574}, {0x0545, 0x0575}, + {0x0546, 0x0576}, {0x0547, 0x0577}, {0x0548, 0x0578}, {0x0549, 0x0579}, + {0x054A, 0x057A}, {0x054B, 0x057B}, {0x054C, 0x057C}, {0x054D, 0x057D}, + {0x054E, 
0x057E}, {0x054F, 0x057F}, {0x0550, 0x0580}, {0x0551, 0x0581}, + {0x0552, 0x0582}, {0x0553, 0x0583}, {0x0554, 0x0584}, {0x0555, 0x0585}, + {0x0556, 0x0586}, {0x10A0, 0x10D0}, {0x10A1, 0x10D1}, {0x10A2, 0x10D2}, + {0x10A3, 0x10D3}, {0x10A4, 0x10D4}, {0x10A5, 0x10D5}, {0x10A6, 0x10D6}, + {0x10A7, 0x10D7}, {0x10A8, 0x10D8}, {0x10A9, 0x10D9}, {0x10AA, 0x10DA}, + {0x10AB, 0x10DB}, {0x10AC, 0x10DC}, {0x10AD, 0x10DD}, {0x10AE, 0x10DE}, + {0x10AF, 0x10DF}, {0x10B0, 0x10E0}, {0x10B1, 0x10E1}, {0x10B2, 0x10E2}, + {0x10B3, 0x10E3}, {0x10B4, 0x10E4}, {0x10B5, 0x10E5}, {0x10B6, 0x10E6}, + {0x10B7, 0x10E7}, {0x10B8, 0x10E8}, {0x10B9, 0x10E9}, {0x10BA, 0x10EA}, + {0x10BB, 0x10EB}, {0x10BC, 0x10EC}, {0x10BD, 0x10ED}, {0x10BE, 0x10EE}, + {0x10BF, 0x10EF}, {0x10C0, 0x10F0}, {0x10C1, 0x10F1}, {0x10C2, 0x10F2}, + {0x10C3, 0x10F3}, {0x10C4, 0x10F4}, {0x10C5, 0x10F5}, {0x1E00, 0x1E01}, + {0x1E02, 0x1E03}, {0x1E04, 0x1E05}, {0x1E06, 0x1E07}, {0x1E08, 0x1E09}, + {0x1E0A, 0x1E0B}, {0x1E0C, 0x1E0D}, {0x1E0E, 0x1E0F}, {0x1E10, 0x1E11}, + {0x1E12, 0x1E13}, {0x1E14, 0x1E15}, {0x1E16, 0x1E17}, {0x1E18, 0x1E19}, + {0x1E1A, 0x1E1B}, {0x1E1C, 0x1E1D}, {0x1E1E, 0x1E1F}, {0x1E20, 0x1E21}, + {0x1E22, 0x1E23}, {0x1E24, 0x1E25}, {0x1E26, 0x1E27}, {0x1E28, 0x1E29}, + {0x1E2A, 0x1E2B}, {0x1E2C, 0x1E2D}, {0x1E2E, 0x1E2F}, {0x1E30, 0x1E31}, + {0x1E32, 0x1E33}, {0x1E34, 0x1E35}, {0x1E36, 0x1E37}, {0x1E38, 0x1E39}, + {0x1E3A, 0x1E3B}, {0x1E3C, 0x1E3D}, {0x1E3E, 0x1E3F}, {0x1E40, 0x1E41}, + {0x1E42, 0x1E43}, {0x1E44, 0x1E45}, {0x1E46, 0x1E47}, {0x1E48, 0x1E49}, + {0x1E4A, 0x1E4B}, {0x1E4C, 0x1E4D}, {0x1E4E, 0x1E4F}, {0x1E50, 0x1E51}, + {0x1E52, 0x1E53}, {0x1E54, 0x1E55}, {0x1E56, 0x1E57}, {0x1E58, 0x1E59}, + {0x1E5A, 0x1E5B}, {0x1E5C, 0x1E5D}, {0x1E5E, 0x1E5F}, {0x1E60, 0x1E61}, + {0x1E62, 0x1E63}, {0x1E64, 0x1E65}, {0x1E66, 0x1E67}, {0x1E68, 0x1E69}, + {0x1E6A, 0x1E6B}, {0x1E6C, 0x1E6D}, {0x1E6E, 0x1E6F}, {0x1E70, 0x1E71}, + {0x1E72, 0x1E73}, {0x1E74, 0x1E75}, {0x1E76, 0x1E77}, {0x1E78, 0x1E79}, + {0x1E7A, 
0x1E7B}, {0x1E7C, 0x1E7D}, {0x1E7E, 0x1E7F}, {0x1E80, 0x1E81}, + {0x1E82, 0x1E83}, {0x1E84, 0x1E85}, {0x1E86, 0x1E87}, {0x1E88, 0x1E89}, + {0x1E8A, 0x1E8B}, {0x1E8C, 0x1E8D}, {0x1E8E, 0x1E8F}, {0x1E90, 0x1E91}, + {0x1E92, 0x1E93}, {0x1E94, 0x1E95}, {0x1EA0, 0x1EA1}, {0x1EA2, 0x1EA3}, + {0x1EA4, 0x1EA5}, {0x1EA6, 0x1EA7}, {0x1EA8, 0x1EA9}, {0x1EAA, 0x1EAB}, + {0x1EAC, 0x1EAD}, {0x1EAE, 0x1EAF}, {0x1EB0, 0x1EB1}, {0x1EB2, 0x1EB3}, + {0x1EB4, 0x1EB5}, {0x1EB6, 0x1EB7}, {0x1EB8, 0x1EB9}, {0x1EBA, 0x1EBB}, + {0x1EBC, 0x1EBD}, {0x1EBE, 0x1EBF}, {0x1EC0, 0x1EC1}, {0x1EC2, 0x1EC3}, + {0x1EC4, 0x1EC5}, {0x1EC6, 0x1EC7}, {0x1EC8, 0x1EC9}, {0x1ECA, 0x1ECB}, + {0x1ECC, 0x1ECD}, {0x1ECE, 0x1ECF}, {0x1ED0, 0x1ED1}, {0x1ED2, 0x1ED3}, + {0x1ED4, 0x1ED5}, {0x1ED6, 0x1ED7}, {0x1ED8, 0x1ED9}, {0x1EDA, 0x1EDB}, + {0x1EDC, 0x1EDD}, {0x1EDE, 0x1EDF}, {0x1EE0, 0x1EE1}, {0x1EE2, 0x1EE3}, + {0x1EE4, 0x1EE5}, {0x1EE6, 0x1EE7}, {0x1EE8, 0x1EE9}, {0x1EEA, 0x1EEB}, + {0x1EEC, 0x1EED}, {0x1EEE, 0x1EEF}, {0x1EF0, 0x1EF1}, {0x1EF2, 0x1EF3}, + {0x1EF4, 0x1EF5}, {0x1EF6, 0x1EF7}, {0x1EF8, 0x1EF9}, {0x1F08, 0x1F00}, + {0x1F09, 0x1F01}, {0x1F0A, 0x1F02}, {0x1F0B, 0x1F03}, {0x1F0C, 0x1F04}, + {0x1F0D, 0x1F05}, {0x1F0E, 0x1F06}, {0x1F0F, 0x1F07}, {0x1F18, 0x1F10}, + {0x1F19, 0x1F11}, {0x1F1A, 0x1F12}, {0x1F1B, 0x1F13}, {0x1F1C, 0x1F14}, + {0x1F1D, 0x1F15}, {0x1F28, 0x1F20}, {0x1F29, 0x1F21}, {0x1F2A, 0x1F22}, + {0x1F2B, 0x1F23}, {0x1F2C, 0x1F24}, {0x1F2D, 0x1F25}, {0x1F2E, 0x1F26}, + {0x1F2F, 0x1F27}, {0x1F38, 0x1F30}, {0x1F39, 0x1F31}, {0x1F3A, 0x1F32}, + {0x1F3B, 0x1F33}, {0x1F3C, 0x1F34}, {0x1F3D, 0x1F35}, {0x1F3E, 0x1F36}, + {0x1F3F, 0x1F37}, {0x1F48, 0x1F40}, {0x1F49, 0x1F41}, {0x1F4A, 0x1F42}, + {0x1F4B, 0x1F43}, {0x1F4C, 0x1F44}, {0x1F4D, 0x1F45}, {0x1F59, 0x1F51}, + {0x1F5B, 0x1F53}, {0x1F5D, 0x1F55}, {0x1F5F, 0x1F57}, {0x1F68, 0x1F60}, + {0x1F69, 0x1F61}, {0x1F6A, 0x1F62}, {0x1F6B, 0x1F63}, {0x1F6C, 0x1F64}, + {0x1F6D, 0x1F65}, {0x1F6E, 0x1F66}, {0x1F6F, 0x1F67}, {0x1F88, 0x1F80}, + {0x1F89, 
0x1F81}, {0x1F8A, 0x1F82}, {0x1F8B, 0x1F83}, {0x1F8C, 0x1F84}, + {0x1F8D, 0x1F85}, {0x1F8E, 0x1F86}, {0x1F8F, 0x1F87}, {0x1F98, 0x1F90}, + {0x1F99, 0x1F91}, {0x1F9A, 0x1F92}, {0x1F9B, 0x1F93}, {0x1F9C, 0x1F94}, + {0x1F9D, 0x1F95}, {0x1F9E, 0x1F96}, {0x1F9F, 0x1F97}, {0x1FA8, 0x1FA0}, + {0x1FA9, 0x1FA1}, {0x1FAA, 0x1FA2}, {0x1FAB, 0x1FA3}, {0x1FAC, 0x1FA4}, + {0x1FAD, 0x1FA5}, {0x1FAE, 0x1FA6}, {0x1FAF, 0x1FA7}, {0x1FB8, 0x1FB0}, + {0x1FB9, 0x1FB1}, {0x1FD8, 0x1FD0}, {0x1FD9, 0x1FD1}, {0x1FE8, 0x1FE0}, + {0x1FE9, 0x1FE1}, {0x24B6, 0x24D0}, {0x24B7, 0x24D1}, {0x24B8, 0x24D2}, + {0x24B9, 0x24D3}, {0x24BA, 0x24D4}, {0x24BB, 0x24D5}, {0x24BC, 0x24D6}, + {0x24BD, 0x24D7}, {0x24BE, 0x24D8}, {0x24BF, 0x24D9}, {0x24C0, 0x24DA}, + {0x24C1, 0x24DB}, {0x24C2, 0x24DC}, {0x24C3, 0x24DD}, {0x24C4, 0x24DE}, + {0x24C5, 0x24DF}, {0x24C6, 0x24E0}, {0x24C7, 0x24E1}, {0x24C8, 0x24E2}, + {0x24C9, 0x24E3}, {0x24CA, 0x24E4}, {0x24CB, 0x24E5}, {0x24CC, 0x24E6}, + {0x24CD, 0x24E7}, {0x24CE, 0x24E8}, {0x24CF, 0x24E9}, {0xFF21, 0xFF41}, + {0xFF22, 0xFF42}, {0xFF23, 0xFF43}, {0xFF24, 0xFF44}, {0xFF25, 0xFF45}, + {0xFF26, 0xFF46}, {0xFF27, 0xFF47}, {0xFF28, 0xFF48}, {0xFF29, 0xFF49}, + {0xFF2A, 0xFF4A}, {0xFF2B, 0xFF4B}, {0xFF2C, 0xFF4C}, {0xFF2D, 0xFF4D}, + {0xFF2E, 0xFF4E}, {0xFF2F, 0xFF4F}, {0xFF30, 0xFF50}, {0xFF31, 0xFF51}, + {0xFF32, 0xFF52}, {0xFF33, 0xFF53}, {0xFF34, 0xFF54}, {0xFF35, 0xFF55}, + {0xFF36, 0xFF56}, {0xFF37, 0xFF57}, {0xFF38, 0xFF58}, {0xFF39, 0xFF59}, + {0xFF3A, 0xFF5A} +}; + + +static size_t lookupLowerIndex[] = +{ + 0, 56, 181, 193, 233, 343, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +381, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 419, 539, + -1, -1, -1, -1, 617, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 643 +}; + + +static struct CaseMap lookupUpper[] = +{ + {0x0061, 0x0041}, {0x0062, 0x0042}, {0x0063, 0x0043}, {0x0064, 0x0044}, + {0x0065, 0x0045}, {0x0066, 0x0046}, {0x0067, 0x0047}, {0x0068, 0x0048}, + {0x0069, 0x0049}, {0x006A, 0x004A}, {0x006B, 0x004B}, {0x006C, 0x004C}, + {0x006D, 0x004D}, {0x006E, 0x004E}, {0x006F, 0x004F}, {0x0070, 0x0050}, + {0x0071, 0x0051}, {0x0072, 0x0052}, {0x0073, 0x0053}, {0x0074, 0x0054}, + {0x0075, 0x0055}, {0x0076, 0x0056}, {0x0077, 0x0057}, {0x0078, 0x0058}, + {0x0079, 0x0059}, {0x007A, 0x005A}, {0x00E0, 0x00C0}, {0x00E1, 0x00C1}, + {0x00E2, 0x00C2}, {0x00E3, 0x00C3}, {0x00E4, 0x00C4}, {0x00E5, 0x00C5}, + {0x00E6, 0x00C6}, {0x00E7, 0x00C7}, {0x00E8, 0x00C8}, {0x00E9, 0x00C9}, + {0x00EA, 0x00CA}, {0x00EB, 0x00CB}, {0x00EC, 0x00CC}, {0x00ED, 0x00CD}, + {0x00EE, 0x00CE}, {0x00EF, 0x00CF}, {0x00F0, 0x00D0}, {0x00F1, 0x00D1}, + {0x00F2, 0x00D2}, {0x00F3, 0x00D3}, {0x00F4, 0x00D4}, {0x00F5, 0x00D5}, + {0x00F6, 0x00D6}, {0x00F8, 0x00D8}, {0x00F9, 0x00D9}, {0x00FA, 0x00DA}, + {0x00FB, 0x00DB}, {0x00FC, 0x00DC}, {0x00FD, 0x00DD}, {0x00FE, 0x00DE}, + {0x00FF, 0x0178}, {0x0101, 0x0100}, {0x0103, 0x0102}, {0x0105, 0x0104}, + {0x0107, 0x0106}, {0x0109, 0x0108}, {0x010B, 0x010A}, {0x010D, 0x010C}, + {0x010F, 0x010E}, {0x0111, 0x0110}, {0x0113, 0x0112}, {0x0115, 0x0114}, + {0x0117, 0x0116}, {0x0119, 0x0118}, {0x011B, 0x011A}, {0x011D, 0x011C}, + {0x011F, 0x011E}, 
{0x0121, 0x0120}, {0x0123, 0x0122}, {0x0125, 0x0124}, + {0x0127, 0x0126}, {0x0129, 0x0128}, {0x012B, 0x012A}, {0x012D, 0x012C}, + {0x012F, 0x012E}, {0x0131, 0x0049}, {0x0133, 0x0132}, {0x0135, 0x0134}, + {0x0137, 0x0136}, {0x013A, 0x0139}, {0x013C, 0x013B}, {0x013E, 0x013D}, + {0x0140, 0x013F}, {0x0142, 0x0141}, {0x0144, 0x0143}, {0x0146, 0x0145}, + {0x0148, 0x0147}, {0x014B, 0x014A}, {0x014D, 0x014C}, {0x014F, 0x014E}, + {0x0151, 0x0150}, {0x0153, 0x0152}, {0x0155, 0x0154}, {0x0157, 0x0156}, + {0x0159, 0x0158}, {0x015B, 0x015A}, {0x015D, 0x015C}, {0x015F, 0x015E}, + {0x0161, 0x0160}, {0x0163, 0x0162}, {0x0165, 0x0164}, {0x0167, 0x0166}, + {0x0169, 0x0168}, {0x016B, 0x016A}, {0x016D, 0x016C}, {0x016F, 0x016E}, + {0x0171, 0x0170}, {0x0173, 0x0172}, {0x0175, 0x0174}, {0x0177, 0x0176}, + {0x017A, 0x0179}, {0x017C, 0x017B}, {0x017E, 0x017D}, {0x0183, 0x0182}, + {0x0185, 0x0184}, {0x0188, 0x0187}, {0x018C, 0x018B}, {0x0192, 0x0191}, + {0x0199, 0x0198}, {0x01A1, 0x01A0}, {0x01A3, 0x01A2}, {0x01A5, 0x01A4}, + {0x01A8, 0x01A7}, {0x01AD, 0x01AC}, {0x01B0, 0x01AF}, {0x01B4, 0x01B3}, + {0x01B6, 0x01B5}, {0x01B9, 0x01B8}, {0x01BD, 0x01BC}, {0x01C6, 0x01C4}, + {0x01C9, 0x01C7}, {0x01CC, 0x01CA}, {0x01CE, 0x01CD}, {0x01D0, 0x01CF}, + {0x01D2, 0x01D1}, {0x01D4, 0x01D3}, {0x01D6, 0x01D5}, {0x01D8, 0x01D7}, + {0x01DA, 0x01D9}, {0x01DC, 0x01DB}, {0x01DF, 0x01DE}, {0x01E1, 0x01E0}, + {0x01E3, 0x01E2}, {0x01E5, 0x01E4}, {0x01E7, 0x01E6}, {0x01E9, 0x01E8}, + {0x01EB, 0x01EA}, {0x01ED, 0x01EC}, {0x01EF, 0x01EE}, {0x01F3, 0x01F1}, + {0x01F5, 0x01F4}, {0x01FB, 0x01FA}, {0x01FD, 0x01FC}, {0x01FF, 0x01FE}, + {0x0201, 0x0200}, {0x0203, 0x0202}, {0x0205, 0x0204}, {0x0207, 0x0206}, + {0x0209, 0x0208}, {0x020B, 0x020A}, {0x020D, 0x020C}, {0x020F, 0x020E}, + {0x0211, 0x0210}, {0x0213, 0x0212}, {0x0215, 0x0214}, {0x0217, 0x0216}, + {0x0253, 0x0181}, {0x0254, 0x0186}, {0x0257, 0x018A}, {0x0258, 0x018E}, + {0x0259, 0x018F}, {0x025B, 0x0190}, {0x0260, 0x0193}, {0x0263, 0x0194}, + {0x0268, 0x0197}, 
{0x0269, 0x0196}, {0x026F, 0x019C}, {0x0272, 0x019D}, + {0x0275, 0x019F}, {0x0283, 0x01A9}, {0x0288, 0x01AE}, {0x028A, 0x01B1}, + {0x028B, 0x01B2}, {0x0292, 0x01B7}, {0x03AC, 0x0386}, {0x03AD, 0x0388}, + {0x03AE, 0x0389}, {0x03AF, 0x038A}, {0x03B1, 0x0391}, {0x03B2, 0x0392}, + {0x03B3, 0x0393}, {0x03B4, 0x0394}, {0x03B5, 0x0395}, {0x03B6, 0x0396}, + {0x03B7, 0x0397}, {0x03B8, 0x0398}, {0x03B9, 0x0399}, {0x03BA, 0x039A}, + {0x03BB, 0x039B}, {0x03BC, 0x039C}, {0x03BD, 0x039D}, {0x03BE, 0x039E}, + {0x03BF, 0x039F}, {0x03C0, 0x03A0}, {0x03C1, 0x03A1}, {0x03C3, 0x03A3}, + {0x03C4, 0x03A4}, {0x03C5, 0x03A5}, {0x03C6, 0x03A6}, {0x03C7, 0x03A7}, + {0x03C8, 0x03A8}, {0x03C9, 0x03A9}, {0x03CA, 0x03AA}, {0x03CB, 0x03AB}, + {0x03CC, 0x038C}, {0x03CD, 0x038E}, {0x03CE, 0x038F}, {0x03E3, 0x03E2}, + {0x03E5, 0x03E4}, {0x03E7, 0x03E6}, {0x03E9, 0x03E8}, {0x03EB, 0x03EA}, + {0x03ED, 0x03EC}, {0x03EF, 0x03EE}, {0x0430, 0x0410}, {0x0431, 0x0411}, + {0x0432, 0x0412}, {0x0433, 0x0413}, {0x0434, 0x0414}, {0x0435, 0x0415}, + {0x0436, 0x0416}, {0x0437, 0x0417}, {0x0438, 0x0418}, {0x0439, 0x0419}, + {0x043A, 0x041A}, {0x043B, 0x041B}, {0x043C, 0x041C}, {0x043D, 0x041D}, + {0x043E, 0x041E}, {0x043F, 0x041F}, {0x0440, 0x0420}, {0x0441, 0x0421}, + {0x0442, 0x0422}, {0x0443, 0x0423}, {0x0444, 0x0424}, {0x0445, 0x0425}, + {0x0446, 0x0426}, {0x0447, 0x0427}, {0x0448, 0x0428}, {0x0449, 0x0429}, + {0x044A, 0x042A}, {0x044B, 0x042B}, {0x044C, 0x042C}, {0x044D, 0x042D}, + {0x044E, 0x042E}, {0x044F, 0x042F}, {0x0451, 0x0401}, {0x0452, 0x0402}, + {0x0453, 0x0403}, {0x0454, 0x0404}, {0x0455, 0x0405}, {0x0456, 0x0406}, + {0x0457, 0x0407}, {0x0458, 0x0408}, {0x0459, 0x0409}, {0x045A, 0x040A}, + {0x045B, 0x040B}, {0x045C, 0x040C}, {0x045E, 0x040E}, {0x045F, 0x040F}, + {0x0461, 0x0460}, {0x0463, 0x0462}, {0x0465, 0x0464}, {0x0467, 0x0466}, + {0x0469, 0x0468}, {0x046B, 0x046A}, {0x046D, 0x046C}, {0x046F, 0x046E}, + {0x0471, 0x0470}, {0x0473, 0x0472}, {0x0475, 0x0474}, {0x0477, 0x0476}, + {0x0479, 0x0478}, 
{0x047B, 0x047A}, {0x047D, 0x047C}, {0x047F, 0x047E}, + {0x0481, 0x0480}, {0x0491, 0x0490}, {0x0493, 0x0492}, {0x0495, 0x0494}, + {0x0497, 0x0496}, {0x0499, 0x0498}, {0x049B, 0x049A}, {0x049D, 0x049C}, + {0x049F, 0x049E}, {0x04A1, 0x04A0}, {0x04A3, 0x04A2}, {0x04A5, 0x04A4}, + {0x04A7, 0x04A6}, {0x04A9, 0x04A8}, {0x04AB, 0x04AA}, {0x04AD, 0x04AC}, + {0x04AF, 0x04AE}, {0x04B1, 0x04B0}, {0x04B3, 0x04B2}, {0x04B5, 0x04B4}, + {0x04B7, 0x04B6}, {0x04B9, 0x04B8}, {0x04BB, 0x04BA}, {0x04BD, 0x04BC}, + {0x04BF, 0x04BE}, {0x04C2, 0x04C1}, {0x04C4, 0x04C3}, {0x04C8, 0x04C7}, + {0x04CC, 0x04CB}, {0x04D1, 0x04D0}, {0x04D3, 0x04D2}, {0x04D5, 0x04D4}, + {0x04D7, 0x04D6}, {0x04D9, 0x04D8}, {0x04DB, 0x04DA}, {0x04DD, 0x04DC}, + {0x04DF, 0x04DE}, {0x04E1, 0x04E0}, {0x04E3, 0x04E2}, {0x04E5, 0x04E4}, + {0x04E7, 0x04E6}, {0x04E9, 0x04E8}, {0x04EB, 0x04EA}, {0x04EF, 0x04EE}, + {0x04F1, 0x04F0}, {0x04F3, 0x04F2}, {0x04F5, 0x04F4}, {0x04F9, 0x04F8}, + {0x0561, 0x0531}, {0x0562, 0x0532}, {0x0563, 0x0533}, {0x0564, 0x0534}, + {0x0565, 0x0535}, {0x0566, 0x0536}, {0x0567, 0x0537}, {0x0568, 0x0538}, + {0x0569, 0x0539}, {0x056A, 0x053A}, {0x056B, 0x053B}, {0x056C, 0x053C}, + {0x056D, 0x053D}, {0x056E, 0x053E}, {0x056F, 0x053F}, {0x0570, 0x0540}, + {0x0571, 0x0541}, {0x0572, 0x0542}, {0x0573, 0x0543}, {0x0574, 0x0544}, + {0x0575, 0x0545}, {0x0576, 0x0546}, {0x0577, 0x0547}, {0x0578, 0x0548}, + {0x0579, 0x0549}, {0x057A, 0x054A}, {0x057B, 0x054B}, {0x057C, 0x054C}, + {0x057D, 0x054D}, {0x057E, 0x054E}, {0x057F, 0x054F}, {0x0580, 0x0550}, + {0x0581, 0x0551}, {0x0582, 0x0552}, {0x0583, 0x0553}, {0x0584, 0x0554}, + {0x0585, 0x0555}, {0x0586, 0x0556}, {0x10D0, 0x10A0}, {0x10D1, 0x10A1}, + {0x10D2, 0x10A2}, {0x10D3, 0x10A3}, {0x10D4, 0x10A4}, {0x10D5, 0x10A5}, + {0x10D6, 0x10A6}, {0x10D7, 0x10A7}, {0x10D8, 0x10A8}, {0x10D9, 0x10A9}, + {0x10DA, 0x10AA}, {0x10DB, 0x10AB}, {0x10DC, 0x10AC}, {0x10DD, 0x10AD}, + {0x10DE, 0x10AE}, {0x10DF, 0x10AF}, {0x10E0, 0x10B0}, {0x10E1, 0x10B1}, + {0x10E2, 0x10B2}, 
{0x10E3, 0x10B3}, {0x10E4, 0x10B4}, {0x10E5, 0x10B5}, + {0x10E6, 0x10B6}, {0x10E7, 0x10B7}, {0x10E8, 0x10B8}, {0x10E9, 0x10B9}, + {0x10EA, 0x10BA}, {0x10EB, 0x10BB}, {0x10EC, 0x10BC}, {0x10ED, 0x10BD}, + {0x10EE, 0x10BE}, {0x10EF, 0x10BF}, {0x10F0, 0x10C0}, {0x10F1, 0x10C1}, + {0x10F2, 0x10C2}, {0x10F3, 0x10C3}, {0x10F4, 0x10C4}, {0x10F5, 0x10C5}, + {0x1E01, 0x1E00}, {0x1E03, 0x1E02}, {0x1E05, 0x1E04}, {0x1E07, 0x1E06}, + {0x1E09, 0x1E08}, {0x1E0B, 0x1E0A}, {0x1E0D, 0x1E0C}, {0x1E0F, 0x1E0E}, + {0x1E11, 0x1E10}, {0x1E13, 0x1E12}, {0x1E15, 0x1E14}, {0x1E17, 0x1E16}, + {0x1E19, 0x1E18}, {0x1E1B, 0x1E1A}, {0x1E1D, 0x1E1C}, {0x1E1F, 0x1E1E}, + {0x1E21, 0x1E20}, {0x1E23, 0x1E22}, {0x1E25, 0x1E24}, {0x1E27, 0x1E26}, + {0x1E29, 0x1E28}, {0x1E2B, 0x1E2A}, {0x1E2D, 0x1E2C}, {0x1E2F, 0x1E2E}, + {0x1E31, 0x1E30}, {0x1E33, 0x1E32}, {0x1E35, 0x1E34}, {0x1E37, 0x1E36}, + {0x1E39, 0x1E38}, {0x1E3B, 0x1E3A}, {0x1E3D, 0x1E3C}, {0x1E3F, 0x1E3E}, + {0x1E41, 0x1E40}, {0x1E43, 0x1E42}, {0x1E45, 0x1E44}, {0x1E47, 0x1E46}, + {0x1E49, 0x1E48}, {0x1E4B, 0x1E4A}, {0x1E4D, 0x1E4C}, {0x1E4F, 0x1E4E}, + {0x1E51, 0x1E50}, {0x1E53, 0x1E52}, {0x1E55, 0x1E54}, {0x1E57, 0x1E56}, + {0x1E59, 0x1E58}, {0x1E5B, 0x1E5A}, {0x1E5D, 0x1E5C}, {0x1E5F, 0x1E5E}, + {0x1E61, 0x1E60}, {0x1E63, 0x1E62}, {0x1E65, 0x1E64}, {0x1E67, 0x1E66}, + {0x1E69, 0x1E68}, {0x1E6B, 0x1E6A}, {0x1E6D, 0x1E6C}, {0x1E6F, 0x1E6E}, + {0x1E71, 0x1E70}, {0x1E73, 0x1E72}, {0x1E75, 0x1E74}, {0x1E77, 0x1E76}, + {0x1E79, 0x1E78}, {0x1E7B, 0x1E7A}, {0x1E7D, 0x1E7C}, {0x1E7F, 0x1E7E}, + {0x1E81, 0x1E80}, {0x1E83, 0x1E82}, {0x1E85, 0x1E84}, {0x1E87, 0x1E86}, + {0x1E89, 0x1E88}, {0x1E8B, 0x1E8A}, {0x1E8D, 0x1E8C}, {0x1E8F, 0x1E8E}, + {0x1E91, 0x1E90}, {0x1E93, 0x1E92}, {0x1E95, 0x1E94}, {0x1EA1, 0x1EA0}, + {0x1EA3, 0x1EA2}, {0x1EA5, 0x1EA4}, {0x1EA7, 0x1EA6}, {0x1EA9, 0x1EA8}, + {0x1EAB, 0x1EAA}, {0x1EAD, 0x1EAC}, {0x1EAF, 0x1EAE}, {0x1EB1, 0x1EB0}, + {0x1EB3, 0x1EB2}, {0x1EB5, 0x1EB4}, {0x1EB7, 0x1EB6}, {0x1EB9, 0x1EB8}, + {0x1EBB, 0x1EBA}, 
{0x1EBD, 0x1EBC}, {0x1EBF, 0x1EBE}, {0x1EC1, 0x1EC0}, + {0x1EC3, 0x1EC2}, {0x1EC5, 0x1EC4}, {0x1EC7, 0x1EC6}, {0x1EC9, 0x1EC8}, + {0x1ECB, 0x1ECA}, {0x1ECD, 0x1ECC}, {0x1ECF, 0x1ECE}, {0x1ED1, 0x1ED0}, + {0x1ED3, 0x1ED2}, {0x1ED5, 0x1ED4}, {0x1ED7, 0x1ED6}, {0x1ED9, 0x1ED8}, + {0x1EDB, 0x1EDA}, {0x1EDD, 0x1EDC}, {0x1EDF, 0x1EDE}, {0x1EE1, 0x1EE0}, + {0x1EE3, 0x1EE2}, {0x1EE5, 0x1EE4}, {0x1EE7, 0x1EE6}, {0x1EE9, 0x1EE8}, + {0x1EEB, 0x1EEA}, {0x1EED, 0x1EEC}, {0x1EEF, 0x1EEE}, {0x1EF1, 0x1EF0}, + {0x1EF3, 0x1EF2}, {0x1EF5, 0x1EF4}, {0x1EF7, 0x1EF6}, {0x1EF9, 0x1EF8}, + {0x1F00, 0x1F08}, {0x1F01, 0x1F09}, {0x1F02, 0x1F0A}, {0x1F03, 0x1F0B}, + {0x1F04, 0x1F0C}, {0x1F05, 0x1F0D}, {0x1F06, 0x1F0E}, {0x1F07, 0x1F0F}, + {0x1F10, 0x1F18}, {0x1F11, 0x1F19}, {0x1F12, 0x1F1A}, {0x1F13, 0x1F1B}, + {0x1F14, 0x1F1C}, {0x1F15, 0x1F1D}, {0x1F20, 0x1F28}, {0x1F21, 0x1F29}, + {0x1F22, 0x1F2A}, {0x1F23, 0x1F2B}, {0x1F24, 0x1F2C}, {0x1F25, 0x1F2D}, + {0x1F26, 0x1F2E}, {0x1F27, 0x1F2F}, {0x1F30, 0x1F38}, {0x1F31, 0x1F39}, + {0x1F32, 0x1F3A}, {0x1F33, 0x1F3B}, {0x1F34, 0x1F3C}, {0x1F35, 0x1F3D}, + {0x1F36, 0x1F3E}, {0x1F37, 0x1F3F}, {0x1F40, 0x1F48}, {0x1F41, 0x1F49}, + {0x1F42, 0x1F4A}, {0x1F43, 0x1F4B}, {0x1F44, 0x1F4C}, {0x1F45, 0x1F4D}, + {0x1F51, 0x1F59}, {0x1F53, 0x1F5B}, {0x1F55, 0x1F5D}, {0x1F57, 0x1F5F}, + {0x1F60, 0x1F68}, {0x1F61, 0x1F69}, {0x1F62, 0x1F6A}, {0x1F63, 0x1F6B}, + {0x1F64, 0x1F6C}, {0x1F65, 0x1F6D}, {0x1F66, 0x1F6E}, {0x1F67, 0x1F6F}, + {0x1F80, 0x1F88}, {0x1F81, 0x1F89}, {0x1F82, 0x1F8A}, {0x1F83, 0x1F8B}, + {0x1F84, 0x1F8C}, {0x1F85, 0x1F8D}, {0x1F86, 0x1F8E}, {0x1F87, 0x1F8F}, + {0x1F90, 0x1F98}, {0x1F91, 0x1F99}, {0x1F92, 0x1F9A}, {0x1F93, 0x1F9B}, + {0x1F94, 0x1F9C}, {0x1F95, 0x1F9D}, {0x1F96, 0x1F9E}, {0x1F97, 0x1F9F}, + {0x1FA0, 0x1FA8}, {0x1FA1, 0x1FA9}, {0x1FA2, 0x1FAA}, {0x1FA3, 0x1FAB}, + {0x1FA4, 0x1FAC}, {0x1FA5, 0x1FAD}, {0x1FA6, 0x1FAE}, {0x1FA7, 0x1FAF}, + {0x1FB0, 0x1FB8}, {0x1FB1, 0x1FB9}, {0x1FD0, 0x1FD8}, {0x1FD1, 0x1FD9}, + {0x1FE0, 0x1FE8}, 
{0x1FE1, 0x1FE9}, {0x24D0, 0x24B6}, {0x24D1, 0x24B7}, + {0x24D2, 0x24B8}, {0x24D3, 0x24B9}, {0x24D4, 0x24BA}, {0x24D5, 0x24BB}, + {0x24D6, 0x24BC}, {0x24D7, 0x24BD}, {0x24D8, 0x24BE}, {0x24D9, 0x24BF}, + {0x24DA, 0x24C0}, {0x24DB, 0x24C1}, {0x24DC, 0x24C2}, {0x24DD, 0x24C3}, + {0x24DE, 0x24C4}, {0x24DF, 0x24C5}, {0x24E0, 0x24C6}, {0x24E1, 0x24C7}, + {0x24E2, 0x24C8}, {0x24E3, 0x24C9}, {0x24E4, 0x24CA}, {0x24E5, 0x24CB}, + {0x24E6, 0x24CC}, {0x24E7, 0x24CD}, {0x24E8, 0x24CE}, {0x24E9, 0x24CF}, + {0xFF41, 0xFF21}, {0xFF42, 0xFF22}, {0xFF43, 0xFF23}, {0xFF44, 0xFF24}, + {0xFF45, 0xFF25}, {0xFF46, 0xFF26}, {0xFF47, 0xFF27}, {0xFF48, 0xFF28}, + {0xFF49, 0xFF29}, {0xFF4A, 0xFF2A}, {0xFF4B, 0xFF2B}, {0xFF4C, 0xFF2C}, + {0xFF4D, 0xFF2D}, {0xFF4E, 0xFF2E}, {0xFF4F, 0xFF2F}, {0xFF50, 0xFF30}, + {0xFF51, 0xFF31}, {0xFF52, 0xFF32}, {0xFF53, 0xFF33}, {0xFF54, 0xFF34}, + {0xFF55, 0xFF35}, {0xFF56, 0xFF36}, {0xFF57, 0xFF37}, {0xFF58, 0xFF38}, + {0xFF59, 0xFF39}, {0xFF5A, 0xFF3A} +}; + +static size_t lookupUpperIndex[] = +{ + 0, 57, 160, 190, 230, 340, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +378, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 416, 536, + -1, -1, -1, -1, 614, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 640 +}; + +static unsigned char lookupUtf8Type[] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 11, 11, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, + 8, 9, 9, 9, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +}; + + +static unsigned char lookupUtf8State[] = +{ + 0, 96, 96, 96, 72, 60, 36, 84, 48, 24, 12, 96, + 96, 36, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, + 96, 36, 36, 36, 96, 96, 96, 96, 96, 96, 96, 96, + 96, 72, 72, 72, 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 36, 36, 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 72, 96, 96, 96, 96, 96, 96, 96, 96, + 96, 0, 0, 0, 96, 96, 96, 96, 96, 96, 96, 96, + 96, 72, 72, 96, 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 +}; + + +static unsigned char lookupUtf8Bits[] = +{ + 7, 6, 6, 6, 5, 4, 4, 4, 3, 3, 3, 1 +}; diff --git a/src/String/ToIntU.inl b/src/String/Inline/Unsigned.h index ff57e84..ff57e84 100644 --- a/src/String/ToIntU.inl +++ b/src/String/Inline/Unsigned.h diff --git a/src/String/Int.c b/src/String/Int.c index 18e62ae..2e94814 100644 --- a/src/String/Int.c +++ b/src/String/Int.c @@ -86,7 +86,7 @@ int BH_StringFromInt8s(char *string, int base, size_t *actual) { - #include "ToIntS.inl" + #include "Inline/Signed.h" } @@ -96,7 +96,7 @@ int 
BH_StringFromInt16s(char *string, int base, size_t *actual) { - #include "ToIntS.inl" + #include "Inline/Signed.h" } @@ -106,7 +106,7 @@ int BH_StringFromInt32s(char *string, int base, size_t *actual) { - #include "ToIntS.inl" + #include "Inline/Signed.h" } @@ -116,7 +116,7 @@ int BH_StringFromInt64s(char *string, int base, size_t *actual) { - #include "ToIntS.inl" + #include "Inline/Signed.h" } @@ -126,7 +126,7 @@ int BH_StringFromInt8u(char *string, int base, size_t *actual) { - #include "ToIntU.inl" + #include "Inline/Unsigned.h" } @@ -136,7 +136,7 @@ int BH_StringFromInt16u(char *string, int base, size_t *actual) { - #include "ToIntU.inl" + #include "Inline/Unsigned.h" } @@ -146,7 +146,7 @@ int BH_StringFromInt32u(char *string, int base, size_t *actual) { - #include "ToIntU.inl" + #include "Inline/Unsigned.h" } @@ -156,7 +156,7 @@ int BH_StringFromInt64u(char *string, int base, size_t *actual) { - #include "ToIntU.inl" + #include "Inline/Unsigned.h" } @@ -165,7 +165,7 @@ int8_t BH_StringToInt8s(const char *string, int base) { int8_t result; - #include "FromInt.inl" + #include "Inline/Int.h" } @@ -174,7 +174,7 @@ int16_t BH_StringToInt16s(const char *string, int base) { int16_t result; - #include "FromInt.inl" + #include "Inline/Int.h" } @@ -183,7 +183,7 @@ int32_t BH_StringToInt32s(const char *string, int base) { int32_t result; - #include "FromInt.inl" + #include "Inline/Int.h" } @@ -192,7 +192,7 @@ int64_t BH_StringToInt64s(const char *string, int base) { int64_t result; - #include "FromInt.inl" + #include "Inline/Int.h" } @@ -201,7 +201,7 @@ uint8_t BH_StringToInt8u(const char *string, int base) { uint8_t result; - #include "FromInt.inl" + #include "Inline/Int.h" } @@ -210,7 +210,7 @@ uint16_t BH_StringToInt16u(const char *string, int base) { uint16_t result; - #include "FromInt.inl" + #include "Inline/Int.h" } @@ -219,7 +219,7 @@ uint32_t BH_StringToInt32u(const char *string, int base) { uint32_t result; - #include "FromInt.inl" + #include "Inline/Int.h" } 
@@ -228,5 +228,5 @@ uint64_t BH_StringToInt64u(const char *string, int base) { uint64_t result; - #include "FromInt.inl" + #include "Inline/Int.h" } diff --git a/src/String/Unicode.c b/src/String/Unicode.c new file mode 100644 index 0000000..1f0eaf0 --- /dev/null +++ b/src/String/Unicode.c @@ -0,0 +1,280 @@ +#include <BH/String.h> +#include <BH/Util.h> + +#include "Inline/Unicode.h" + + +uint32_t decodeUtf8(uint32_t *unit, + unsigned char *state, + unsigned char byte) +{ + unsigned char type, bits; + + type = lookupUtf8Type[(size_t)byte]; + bits = lookupUtf8Bits[(size_t)type]; + *unit = (*unit << bits) | ((0xFF >> (8 - bits)) & byte); + + return *state = lookupUtf8State[(size_t)type + *state]; +} + + +uint32_t BH_UnicodeLower(uint32_t unit) +{ + size_t i; + + /* In convertable range */ + if (unit >= 0xFFFF) + return unit; + + /* Fasttrack and search lookup table */ + i = lookupLowerIndex[unit >> 8]; + while (i < sizeof(lookupLower) / sizeof(struct CaseMap)) + { + if (lookupLower[i].from > unit) + return unit; + if (lookupLower[i].from == unit) + return lookupLower[i].to; + i++; + } + + return unit; +} + + +uint32_t BH_UnicodeUpper(uint32_t unit) +{ + size_t i; + + /* In convertable range */ + if (unit >= 0xFFFF) + return unit; + + /* Fasttrack and search lookup table */ + i = lookupUpperIndex[unit >> 8]; + while (i < sizeof(lookupUpper) / sizeof(struct CaseMap)) + { + if (lookupUpper[i].from > unit) + return unit; + if (lookupUpper[i].from == unit) + return lookupUpper[i].to; + i++; + } + + return unit; +} + + +size_t BH_UnicodeDecodeUtf8(const char *string, + size_t size, + uint32_t *unit) +{ + unsigned char state; + size_t i; + + state = 0; + *unit = 0; + for (i = 0; i < size; i++) + { + switch (decodeUtf8(unit, &state, string[i])) + { + case UTF8_OK: return i + 1; + case UTF8_ERROR: *unit = 0xFFFFFFFF; return (i > 0 ? 
i : i + 1); + default: break; + } + } + + return 0; +} + + +size_t BH_UnicodeEncodeUtf8(uint32_t unit, + char *string) +{ + size_t result; + + result = 0; + if (unit < 0x80ul) + { + string[0] = unit & 0x7F; + result = 1; + } + else if (unit < 0x800ul) + { + string[0] = 0xC0 | (unit >> 6); + string[1] = 0x80 | (unit & 0x3F); + result = 2; + } + else if (unit < 0x10000ul) + { + string[0] = 0xE0 | (unit >> 12); + string[1] = 0x80 | ((unit >> 6) & 0x3F); + string[2] = 0x80 | (unit & 0x3F); + result = 3; + } + else if (unit < 0x200000ul) + { + string[0] = 0xF0 | (unit >> 18); + string[1] = 0x80 | ((unit >> 12) & 0x3F); + string[2] = 0x80 | ((unit >> 6) & 0x3F); + string[3] = 0x80 | (unit & 0x3F); + result = 4; + } + + return result; +} + + +static int classifyUtf16(uint16_t value) +{ + if (value > 0xD7FF && value < 0xDC00) + return UTF16_LOWSUR; + else if (value > 0xDBFF && value < 0xE000) + return UTF16_HIGHSUR; + return UTF16_NORMAL; +} + + +size_t BH_UnicodeDecodeUtf16LE(const char *string, + size_t size, + uint32_t *unit) +{ + uint16_t lower, upper; + + if (size < 2) + return 0; + + upper = BH_Read16LEu(string); + *unit = 0xFFFFFFFF; + + if (classifyUtf16(upper) == UTF16_NORMAL) + *unit = upper; + else if (classifyUtf16(upper) == UTF16_LOWSUR) + { + if (size < 4) + return 0; + + lower = BH_Read16LEu(string + 2); + if (classifyUtf16(lower) == UTF16_HIGHSUR) + { + *unit = (((upper & 0x3FF) << 10) | (lower & 0x3FF)) + 0x10000; + return 4; + } + } + + return 2; +} + + +size_t BH_UnicodeDecodeUtf16BE(const char *string, + size_t size, + uint32_t *unit) +{ + uint16_t lower, upper; + + if (size < 2) + return 0; + + upper = BH_Read16BEu(string); + *unit = 0xFFFFFFFF; + + if (classifyUtf16(upper) == UTF16_NORMAL) + *unit = upper; + else if (classifyUtf16(upper) == UTF16_LOWSUR) + { + if (size < 4) + return 0; + + lower = BH_Read16BEu(string + 2); + if (classifyUtf16(lower) == UTF16_HIGHSUR) + { + *unit = (((upper & 0x3FF) << 10) | (lower & 0x3FF)) + 0x10000; + return 4; + } 
+ } + + return 2; +} + + +size_t BH_UnicodeEncodeUtf16LE(uint32_t unit, + char *string) +{ + if (unit < 0x10000) + { + BH_Write16LEu(string, unit); + return 2; + } + else if (unit < 0x200000) + { + unit -= 0x10000; + BH_Write16LEu(string, 0xD800 | (unit >> 10)); + BH_Write16LEu(string + 2, 0xDC00 | (unit & 0x3FF)); + return 4; + } + return 0; +} + + +size_t BH_UnicodeEncodeUtf16BE(uint32_t unit, + char *string) +{ + if (unit < 0x10000) + { + BH_Write16BEu(string, unit); + return 2; + } + else if (unit < 0x200000) + { + unit -= 0x10000; + BH_Write16BEu(string, 0xD800 | (unit >> 10)); + BH_Write16BEu(string + 2, 0xDC00 | (unit & 0x3FF)); + return 4; + } + return 0; +} + + +size_t BH_UnicodeDecodeUtf32LE(const char *string, + size_t size, + uint32_t *unit) +{ + if (size < 4) + return 0; + + *unit = BH_Read32LEu(string); + return 4; +} + + +size_t BH_UnicodeDecodeUtf32BE(const char *string, + size_t size, + uint32_t *unit) +{ + if (size < 4) + return 0; + + *unit = BH_Read32BEu(string); + return 4; +} + + +size_t BH_UnicodeEncodeUtf32LE(uint32_t unit, + char *string) +{ + if (unit > 0x1FFFFF) + return 0; + + BH_Write32LEu(string, unit); + return 4; +} + + +size_t BH_UnicodeEncodeUtf32BE(uint32_t unit, + char *string) +{ + if (unit > 0x1FFFFF) + return 0; + + BH_Write32BEu(string, unit); + return 4; +} |
