Initial commit

2026-06-14 22:51:45 +03:00
commit 78bf3c74b6
18 changed files with 4096 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,68 @@
 # ---> C
 # Prerequisites
 *.d
 # Object files
 *.o
 *.ko
 *.obj
 *.elf
 # Linker output
 *.ilk
 *.map
 *.exp
 # Precompiled Headers
 *.gch
 *.pch
 # Libraries
 *.lib
 *.a
 *.la
 *.lo
 # Shared objects (inc. Windows DLLs)
 *.dll
 *.so
 *.so.*
 *.dylib
 # Executables
 *.exe
 *.out
 *.app
 *.i*86
 *.x86_64
 *.hex
 # Debug files
 *.dSYM/
 *.su
 *.idb
 *.pdb
 # Kernel Module Compile Results
 *.mod*
 *.cmd
 .tmp_versions/
 modules.order
 Module.symvers
 Mkfile.old
 dkms.conf
 # ---> CMake
 CMakeLists.txt.user
 CMakeCache.txt
 CMakeFiles
 CMakeScripts
 Testing
 Makefile
 cmake_install.cmake
 install_manifest.txt
 compile_commands.json
 CTestTestfile.cmake
 _deps
 CMakeUserPresets.json
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,29 @@
 # Build script for the CgeStr static library.
 cmake_minimum_required(VERSION 3.10)
 project(CgeStr LANGUAGES C)
 # Require C99 from the compiler (no fallback to an older standard).
 set(CMAKE_C_STANDARD 99)
 set(CMAKE_C_STANDARD_REQUIRED ON)
 # Translation units that make up the library.
 set(SOURCES
    Rune.c
    Str.c
    UCD.c
    Utf8.c
    Utf16.c
 )
 # Public header; listed on the target and installed below.
 set(HEADERS
    CgeStr.h
 )
 add_library(CgeStr STATIC ${SOURCES} ${HEADERS})
 # Consumers of the target get the source directory on their include path.
 target_include_directories(CgeStr PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
 # Install the archive and the header relative to the install prefix.
 install(TARGETS CgeStr
    ARCHIVE DESTINATION lib
    LIBRARY DESTINATION lib
    RUNTIME DESTINATION bin
 )
 install(FILES ${HEADERS} DESTINATION include)
--- a/CgeStr.h
+++ b/CgeStr.h
@@ -0,0 +1,76 @@
 #ifndef CGE_STR_H
 #define CGE_STR_H
 #include <stddef.h>
 #include <stdint.h>
 /*
  * Non-owning view of a byte string.
  *
  * data - pointer to the first byte of the string.
  * size - length of the string in bytes.
  *
  * The library functions rely on `size` rather than on a terminating NUL, and
  * the string routines decode the bytes as UTF-8.
  * <stddef.h> is included explicitly because `size_t` is not guaranteed to be
  * declared by <stdint.h>.
  */
 typedef struct CgeStr {
    const char* data;
    size_t size;
 } CgeStr;
 /*
  * Brace initializer for a CgeStr built from a string literal (or char array).
  * The size is `sizeof(s) - 1`, i.e. the array size without the terminating
  * NUL, so `s` must be an array and not a `char*`. The expansion is a brace
  * list and is therefore only usable as an initializer.
  */
 #define CGE_STR_LIT(s) \
    {(s), sizeof(s) - 1}
 /* Callback invoked by CgeStrIter once per decoded rune; `user` is passed through unchanged. */
 typedef void (*CgeStrIterCb)(uint32_t rune, void* user);
 /*
  * Output callback used by CgeStrToLower / CgeStrToUpper / CgeStrFold.
  * It receives `size` bytes at `data`; those functions pass a local scratch
  * buffer, so the bytes must be consumed or copied before the callback returns.
  */
 typedef void (*CgeStrWriteCb)(const char* data, size_t size, void* user);
 /*
  * Category values that the classification functions compare against the
  * result of CgeRuneCategory.
  * The constant names follow the two-letter Unicode General_Category
  * abbreviations (Lu, Ll, Lt, ... Cn).
  * NOTE(review): the numeric values are presumably baked into the generated
  * tables in UCD.c - confirm before reordering.
  */
 enum CgeCat {
    CGE_CAT_LU, CGE_CAT_LL, CGE_CAT_LT, CGE_CAT_LM, CGE_CAT_LO, CGE_CAT_MN,
    CGE_CAT_MC, CGE_CAT_ME, CGE_CAT_ND, CGE_CAT_NL, CGE_CAT_NO, CGE_CAT_PC,
    CGE_CAT_PD, CGE_CAT_PS, CGE_CAT_PE, CGE_CAT_PI, CGE_CAT_PF, CGE_CAT_PO,
    CGE_CAT_SM, CGE_CAT_SC, CGE_CAT_SK, CGE_CAT_SO, CGE_CAT_ZS, CGE_CAT_ZL,
    CGE_CAT_ZP, CGE_CAT_CC, CGE_CAT_CF, CGE_CAT_CS, CGE_CAT_CO, CGE_CAT_CN
 };
 /*
  * --- Rune properties -------------------------------------------------------
  * NOTE(review): the category and case-mapping functions are not defined in
  * Rune.c, Str.c, Utf8.c or Utf16.c; presumably they live in the generated
  * UCD.c - confirm.
  */
 /* Category of `rune`; the result is compared against enum CgeCat values. */
 int CgeRuneCategory(uint32_t rune);
 /* Single-rune lower / upper / title case mapping and case folding. */
 uint32_t CgeRuneLower(uint32_t rune);
 uint32_t CgeRuneUpper(uint32_t rune);
 uint32_t CgeRuneTitle(uint32_t rune);
 uint32_t CgeRuneFold(uint32_t rune);
 /*
  * Full (possibly multi-rune) mappings: the mapped runes are stored in `out`
  * and the number of stored runes is returned. Str.c calls these with a
  * 3-element buffer.
  * NOTE(review): the maximum count is presumably 3 - confirm against UCD.c.
  */
 size_t CgeRuneLowerFull(uint32_t rune, uint32_t* out);
 size_t CgeRuneUpperFull(uint32_t rune, uint32_t* out);
 size_t CgeRuneTitleFull(uint32_t rune, uint32_t* out);
 size_t CgeRuneFoldFull(uint32_t rune, uint32_t* out);
 /*
  * --- Classification (Rune.c) -----------------------------------------------
  * Each predicate returns 1 when `rune` belongs to the class and 0 otherwise,
  * based on CgeRuneCategory. CgeRuneIsPrint additionally accepts ' ', and
  * CgeRuneIsSpace additionally accepts a few explicit control characters.
  */
 int CgeRuneIsControl(uint32_t rune);
 int CgeRuneIsDigit(uint32_t rune);
 int CgeRuneIsGraphic(uint32_t rune);
 int CgeRuneIsLetter(uint32_t rune);
 int CgeRuneIsLower(uint32_t rune);
 int CgeRuneIsMark(uint32_t rune);
 int CgeRuneIsNumber(uint32_t rune);
 int CgeRuneIsPrint(uint32_t rune);
 int CgeRuneIsPunct(uint32_t rune);
 int CgeRuneIsSpace(uint32_t rune);
 int CgeRuneIsSymbol(uint32_t rune);
 int CgeRuneIsTitle(uint32_t rune);
 int CgeRuneIsUpper(uint32_t rune);
 /*
  * --- UTF-8 (Utf8.c) ---------------------------------------------------------
  * Encode writes 1-4 bytes to `data` and returns the byte count, or -1 for a
  * surrogate or a value above 0x10FFFF. Decode returns the number of bytes
  * consumed, or -1 for empty, truncated, overlong or otherwise invalid input.
  * The *Lax variants never fail: EncodeLax encodes U+FFFD instead, DecodeLax
  * yields U+FFFD and reports one byte consumed.
  */
 int CgeUtf8Encode(uint32_t rune, char* data);
 int CgeUtf8EncodeLax(uint32_t rune, char* data);
 int CgeUtf8Decode(const char* data, size_t size, uint32_t* rune);
 int CgeUtf8DecodeLax(const char* data, size_t size, uint32_t* rune);
 /*
  * --- UTF-16 (Utf16.c) -------------------------------------------------------
  * Same contract as the UTF-8 functions, counted in 16-bit units (1 or 2);
  * `size` is the number of units available at `data`.
  */
 int CgeUtf16Encode(uint32_t rune, uint16_t* data);
 int CgeUtf16EncodeLax(uint32_t rune, uint16_t* data);
 int CgeUtf16Decode(const uint16_t* data, size_t size, uint32_t* rune);
 int CgeUtf16DecodeLax(const uint16_t* data, size_t size, uint32_t* rune);
 /* --- Strings (Str.c) --------------------------------------------------------- */
 /* Decodes `str` leniently and calls `cb` for every rune. */
 void CgeStrIter(CgeStr str, CgeStrIterCb cb, void* user);
 /* Map every rune with the corresponding full mapping and hand the re-encoded bytes to `cb`. */
 void CgeStrToLower(CgeStr str, CgeStrWriteCb cb, void* user);
 void CgeStrToUpper(CgeStr str, CgeStrWriteCb cb, void* user);
 void CgeStrFold(CgeStr str, CgeStrWriteCb cb, void* user);
 /* Byte-wise comparison; returns -1, 0 or 1. A proper prefix orders first. */
 int CgeStrCmp(CgeStr lhs, CgeStr rhs);
 /* Comparison of the full case foldings of both strings; returns -1, 0 or 1. */
 int CgeStrICmp(CgeStr lhs, CgeStr rhs);
 /* Searches return a byte offset into `str`, or (size_t)-1 when nothing matches. */
 size_t CgeStrIndexRune(CgeStr str, uint32_t rune);
 size_t CgeStrLastIndexRune(CgeStr str, uint32_t rune);
 /* An empty `substr` matches at 0 (CgeStrIndexStr) or at str.size (CgeStrLastIndexStr). */
 size_t CgeStrIndexStr(CgeStr str, CgeStr substr);
 size_t CgeStrLastIndexStr(CgeStr str, CgeStr substr);
 /* Byte-wise prefix / suffix tests; return 1 or 0. */
 int CgeStrHasPrefix(CgeStr str, CgeStr prefix);
 int CgeStrHasSuffix(CgeStr str, CgeStr suffix);
 /* Return a sub-view of `str` without leading / trailing runes accepted by CgeRuneIsSpace. */
 CgeStr CgeStrTrimLeft(CgeStr str);
 CgeStr CgeStrTrimRight(CgeStr str);
 CgeStr CgeStrTrim(CgeStr str);
 /*
  * Returns the part of `*s` before the first `delim` and advances `*s` past
  * the delimiter. Without a delimiter the whole view is returned and
  * `s->size` is set to 0.
  */
 CgeStr CgeStrSplit(CgeStr *s, uint32_t delim);
 #endif /* CGE_STR_H */
--- a/12
+++ b/12
@@ -0,0 +1,12 @@
 Copyright (C) 2026 by blankhex me@blankhex.com
 Permission to use, copy, modify, and/or distribute this software for any
 purpose with or without fee is hereby granted.
 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
 INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
 OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 PERFORMANCE OF THIS SOFTWARE.
--- a/Makefile.mingw
+++ b/Makefile.mingw
@@ -0,0 +1,23 @@
 # MinGW Makefile for CgeStr
 # Builds the static library libCgeStr.a from the C sources in this directory.
 CC = gcc
 AR = ar
 CFLAGS = -std=c99 -O2 -Wall -Wextra
 ARFLAGS = rcs
 TARGET = libCgeStr.a
 SOURCES = Rune.c Str.c UCD.c Utf8.c Utf16.c
 # One object file per source file.
 OBJECTS = $(SOURCES:.c=.o)
 .PHONY: all clean
 all: $(TARGET)
 # Archive all objects into the static library.
 $(TARGET): $(OBJECTS)
 	$(AR) $(ARFLAGS) $@ $^
 # Every object also depends on the public header.
 %.o: %.c CgeStr.h
 	$(CC) $(CFLAGS) -c $< -o $@
 # Uses the cmd.exe `del` command; errors are discarded so clean never fails.
 clean:
 	del $(OBJECTS) $(TARGET) 2>nul || exit 0
--- a/Makefile.posix
+++ b/Makefile.posix
@@ -0,0 +1,33 @@
 # POSIX Makefile for CgeStr
 # Builds the static library libCgeStr.a and installs it with the header.
 CC = gcc
 AR = ar
 CFLAGS = -std=c99 -O2 -Wall -Wextra -fPIC
 ARFLAGS = rcs
 # Installation prefix; override on the command line, e.g. `make PREFIX=/opt install`.
 PREFIX = /usr/local
 # Optional staging root for packagers; empty for a direct install.
 DESTDIR =
 TARGET = libCgeStr.a
 SOURCES = Rune.c Str.c UCD.c Utf8.c Utf16.c
 OBJECTS = $(SOURCES:.c=.o)
 # uninstall is listed too, so a stray file of that name cannot disable the rule.
 .PHONY: all clean install uninstall
 all: $(TARGET)
 $(TARGET): $(OBJECTS)
 	$(AR) $(ARFLAGS) $@ $^
 # Every object also depends on the public header.
 %.o: %.c CgeStr.h
 	$(CC) $(CFLAGS) -c $< -o $@
 clean:
 	rm -f $(OBJECTS) $(TARGET)
 # Only a static archive is installed, so the shared-library cache (ldconfig)
 # does not need to be refreshed.
 install: $(TARGET)
 	mkdir -p $(DESTDIR)$(PREFIX)/lib $(DESTDIR)$(PREFIX)/include
 	cp $(TARGET) $(DESTDIR)$(PREFIX)/lib/
 	cp CgeStr.h $(DESTDIR)$(PREFIX)/include/
 uninstall:
 	rm -f $(DESTDIR)$(PREFIX)/lib/libCgeStr.a
 	rm -f $(DESTDIR)$(PREFIX)/include/CgeStr.h
--- a/Makefile.win32
+++ b/Makefile.win32
@@ -0,0 +1,23 @@
 # Makefile.win32 for MSVC (NMake)
 # Usage: Open "x86 Native Tools Command Prompt", then:
 #   nmake -f Makefile.win32
 CC = cl
 # The librarian macro is deliberately NOT called LIB: NMAKE maps macros onto
 # environment variables of the same name, and LIB is the MSVC library search
 # path, which a `LIB = lib` definition would overwrite for all commands.
 LIBTOOL = lib
 CFLAGS = /c /nologo /W3 /O2
 LIBFLAGS = /nologo
 TARGET = CgeStr.lib
 SOURCES = Rune.c Str.c UCD.c Utf8.c Utf16.c
 OBJECTS = $(SOURCES:.c=.obj)
 # Combine all objects into the static library.
 $(TARGET): $(OBJECTS)
 	$(LIBTOOL) $(LIBFLAGS) /OUT:$(TARGET) $(OBJECTS)
 # Inference rule: compile a .c file into the matching .obj file.
 {.}.c{}.obj:
 	$(CC) $(CFLAGS) /Fo$@ $<
 clean:
 	del $(OBJECTS) $(TARGET) 2>nul
 .PHONY: clean
--- a/README.md
+++ b/README.md
@@ -0,0 +1,56 @@
 # CgeStr - Unicode String Library for C
 A lightweight, dependency-free C library for UTF-8 string processing with full
 Unicode support.
 ## Features
 - UTF-8 encoding and decoding
 - UTF-16 encoding and decoding
 - Unicode case mapping: lowercase, uppercase, titlecase, case folding
 - Full case mapping functions returning multiple runes when needed
 - Character classification: isControl, isDigit, isLetter, isSpace, and others
 - Unicode category lookup via `CgeRuneCategory`
 - Case-sensitive and case-insensitive string comparison
 - Substring and rune search
 - Prefix and suffix checking
 - String trimming (left, right, both)
 - String splitting by rune
 - Iteration over Unicode code points using callback interface
 ## Build Systems
 - CMake - supports Linux, macOS, Windows (MSVC, MinGW)
 - Makefile.posix - for GCC/Clang on POSIX systems
 - Makefile.mingw - for MinGW on Windows
 - Makefile.win32 - for MSVC with NMake
 Builds a static library. No shared library or external dependencies.
 ## Usage Example
 ```c
 #include "CgeStr.h"
 #include <stdio.h>
 void print_rune(uint32_t rune, void* user) {
    printf("U+%04X ", rune);
 }
 int main() {
    CgeStr str = CGE_STR_LIT("Héllo, 世界!");
    CgeStrIter(str, print_rune, NULL);
    printf("\n");
    return 0;
 }
 ```
 ## Portability
 - Written in C89-style C; the only C99 requirement is `<stdint.h>` (the provided build files compile with `-std=c99`)
 - No dynamic memory allocation
 - No external dependencies
 ## License
 0BSD - a permissive license with no attribution required.
--- a/Rune.c
+++ b/Rune.c
@@ -0,0 +1,176 @@
 #include "CgeStr.h"
 /* Returns 1 when `rune` is in the control category (CGE_CAT_CC), otherwise 0. */
 int CgeRuneIsControl(uint32_t rune) {
    return CgeRuneCategory(rune) == CGE_CAT_CC;
 }
 /* Returns 1 when `rune` is in the decimal-digit category (CGE_CAT_ND), otherwise 0. */
 int CgeRuneIsDigit(uint32_t rune) {
    return CgeRuneCategory(rune) == CGE_CAT_ND;
 }
 /*
  * Returns 1 when `rune` is a letter, mark, number, punctuation, symbol or
  * space separator (CGE_CAT_ZS), otherwise 0.
  */
 int CgeRuneIsGraphic(uint32_t rune) {
    int graphic = 0;
    switch (CgeRuneCategory(rune)) {
        /* letters */
        case CGE_CAT_LU: case CGE_CAT_LL: case CGE_CAT_LT:
        case CGE_CAT_LM: case CGE_CAT_LO:
        /* marks */
        case CGE_CAT_MN: case CGE_CAT_MC: case CGE_CAT_ME:
        /* numbers */
        case CGE_CAT_ND: case CGE_CAT_NL: case CGE_CAT_NO:
        /* punctuation */
        case CGE_CAT_PC: case CGE_CAT_PD: case CGE_CAT_PS: case CGE_CAT_PE:
        case CGE_CAT_PI: case CGE_CAT_PF: case CGE_CAT_PO:
        /* symbols */
        case CGE_CAT_SM: case CGE_CAT_SC: case CGE_CAT_SK: case CGE_CAT_SO:
        /* space separator */
        case CGE_CAT_ZS:
            graphic = 1;
            break;
        default:
            break;
    }
    return graphic;
 }
 /* Returns 1 when `rune` is in one of the five letter categories, otherwise 0. */
 int CgeRuneIsLetter(uint32_t rune) {
    const int category = CgeRuneCategory(rune);
    return category == CGE_CAT_LU || category == CGE_CAT_LL ||
           category == CGE_CAT_LT || category == CGE_CAT_LM ||
           category == CGE_CAT_LO;
 }
 /* Returns 1 when `rune` is in the lowercase-letter category (CGE_CAT_LL), otherwise 0. */
 int CgeRuneIsLower(uint32_t rune) {
    return CgeRuneCategory(rune) == CGE_CAT_LL;
 }
 /* Returns 1 when `rune` is in one of the mark categories (MN, MC, ME), otherwise 0. */
 int CgeRuneIsMark(uint32_t rune) {
    const int category = CgeRuneCategory(rune);
    return category == CGE_CAT_MN || category == CGE_CAT_MC ||
           category == CGE_CAT_ME;
 }
 /* Returns 1 when `rune` is in one of the number categories (ND, NL, NO), otherwise 0. */
 int CgeRuneIsNumber(uint32_t rune) {
    const int category = CgeRuneCategory(rune);
    return category == CGE_CAT_ND || category == CGE_CAT_NL ||
           category == CGE_CAT_NO;
 }
 /*
  * Returns 1 when `rune` is a letter, mark, number, punctuation or symbol.
  * The only accepted space is the ASCII space character; everything else
  * yields 0.
  */
 int CgeRuneIsPrint(uint32_t rune) {
    switch (CgeRuneCategory(rune)) {
        /* letters */
        case CGE_CAT_LU: case CGE_CAT_LL: case CGE_CAT_LT:
        case CGE_CAT_LM: case CGE_CAT_LO:
        /* marks */
        case CGE_CAT_MN: case CGE_CAT_MC: case CGE_CAT_ME:
        /* numbers */
        case CGE_CAT_ND: case CGE_CAT_NL: case CGE_CAT_NO:
        /* punctuation */
        case CGE_CAT_PC: case CGE_CAT_PD: case CGE_CAT_PS: case CGE_CAT_PE:
        case CGE_CAT_PI: case CGE_CAT_PF: case CGE_CAT_PO:
        /* symbols */
        case CGE_CAT_SM: case CGE_CAT_SC: case CGE_CAT_SK: case CGE_CAT_SO:
            return 1;
        default:
            /* Any other category is printable only if it is the plain space. */
            return rune == ' ';
    }
 }
 /* Returns 1 when `rune` is in one of the seven punctuation categories, otherwise 0. */
 int CgeRuneIsPunct(uint32_t rune) {
    int punct;
    switch (CgeRuneCategory(rune)) {
        case CGE_CAT_PC: case CGE_CAT_PD: case CGE_CAT_PS: case CGE_CAT_PE:
        case CGE_CAT_PI: case CGE_CAT_PF: case CGE_CAT_PO:
            punct = 1;
            break;
        default:
            punct = 0;
            break;
    }
    return punct;
 }
 /*
  * Returns 1 when `rune` is white space, otherwise 0.
  *
  * White space is every rune in a separator category (ZS, ZL, ZP) plus the
  * control characters that carry the Unicode White_Space property:
  * TAB, LF, VT, FF, CR and U+0085 NEXT LINE. The control characters have
  * category CC, so they are listed explicitly; U+0085 was previously missing
  * from that list.
  */
 int CgeRuneIsSpace(uint32_t rune) {
    switch (rune) {
        case '\t':
        case '\n':
        case '\v':
        case '\f':
        case '\r':
        case 0x85: /* NEXT LINE (NEL) */
            return 1;
        default:
            break;
    }
    switch (CgeRuneCategory(rune)) {
        case CGE_CAT_ZL:
        case CGE_CAT_ZP:
        case CGE_CAT_ZS:
            return 1;
        default:
            break;
    }
    return 0;
 }
 /* Returns 1 when `rune` is in one of the symbol categories (SM, SC, SK, SO), otherwise 0. */
 int CgeRuneIsSymbol(uint32_t rune) {
    const int category = CgeRuneCategory(rune);
    return category == CGE_CAT_SM || category == CGE_CAT_SC ||
           category == CGE_CAT_SK || category == CGE_CAT_SO;
 }
 /* Returns 1 when `rune` is in the titlecase-letter category (CGE_CAT_LT), otherwise 0. */
 int CgeRuneIsTitle(uint32_t rune) {
    return CgeRuneCategory(rune) == CGE_CAT_LT;
 }
 /* Returns 1 when `rune` is in the uppercase-letter category (CGE_CAT_LU), otherwise 0. */
 int CgeRuneIsUpper(uint32_t rune) {
    return CgeRuneCategory(rune) == CGE_CAT_LU;
 }
--- a/Str.c
+++ b/Str.c
@@ -0,0 +1,307 @@
 #include "CgeStr.h"
 #include <stdint.h>
 #include <string.h>
 #define INVALID_RUNE    0xFFFD
 #define MAX_UNI_STREAM  4
 /*
  * Small fixed-capacity FIFO of runes.
  * `head` is the index of the next rune to read, `tail` the index of the next
  * free slot, and `size` the number of runes currently stored.
  */
 struct UniStream {
    uint32_t data[MAX_UNI_STREAM];
    size_t head;
    size_t tail;
    size_t size;
 };
 /* Appends `rune` to the queue. Returns 1 on success, 0 when the queue is full. */
 static int uniStreamPut(struct UniStream* stream, uint32_t rune) {
    if (stream->size < MAX_UNI_STREAM) {
        stream->data[stream->tail] = rune;
        /* Advance the write index, wrapping around at the end of the array. */
        stream->tail = (stream->tail + 1) % MAX_UNI_STREAM;
        stream->size += 1;
        return 1;
    }
    return 0;
 }
 /* Removes the oldest rune into `*rune`. Returns 1 on success, 0 when the queue is empty. */
 static int uniStreamGet(struct UniStream* stream, uint32_t* rune) {
    if (stream->size == 0)
        return 0;
    *rune = stream->data[stream->head];
    /* Advance the read index, wrapping around at the end of the array. */
    stream->head = (stream->head + 1) % MAX_UNI_STREAM;
    stream->size -= 1;
    return 1;
 }
 /*
  * Calls `cb(rune, user)` for every rune of `str`, front to back.
  * Decoding is lenient: whatever CgeUtf8DecodeLax reports for malformed input
  * is passed on to the callback as well.
  */
 void CgeStrIter(CgeStr str, CgeStrIterCb cb, void* user) {
    size_t offset = 0;
    while (offset < str.size) {
        uint32_t decoded;
        offset += CgeUtf8DecodeLax(str.data + offset, str.size - offset, &decoded);
        cb(decoded, user);
    }
 }
 /*
  * Lowercases `str` rune by rune using the full mapping (one rune may expand
  * to several). Every mapped rune is encoded as UTF-8 and passed to `cb` as a
  * separate chunk; the chunk is only valid during the callback.
  */
 void CgeStrToLower(CgeStr str, CgeStrWriteCb cb, void* user) {
    const char* cursor;
    const char* const limit = str.data + str.size;
    for (cursor = str.data; cursor < limit; ) {
        uint32_t source;
        uint32_t lowered[3];
        char encoded[4];
        int total, k;
        cursor += CgeUtf8DecodeLax(cursor, limit - cursor, &source);
        total = (int)CgeRuneLowerFull(source, lowered);
        for (k = 0; k < total; k++) {
            int length = CgeUtf8EncodeLax(lowered[k], encoded);
            cb(encoded, length, user);
        }
    }
 }
 /*
  * Uppercases `str` rune by rune using the full mapping (one rune may expand
  * to several). Every mapped rune is encoded as UTF-8 and passed to `cb` as a
  * separate chunk; the chunk is only valid during the callback.
  */
 void CgeStrToUpper(CgeStr str, CgeStrWriteCb cb, void* user) {
    const char* cursor;
    const char* const limit = str.data + str.size;
    for (cursor = str.data; cursor < limit; ) {
        uint32_t source;
        uint32_t raised[3];
        char encoded[4];
        int total, k;
        cursor += CgeUtf8DecodeLax(cursor, limit - cursor, &source);
        total = (int)CgeRuneUpperFull(source, raised);
        for (k = 0; k < total; k++) {
            int length = CgeUtf8EncodeLax(raised[k], encoded);
            cb(encoded, length, user);
        }
    }
 }
 /*
  * Case-folds `str` rune by rune using the full folding (one rune may expand
  * to several). Every folded rune is encoded as UTF-8 and passed to `cb` as a
  * separate chunk; the chunk is only valid during the callback.
  */
 void CgeStrFold(CgeStr str, CgeStrWriteCb cb, void* user) {
    const char* cursor;
    const char* const limit = str.data + str.size;
    for (cursor = str.data; cursor < limit; ) {
        uint32_t source;
        uint32_t folded[3];
        char encoded[4];
        int total, k;
        cursor += CgeUtf8DecodeLax(cursor, limit - cursor, &source);
        total = (int)CgeRuneFoldFull(source, folded);
        for (k = 0; k < total; k++) {
            int length = CgeUtf8EncodeLax(folded[k], encoded);
            cb(encoded, length, user);
        }
    }
 }
 /*
  * Byte-wise three-way comparison.
  * Returns -1 when `lhs` orders before `rhs`, 1 when it orders after and 0
  * when both are equal. If one string is a prefix of the other, the shorter
  * one orders first.
  */
 int CgeStrCmp(CgeStr lhs, CgeStr rhs) {
    size_t common = lhs.size;
    int order;
    if (rhs.size < common)
        common = rhs.size;
    order = memcmp(lhs.data, rhs.data, common);
    if (order != 0)
        return (order < 0) ? -1 : 1;
    /* The shared prefix is equal, so the lengths decide. */
    if (lhs.size == rhs.size)
        return 0;
    return (lhs.size < rhs.size) ? -1 : 1;
 }
 int CgeStrICmp(CgeStr lhs, CgeStr rhs) {
    struct UniStream buf1 = {{0}, 0, 0, 0};
    struct UniStream buf2 = {{0}, 0, 0, 0};
    const char* current1 = lhs.data;
    const char* current2 = rhs.data;
    const char* end1 = lhs.data + lhs.size;
    const char* end2 = rhs.data + rhs.size;
    while (1) {
        uint32_t rune1, rune2;
        if (!buf1.size && current1 < end1) {
            uint32_t scratch, folded[3];
            int i, count;
            current1 += CgeUtf8DecodeLax(current1, end1 - current1, &scratch);
            count = (int)CgeRuneFoldFull(scratch, folded);
            for (i = 0; i < count; i++)
                uniStreamPut(&buf1, folded[i]);
        }
        if (!buf2.size && current2 < end2) {
            uint32_t scratch, folded[3];
            int i, count;
            current2 += CgeUtf8DecodeLax(current2, end2 - current2, &scratch);
            count = (int)CgeRuneFoldFull(scratch, folded);
            for (i = 0; i < count; i++)
                uniStreamPut(&buf2, folded[i]);
        }
        if (!buf1.size && !buf2.size)
            return 0;
        if (!uniStreamGet(&buf1, &rune1))
            return -1;
        if (!uniStreamGet(&buf2, &rune2))
            return 1;
        if (rune1 < rune2)
            return -1;
        if (rune1 > rune2)
            return 1;
    }
 }
 /*
  * Returns the byte offset of the first occurrence of `rune` in `str`, or
  * (size_t)-1 when it does not occur. The string is decoded leniently.
  */
 size_t CgeStrIndexRune(CgeStr str, uint32_t rune) {
    size_t offset;
    uint32_t decoded;
    int width;
    for (offset = 0; offset < str.size; offset += width) {
        width = CgeUtf8DecodeLax(str.data + offset, str.size - offset, &decoded);
        if (decoded == rune)
            return offset;
    }
    return (size_t)-1;
 }
 /*
  * Returns the byte offset of the last occurrence of `rune` in `str`, or
  * (size_t)-1 when it does not occur.
  *
  * The string is walked backwards one rune at a time. The previous version
  * started decoding at the byte after the lead byte, so it never advanced
  * (endless loop) and read past the end of the view.
  */
 size_t CgeStrLastIndexRune(CgeStr str, uint32_t rune) {
    size_t end = str.size;
    while (end > 0) {
        size_t start = end - 1;
        uint32_t r;
        int count;
        /* Step back over at most three continuation bytes to reach the lead byte. */
        while (start > 0 && end - start < 4 &&
               ((unsigned char)str.data[start] & 0xC0) == 0x80)
            start--;
        count = CgeUtf8DecodeLax(str.data + start, end - start, &r);
        if ((size_t)count != end - start) {
            /*
             * The trailing bytes do not form exactly one sequence. Treat only
             * the last byte as a unit, as a forward scan would.
             */
            start = end - 1;
            CgeUtf8DecodeLax(str.data + start, 1, &r);
        }
        if (r == rune)
            return start;
        end = start;
    }
    return (size_t)-1;
 }
 /*
  * Returns the byte offset of the first occurrence of `substr` in `str`, or
  * (size_t)-1 when there is none. An empty `substr` matches at offset 0.
  */
 size_t CgeStrIndexStr(CgeStr str, CgeStr substr) {
    const char* probe;
    const char* lastStart;
    if (substr.size == 0)
        return 0;
    if (substr.size > str.size)
        return (size_t)-1;
    /* Last position at which `substr` still fits into `str`. */
    lastStart = str.data + (str.size - substr.size);
    for (probe = str.data; probe <= lastStart; probe++) {
        if (memcmp(probe, substr.data, substr.size) == 0)
            return (size_t)(probe - str.data);
    }
    return (size_t)-1;
 }
 /*
  * Returns the byte offset of the last occurrence of `substr` in `str`, or
  * (size_t)-1 when there is none. An empty `substr` matches at `str.size`.
  */
 size_t CgeStrLastIndexStr(CgeStr str, CgeStr substr) {
    size_t candidate;
    if (substr.size == 0)
        return str.size;
    if (substr.size > str.size)
        return (size_t)-1;
    /* One past the highest start offset; decremented before each probe. */
    candidate = str.size - substr.size + 1;
    while (candidate-- > 0) {
        if (memcmp(str.data + candidate, substr.data, substr.size) == 0)
            return candidate;
    }
    return (size_t)-1;
 }
 /* Returns 1 when `str` starts with the bytes of `prefix`, otherwise 0. */
 int CgeStrHasPrefix(CgeStr str, CgeStr prefix) {
    return prefix.size <= str.size &&
           memcmp(str.data, prefix.data, prefix.size) == 0;
 }
 /* Returns 1 when `str` ends with the bytes of `suffix`, otherwise 0. */
 int CgeStrHasSuffix(CgeStr str, CgeStr suffix) {
    const char* tail;
    if (str.size < suffix.size)
        return 0;
    tail = str.data + (str.size - suffix.size);
    return memcmp(tail, suffix.data, suffix.size) == 0;
 }
 /*
  * Returns the view of `str` that starts at the first rune for which
  * CgeRuneIsSpace is false. The result is empty when every rune is a space.
  */
 CgeStr CgeStrTrimLeft(CgeStr str) {
    const char* cursor = str.data;
    size_t remaining = str.size;
    while (remaining != 0) {
        uint32_t decoded;
        int width = CgeUtf8DecodeLax(cursor, remaining, &decoded);
        if (!CgeRuneIsSpace(decoded))
            break;
        cursor += width;
        remaining -= width;
    }
    str.data = cursor;
    str.size = remaining;
    return str;
 }
 /*
  * Returns the view of `str` that ends after the last rune for which
  * CgeRuneIsSpace is false. The result is empty when every rune is a space.
  *
  * The string is walked backwards one rune at a time. A trailing run of bytes
  * that does not decode as exactly one sequence ends with a stray
  * continuation byte, which is not white space, so trimming stops there.
  * Previously such stray bytes were trimmed together with a preceding space.
  */
 CgeStr CgeStrTrimRight(CgeStr str) {
    while (str.size) {
        size_t start = str.size - 1;
        uint32_t rune;
        int count;
        /* Step back over at most three continuation bytes to reach the lead byte. */
        while (start > 0 && str.size - start < 4 &&
               ((unsigned char)str.data[start] & 0xC0) == 0x80)
            start--;
        count = CgeUtf8DecodeLax(str.data + start, str.size - start, &rune);
        /* Malformed tail: the last byte is an invalid unit, not a space. */
        if ((size_t)count != str.size - start)
            break;
        if (!CgeRuneIsSpace(rune))
            break;
        str.size = start;
    }
    return str;
 }
 /* Returns `str` with white space removed from both ends (left first, then right). */
 CgeStr CgeStrTrim(CgeStr str) {
    CgeStr withoutLeading = CgeStrTrimLeft(str);
    return CgeStrTrimRight(withoutLeading);
 }
 /*
  * Splits off the part of `*s` that precedes the first `delim`.
  *
  * With a delimiter present, the returned view covers the bytes before it and
  * `*s` is advanced to the byte after the delimiter. Without one, the whole
  * of `*s` is returned and `s->size` is set to 0 (`s->data` is left as is).
  */
 CgeStr CgeStrSplit(CgeStr *s, uint32_t delim) {
    CgeStr head = *s;
    uint32_t ignored;
    size_t consumed;
    size_t at = CgeStrIndexRune(*s, delim);
    if (at == (size_t)-1) {
        s->size = 0;
        return head;
    }
    /* Bytes to skip: everything before the delimiter plus the delimiter itself. */
    consumed = at + CgeUtf8DecodeLax(s->data + at, s->size - at, &ignored);
    head.size = at;
    s->data += consumed;
    s->size -= consumed;
    return head;
 }
--- a/UCD.c
+++ b/UCD.c
--- a/Utf16.c
+++ b/Utf16.c
@@ -0,0 +1,64 @@
 #include "CgeStr.h"
 #define INVALID_RUNE    0xFFFD
 /*
  * Encodes `rune` as UTF-16 into `data` (room for two units is required).
  * Returns the number of units written (1 or 2), or -1 without writing
  * anything when `rune` is a surrogate or greater than 0x10FFFF.
  */
 int CgeUtf16Encode(uint32_t rune, uint16_t* data) {
    uint32_t offset;
    if (rune > 0x10FFFF)
        return -1;
    if (rune >= 0xD800 && rune <= 0xDFFF)
        return -1;
    if (rune < 0x10000) {
        data[0] = (uint16_t)rune;
        return 1;
    }
    /* Supplementary plane: split the 20-bit offset into a surrogate pair. */
    offset = rune - 0x10000;
    data[0] = (uint16_t)(0xD800 | (offset >> 10));
    data[1] = (uint16_t)(0xDC00 | (offset & 0x3FF));
    return 2;
 }
 /*
  * Like CgeUtf16Encode, but a rune that cannot be encoded is replaced by
  * INVALID_RUNE. Returns the number of units written.
  */
 int CgeUtf16EncodeLax(uint32_t rune, uint16_t* data) {
    int written = CgeUtf16Encode(rune, data);
    return (written != -1) ? written : CgeUtf16Encode(INVALID_RUNE, data);
 }
 /*
  * Decodes one rune from the UTF-16 units at `data`.
  *
  * data - input units; only read when `size` is non-zero.
  * size - number of units available at `data`.
  * rune - receives the decoded rune on success.
  *
  * Returns the number of units consumed (1 or 2), or -1 when the input is
  * empty, starts with a lone trail surrogate, or holds a lead surrogate that
  * is not followed by a trail surrogate.
  *
  * The first unit is fetched only after the size check; the previous version
  * read `data[0]` even for an empty input.
  */
 int CgeUtf16Decode(const uint16_t* data, size_t size, uint32_t* rune) {
    uint16_t lead, trail;
    if (size == 0)
        return -1;
    lead = data[0];
    if (lead < 0xD800 || lead > 0xDFFF) {
        *rune = lead;
        return 1;
    }
    /* A trail surrogate cannot start a sequence. */
    if (lead > 0xDBFF)
        return -1;
    if (size < 2)
        return -1;
    trail = data[1];
    if (trail < 0xDC00 || trail > 0xDFFF)
        return -1;
    *rune = 0x10000 + ((uint32_t)(lead & 0x3FF) << 10) + (trail & 0x3FF);
    return 2;
 }
 /*
  * Like CgeUtf16Decode, but never fails: for input that cannot be decoded,
  * `*rune` is set to INVALID_RUNE and one unit is reported as consumed.
  */
 int CgeUtf16DecodeLax(const uint16_t* data, size_t size, uint32_t* rune) {
    int consumed = CgeUtf16Decode(data, size, rune);
    if (consumed != -1)
        return consumed;
    *rune = INVALID_RUNE;
    return 1;
 }
--- a/Utf8.c
+++ b/Utf8.c
@@ -0,0 +1,101 @@
 #include "CgeStr.h"
 #define INVALID_RUNE    0xFFFD
 /*
  * Encodes `rune` as UTF-8 into `data` (room for four bytes is required).
  * Returns the number of bytes written (1 to 4), or -1 without writing
  * anything when `rune` is a surrogate or greater than 0x10FFFF.
  */
 int CgeUtf8Encode(uint32_t rune, char* data) {
    unsigned char leadMark;
    int length, i;
    if (rune > 0x10FFFF || (rune >= 0xD800 && rune <= 0xDFFF))
        return -1;
    if (rune < 0x80) {
        length = 1;
        leadMark = 0x00;
    } else if (rune < 0x800) {
        length = 2;
        leadMark = 0xC0;
    } else if (rune < 0x10000) {
        length = 3;
        leadMark = 0xE0;
    } else {
        length = 4;
        leadMark = 0xF0;
    }
    /* Fill the continuation bytes back to front, six payload bits at a time. */
    for (i = length - 1; i > 0; i--) {
        data[i] = (char)(0x80 | (rune & 0x3F));
        rune >>= 6;
    }
    /* The remaining high bits go into the lead byte. */
    data[0] = (char)(leadMark | rune);
    return length;
 }
 /*
  * Like CgeUtf8Encode, but a rune that cannot be encoded is replaced by
  * INVALID_RUNE. Returns the number of bytes written.
  */
 int CgeUtf8EncodeLax(uint32_t rune, char* data) {
    int written = CgeUtf8Encode(rune, data);
    return (written != -1) ? written : CgeUtf8Encode(INVALID_RUNE, data);
 }
 /*
  * Decodes one rune from the UTF-8 bytes at `data`.
  *
  * data - input bytes; only read when `size` is non-zero.
  * size - number of bytes available at `data`.
  * rune - receives the decoded rune on success; left untouched on failure.
  *
  * Returns the number of bytes consumed (1 to 4), or -1 when the input is
  * empty, truncated, starts with an invalid lead byte, has a bad continuation
  * byte, is an overlong encoding, or decodes to a surrogate or to a value
  * above 0x10FFFF.
  *
  * The first byte is fetched only after the size check; the previous version
  * read `data[0]` even for an empty input.
  */
 int CgeUtf8Decode(const char* data, size_t size, uint32_t* rune) {
    unsigned char byte;
    uint32_t value;
    int i, n;
    if (size == 0)
        return -1;
    byte = (unsigned char)data[0];
    if (byte < 0x80) {
        *rune = byte;
        return 1;
    }
    /* The lead byte fixes the sequence length and carries the top payload bits. */
    if ((byte & 0xE0) == 0xC0) {
        n = 2;
        value = byte & 0x1F;
    } else if ((byte & 0xF0) == 0xE0) {
        n = 3;
        value = byte & 0x0F;
    } else if ((byte & 0xF8) == 0xF0) {
        n = 4;
        value = byte & 0x07;
    } else {
        return -1;
    }
    if (size < (size_t)n)
        return -1;
    for (i = 1; i < n; i++) {
        byte = (unsigned char)data[i];
        if ((byte & 0xC0) != 0x80)
            return -1;
        value = (value << 6) | (byte & 0x3F);
    }
    /* Reject overlong forms: the value must need all `n` bytes. */
    if ((n == 2 && value < 0x80) ||
        (n == 3 && value < 0x800) ||
        (n == 4 && value < 0x10000)) {
        return -1;
    }
    if (value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) {
        return -1;
    }
    *rune = value;
    return n;
 }
 /*
  * Like CgeUtf8Decode, but never fails: for input that cannot be decoded,
  * `*rune` is set to INVALID_RUNE and one byte is reported as consumed.
  */
 int CgeUtf8DecodeLax(const char* data, size_t size, uint32_t* rune) {
    int consumed = CgeUtf8Decode(data, size, rune);
    if (consumed != -1)
        return consumed;
    *rune = INVALID_RUNE;
    return 1;
 }
--- a/generator/Blocks.h
+++ b/generator/Blocks.h
@@ -0,0 +1,168 @@
 #ifndef BLOCKS_H
 #define BLOCKS_H
 #include <stdarg.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "ValueList.h"
 /*
  * One level of the multi-level table.
  * blockInsert collects values in `data`; whenever `capacity` values have been
  * gathered the block is looked up in / added to `list` and `size` restarts
  * at 0.
  */
 struct BlockLevel {
    long* data;             /* block currently being filled (`capacity` entries) */
    size_t size;            /* number of entries already stored in `data` */
    size_t capacity;        /* entries per block on this level */
    struct ValueList* list; /* completed blocks of this level */
 };
 /* A multi-level table: `depth` levels, allocated and filled in by blockInit. */
 struct Blocks {
    struct BlockLevel* levels; /* array of `depth` levels; index 0 is the top level */
    size_t depth;              /* number of initialised entries in `levels` */
 };
 /*
  * Index of the highest set bit of `value`, i.e. floor(log2(value)).
  * Returns -1 for 0.
  */
 static int ilog2(unsigned long value) {
    int bits;
    for (bits = -1; value != 0; value >>= 1)
        bits++;
    return bits;
 }
 /*
  * Appends `value` to the table, starting at level `depth`.
  *
  * The value is first pushed into the deeper levels. Only when a deeper level
  * completes a block does its returned value get stored on this level; until
  * then -1 is returned. When this level's block becomes full it is reset and
  * the value obtained from valueListFind / valueListIntern for the completed
  * block is returned (level 0 always interns).
  */
 static long blockInsert(struct Blocks* blocks, long value, size_t depth) {
    struct BlockLevel* lvl = &blocks->levels[depth];
    long found;
    if (depth + 1 != blocks->depth) {
        /* Not the deepest level: let the levels below consume the value first. */
        value = blockInsert(blocks, value, depth + 1);
        if (value == -1)
            return -1;
    }
    lvl->data[lvl->size] = value;
    lvl->size++;
    if (lvl->size < lvl->capacity)
        return -1;
    /* The block is complete: start a new one and publish the finished block. */
    lvl->size = 0;
    if (depth != 0) {
        found = valueListFind(lvl->list, lvl->data, lvl->capacity);
        if (found != -1)
            return found;
    }
    return valueListIntern(&lvl->list, lvl->data, lvl->capacity);
 }
 /*
  * Recursive lookup step for blockFind.
  *
  * `offset` selects the block of level `depth` (position in the level's
  * list). The bits of `value` that belong to this level select the entry
  * inside that block; the entry is either the result (deepest level) or the
  * block offset for the next level.
  */
 static long blockFindR(struct Blocks* blocks, long value, long offset, size_t depth) {
    struct BlockLevel* level = blocks->levels + depth;
    struct ValueList* node = level->list;
    size_t shift = 0;
    size_t below;
    long entry;
    /* Bits consumed by all deeper levels sit below this level's bits. */
    for (below = depth + 1; below < blocks->depth; below++)
        shift += ilog2(blocks->levels[below].capacity);
    /* Walk to the selected block. */
    while (offset--)
        node = node->next;
    entry = (value >> shift) & ((unsigned long)level->capacity - 1);
    entry = node->data[entry];
    if (depth == blocks->depth - 1)
        return entry;
    return blockFindR(blocks, value, entry, depth + 1);
 }
 /*
  * Looks up the entry stored for `value` by walking the levels from the top.
  * The bits of `value` above those consumed by all levels select the starting
  * block on level 0.
  */
 static long blockFind(struct Blocks* blocks, long value) {
    size_t shift = 0;
    size_t lvl = 0;
    while (lvl < blocks->depth) {
        shift += ilog2(blocks->levels[lvl].capacity);
        lvl++;
    }
    return blockFindR(blocks, value, value >> shift, 0);
 }
 /*
  * Initialises `blocks` with `depth` levels.
  * The variadic arguments are `depth` values of type int, one block capacity
  * per level, top level first. Aborts the program when an allocation fails.
  */
 static void blockInit(struct Blocks* blocks, size_t depth, ...) {
    va_list args;
    size_t index;
    blocks->depth = 0;
    blocks->levels = malloc(sizeof(struct BlockLevel) * depth);
    if (!blocks->levels)
        abort();
    va_start(args, depth);
    for (index = 0; index < depth; index++) {
        struct BlockLevel* level = blocks->levels + index;
        level->list = NULL;
        level->size = 0;
        level->capacity = va_arg(args, int);
        level->data = malloc(level->capacity * sizeof(long));
        if (!level->data)
            abort();
        /* Count the level only once it is fully set up. */
        blocks->depth = index + 1;
    }
    va_end(args);
 }
 /*
  * Writes all blocks of level `depth` to `out` as one C array definition:
  *   static const <type> <name>[] = { ... };
  * The values of every block in the level's list are printed in list order,
  * 15 per line.
  *
  * The unused local `j` and the redundant pre-loop assignment of `current`
  * have been removed; the output is unchanged.
  */
 static void blockDump(struct Blocks* blocks, size_t depth, FILE* out,
                       const char* name, const char* type) {
    struct BlockLevel* level = &blocks->levels[depth];
    struct ValueList* current;
    size_t i, printed = 0;
    fprintf(out, "static const %s %s[] = {\n    ", type, name);
    for (current = level->list; current; current = current->next) {
        for (i = 0; i < level->capacity; i++) {
            /* Break the line once 15 values have been printed on it. */
            if (printed++ >= 15) {
                fprintf(out, "\n    ");
                printed = 1;
            }
            fprintf(out, "%ld, ", current->data[i]);
        }
    }
    fprintf(out, "\n};\n\n");
 }
 /*
  * Writes to `out` one C statement that performs the lookup step for level
  * `depth` of the generated table:
  *   - level 0:       <var> = (long)<name>[((<arg>>>bits))];
  *   - deeper levels: <var> = (long)<name>[(<var><<offset)+((<arg>>>bits)&mask)];
  * where `bits` is the number of bits consumed by the levels below, `offset`
  * is log2 of this level's capacity and `mask` is capacity - 1. Shifts by zero
  * and a zero mask are omitted from the emitted expression.
  */
 static void blockAccess(struct Blocks* blocks, size_t depth, FILE* out,
                         const char* var, const char* arg, const char* name) {
    struct BlockLevel* level = &blocks->levels[depth];
    long i, bits = 0, offset, mask;
    /* Bits of the argument that belong to the deeper levels. */
    for (i = depth + 1; i < blocks->depth; i++) {
        bits += ilog2(blocks->levels[i].capacity);
    }
    offset = ilog2(blocks->levels[depth].capacity);
    mask = level->capacity - 1;
    fprintf(out, "    %s = (long)%s", var, name);
    /* Below the top level the previous result selects the block. */
    if (depth) {
        if (offset) {
            fprintf(out, "[(%s<<%ld)", var, offset);
        } else {
            fprintf(out, "[%s", var);
        }
    }
    else
        fprintf(out, "[");
    /* Index inside the block, taken from the argument's bits for this level. */
    if (mask || !depth) {
        if (depth)
            fprintf(out, "+");
        if (mask)
            fprintf(out, "(");
        if (bits)
            fprintf(out, "(%s>>%ld)", arg, (long)bits);
        else
            fprintf(out, "%s", arg);
        if (depth)
            fprintf(out, "&%ld", mask);
        if (mask)
            fprintf(out, ")");
    }
    fprintf(out, "];\n");
 }
 #endif /* BLOCKS_H */
--- a/generator/README.md
+++ b/generator/README.md
@@ -0,0 +1,28 @@
 # Generator
 This utility regenerates the lookup tables from the Unicode Character
 Database (UCD).
 Current version of this utility builds tables and functions for the following
 properties:
 - Case mappings for lower, upper, title cases (1:1 and 1:M)
 - Case folding (1:1 and 1:M)
 - General category
 ## Usage
 Compile `Tables.c`
 ```
 gcc Tables.c -o Tables
 ```
 Download `UnicodeData.txt`, `CaseFolding.txt` and `SpecialCasing.txt`, place
 them next to the compiled `Tables` program, then run it and redirect its
 output into `UCD.c`.
 ```
 Tables > ../UCD.c
 ```
 You can download required files from [here](https://www.unicode.org/Public/UCD/latest/ucd)
--- a/generator/Tables.c
+++ b/generator/Tables.c
@@ -0,0 +1,465 @@
 #include <stdarg.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "Blocks.h"
 #include "TextProc.h"
 /* Size in bytes of the buffer entryProcess() uses for one input line. */
 #define MAX_LINE 512
 /*
  * Case-mapping record for a single code point, kept in a doubly linked list.
  * A value of 0 in any mapping slot means "no mapping recorded".
  */
 struct CaseInfo {
    /* Code point this record describes; lookup key in caseInfoGet(). */
    long rune;
    /* One-to-one mappings, stored as target code points. */
    struct {
        long lower;
        long upper;
        long title;
        long fold;
    } simple;
    /* One-to-many mappings: zero-terminated sequences of code points. */
    struct {
        long lower[4];
        long upper[4];
        long title[4];
        long fold[4];
    } full;
    /* Neighbours in the list (NULL at either end). */
    struct CaseInfo* prev;
    struct CaseInfo* next;
 };
 /*
  * Callback invoked by entryProcess() once per code point.
  *   rune   - code point being reported
  *   fill   - 1 when the code point has no line of its own (gap or trailing
  *            padding), 0 when it comes from a parsed line
  *   fields - fields of the most recently parsed line
  *   size   - number of entries in `fields`
  * The return value is only compared against -1 by entryProcess().
  */
 typedef int (*EntryCb)(long rune, int fill, char** fields, size_t size);
 /*
  * Reads a semicolon-separated UCD file from `in` and reports every code point
  * from 0 to 0x10FFFF to `cb`, in file order, with gaps reported as fill.
  *
  *   globStart/globEnd - optional patterns (both or neither) matched against
  *                       fields[globField]; a line matching globStart opens a
  *                       range that the next usable line, which must match
  *                       globEnd, closes. Every code point of the range is
  *                       reported with the closing line's fields.
  *   codeField         - index of the field holding the hexadecimal code point
  *   minFields         - lines with fewer fields are ignored
  *
  * After the input is exhausted the callback keeps being called with fill=1
  * while it returns -1.
  * NOTE(review): -1 presumably means "current block not complete yet" -
  * confirm against blockInsert().
  *
  * Aborts when a range is opened but not properly closed.
  */
 static void entryProcess(FILE* in, EntryCb cb, const char* globStart,
                           const char* globEnd, size_t globField,
                           size_t codeField, size_t minFields) {
    long code, startCode, prevCode = -1;
    char line[MAX_LINE];
    char* fields[MAX_FIELDS] = {0};
    int emitted = 0;
    size_t columns;
    while ((columns = processLine(in, line, sizeof(line), fields))) {
        /* Skip lines too short to contain the fields we are about to read. */
        if (columns < minFields || columns <= codeField)
            continue;
        code = strtol(fields[codeField], NULL, 16);
        /* Report the unassigned gap before this entry. */
        while (prevCode + 1 < code)
            emitted = cb(++prevCode, 1, fields, columns);
        if (globStart && globEnd && columns > globField &&
            glob(globStart, fields[globField])) {
            startCode = code;
            /* Find the next usable line; it has to close the range. */
            while ((columns = processLine(in, line, sizeof(line), fields))) {
                if (columns >= minFields)
                    break;
            }
            /* columns == 0 means EOF: `fields` would be stale, so bail out. */
            if (!columns || columns <= globField || columns <= codeField ||
                !glob(globEnd, fields[globField])) {
                fprintf(stderr, "Abnormal input - can't find last element\n");
                abort();
            }
            code = strtol(fields[codeField], NULL, 16);
            while (startCode <= code) {
                emitted = cb(startCode, 0, fields, columns);
                startCode++;
            }
        } else {
            emitted = cb(code, 0, fields, columns);
        }
        prevCode = code;
    }
    /* Fill everything up to the last Unicode code point. */
    while (prevCode + 1 < 0x110000)
        emitted = cb(++prevCode, 1, fields, columns);
    /*
     * Keep padding while the callback asks for more. prevCode is always
     * initialised, unlike the last parsed code, which is undefined when the
     * input contained no usable line.
     */
    while (emitted == -1)
        emitted = cb(++prevCode, 1, fields, columns);
 }
 /*
  * Sorts the doubly linked list starting at `head` by ascending `rune` using
  * repeated passes that swap adjacent out-of-order nodes, and returns the new
  * head. Lists with fewer than two nodes are returned unchanged.
  */
 static struct CaseInfo* caseInfoSort(struct CaseInfo* head) {
    int dirty = 1;
    if (head == NULL || head->next == NULL)
        return head;
    while (dirty) {
        struct CaseInfo* left = head;
        dirty = 0;
        while (left->next != NULL) {
            struct CaseInfo* right = left->next;
            if (left->rune <= right->rune) {
                left = right;
                continue;
            }
            /* Re-link the outer neighbours around the swapped pair. */
            if (left->prev != NULL)
                left->prev->next = right;
            if (right->next != NULL)
                right->next->prev = left;
            /* Exchange the pair: `right` now precedes `left`. */
            left->next = right->next;
            right->prev = left->prev;
            left->prev = right;
            right->next = left;
            if (head == left)
                head = right;
            dirty = 1;
            /* `left` stays the cursor; it is compared with its new successor. */
        }
    }
    return head;
 }
 /*
  * Returns the record for `rune` from the list at `*head`. When no record
  * exists, a zero-filled one is allocated, pushed to the front of the list
  * (updating `*head`) and returned. Returns NULL if the allocation fails.
  */
 static struct CaseInfo* caseInfoGet(struct CaseInfo** head, long rune) {
    struct CaseInfo* it;
    struct CaseInfo* fresh;
    for (it = *head; it != NULL; it = it->next) {
        if (it->rune == rune)
            return it;
    }
    fresh = calloc(1, sizeof(*fresh));
    if (fresh == NULL)
        return NULL;
    fresh->rune = rune;
    fresh->prev = NULL;
    fresh->next = *head;
    if (*head != NULL)
        (*head)->prev = fresh;
    *head = fresh;
    return fresh;
 }
 /*
  * Maps a general-category abbreviation to its index in the table below.
  * Only the first two characters of `name` are compared. Anything that is
  * not listed maps to the index of "Cn", the last entry.
  */
 static size_t categoryClassify(const char* name) {
    static const char *known[] = {
        "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Mc", "Me", "Nd", "Nl", "No", "Pc",
        "Pd", "Ps", "Pe", "Pi", "Pf", "Po", "Sm", "Sc", "Sk", "So", "Zs", "Zl",
        "Zp", "Cc", "Cf", "Cs", "Co", "Cn",
    };
    const size_t count = sizeof(known) / sizeof(known[0]);
    size_t i;
    for (i = 0; i < count; i++) {
        if (strncmp(known[i], name, 2) == 0)
            return i;
    }
    /* "Cn" closes the table, so its index doubles as the fallback. */
    return count - 1;
 }
 /* Input file currently being parsed; reopened by main() for each UCD file. */
 FILE* in;
 /* Destination of the generated source; main() points it at stdout. */
 FILE* out;
 /* General-category table, filled by entryUnicodeData(). */
 struct Blocks categoryBlocks;
 /* Head of the list of per-code-point case records (see caseInfoGet()). */
 struct CaseInfo* caseInfo = NULL;
 /*
  * EntryCb for UnicodeData.txt.
  *
  * Appends the general category of `rune` to categoryBlocks ("Cn" for fill
  * entries, otherwise the category named in fields[2]) and, when the line
  * carries a simple upper (fields[12]), lower (fields[13]) or title
  * (fields[14]) mapping, records it in the caseInfo list.
  *
  * Returns the result of blockInsert(). Aborts if a case record cannot be
  * allocated.
  */
 static int entryUnicodeData(long rune, int fill, char** fields, size_t size) {
    long lowercase, uppercase, titlecase;
    struct CaseInfo* node;
    (void)size;
    if (fill)
        return blockInsert(&categoryBlocks, categoryClassify("Cn"), 0);
    lowercase = strtol(fields[13], NULL, 16);
    uppercase = strtol(fields[12], NULL, 16);
    titlecase = strtol(fields[14], NULL, 16);
    if (lowercase || uppercase || titlecase) {
        node = caseInfoGet(&caseInfo, rune);
        /* caseInfoGet() returns NULL when it cannot allocate a record. */
        if (!node) {
            fprintf(stderr, "Out of memory\n");
            abort();
        }
        node->simple.lower = lowercase;
        node->simple.upper = uppercase;
        node->simple.title = titlecase;
    }
    return blockInsert(&categoryBlocks, categoryClassify(fields[2]), 0);
 }
 /*
  * Parses whitespace-separated hexadecimal code points from `field` into
  * `array` and terminates the sequence with 0.
  *
  * Parsing stops at the first token that is not a number, at a zero value, or
  * after three values. Every caller passes a four-element array, so at most
  * three values plus the terminator are written and the array can never be
  * overrun, whatever the input contains. Elements after the terminator are
  * left untouched.
  */
 static void arrayParseFromStr(const char* field, long* array) {
    enum { MAX_VALUES = 3 };
    const char* cursor = field;
    char* end;
    size_t written = 0;
    long value;
    while (written < MAX_VALUES) {
        value = strtol(cursor, &end, 16);
        /* end == cursor: nothing was consumed, i.e. no more numbers. */
        if (end == cursor || !value)
            break;
        array[written++] = value;
        cursor = end;
    }
    array[written] = 0;
 }
 /*
  * EntryCb for CaseFolding.txt.
  *
  * Fill entries and lines whose status (fields[1]) is "T" are ignored. A
  * status of "F" stores fields[2] as the full folding sequence; every other
  * status stores it as the simple folding.
  *
  * Always returns 1. Aborts if a case record cannot be allocated.
  */
 static int entryCaseFolding(long rune, int fill, char** fields, size_t size) {
    struct CaseInfo* node;
    (void)size;
    if (fill || !strcmp("T", fields[1]))
        return 1;
    node = caseInfoGet(&caseInfo, rune);
    /* caseInfoGet() returns NULL when it cannot allocate a record. */
    if (!node) {
        fprintf(stderr, "Out of memory\n");
        abort();
    }
    if (strcmp("F", fields[1])) {
        node->simple.fold = strtol(fields[2], NULL, 16);
    } else {
        arrayParseFromStr(fields[2], node->full.fold);
    }
    return 1;
 }
 /*
  * EntryCb for SpecialCasing.txt.
  *
  * Fill entries and lines with a non-empty condition list (fields[4]) are
  * ignored. Otherwise the lower (fields[1]), title (fields[2]) and upper
  * (fields[3]) sequences are stored as full mappings of `rune`.
  *
  * Always returns 1. Aborts if a case record cannot be allocated.
  */
 static int entrySpecialCasing(long rune, int fill, char** fields, size_t size) {
    struct CaseInfo* node;
    if (fill)
        return 1;
    /*
     * The condition field is only present when the line has a fifth field;
     * with fewer fields fields[4] would be left over from an earlier line.
     */
    if (size > 4 && fields[4][0] != '\0')
        return 1;
    node = caseInfoGet(&caseInfo, rune);
    /* caseInfoGet() returns NULL when it cannot allocate a record. */
    if (!node) {
        fprintf(stderr, "Out of memory\n");
        abort();
    }
    arrayParseFromStr(fields[1], node->full.lower);
    arrayParseFromStr(fields[3], node->full.upper);
    arrayParseFromStr(fields[2], node->full.title);
    return 1;
 }
 /*
  * Clears a zero-terminated mapping that consists of exactly one value, so
  * that it reads as "no full mapping". Longer and empty sequences are left
  * unchanged.
  */
 static void mappingRemoveSingle(long* array) {
    const int isSingle = (array[0] != 0) && (array[1] == 0);
    if (isSingle) {
        array[0] = 0;
    }
 }
 /*
  * Normalises every record in the caseInfo list:
  *  - a missing simple or full title mapping is copied from the matching
  *    upper mapping;
  *  - full mappings holding a single value are cleared (see
  *    mappingRemoveSingle()).
  */
 static void caseInfoReduce(void) {
    struct CaseInfo* node;
    for (node = caseInfo; node != NULL; node = node->next) {
        if (node->simple.upper && !node->simple.title)
            node->simple.title = node->simple.upper;
        if (node->full.upper[0] && !node->full.title[0])
            memcpy(node->full.title, node->full.upper,
                   sizeof(node->full.title));
        mappingRemoveSingle(node->full.lower);
        mappingRemoveSingle(node->full.upper);
        mappingRemoveSingle(node->full.title);
        mappingRemoveSingle(node->full.fold);
    }
 }
 /* Simple (1:1) mapping tables; blocksBuild() stores target minus source. */
 struct Blocks lowerBlocks, upperBlocks, titleBlocks, foldBlocks;
 /* Full (1:M) mapping tables; blocksBuild() stores a longIndexGet() index or -1. */
 struct Blocks lowerFullBlocks, upperFullBlocks, titleFullBlocks, foldFullBlocks;
 /* Pool of distinct multi-code-point sequences, four slots per row. */
 long longIndexData[1024][4];
 /* Number of rows of longIndexData currently in use. */
 size_t longIndexSize = 0;
 /*
  * Returns the row index of the four-element sequence `array` in
  * longIndexData, appending it first when no identical row exists yet.
  * All four slots take part in the comparison.
  *
  * Aborts instead of writing past the end of the pool when it is full.
  */
 static long longIndexGet(long* array) {
    const size_t capacity = sizeof(longIndexData) / sizeof(longIndexData[0]);
    size_t i;
    for (i = 0; i < longIndexSize; i++) {
        if (!memcmp(array, longIndexData[i], sizeof(longIndexData[i])))
            return (long)i;
    }
    if (longIndexSize >= capacity) {
        fprintf(stderr, "Abnormal input - too many multi-character mappings\n");
        abort();
    }
    memcpy(longIndexData[longIndexSize], array, sizeof(longIndexData[0]));
    return (long)longIndexSize++;
 }
 /*
  * Appends one "no mapping" entry to each of the eight case tables: 0 for the
  * simple tables, -1 for the full tables. Returns the result of the insert
  * into foldFullBlocks, which is the last one performed.
  */
 static int blocksAppendUnmapped(void) {
    blockInsert(&lowerBlocks, 0, 0);
    blockInsert(&upperBlocks, 0, 0);
    blockInsert(&titleBlocks, 0, 0);
    blockInsert(&foldBlocks, 0, 0);
    blockInsert(&lowerFullBlocks, -1, 0);
    blockInsert(&upperFullBlocks, -1, 0);
    blockInsert(&titleFullBlocks, -1, 0);
    return blockInsert(&foldFullBlocks, -1, 0);
 }
 /*
  * Builds the eight case tables from the caseInfo list, which is walked in
  * list order (main() sorts it beforehand).
  *
  * Simple tables receive the difference between the mapped code point and the
  * source code point (0 when there is no mapping). Full tables receive the
  * longIndexGet() index of the sequence (-1 when there is none). Code points
  * without a record get the "no mapping" values, and the tables are padded
  * until 0x10FFFF has been covered and the last insert into foldFullBlocks no
  * longer returns -1.
  */
 static void blocksBuild(void) {
    struct CaseInfo* node;
    long cursor = -1;
    int status = 0;
    blockInit(&lowerBlocks, 4, 1, 64, 16, 1);
    blockInit(&upperBlocks, 4, 1, 64, 16, 1);
    blockInit(&titleBlocks, 4, 1, 64, 16, 1);
    blockInit(&foldBlocks, 4, 1, 64, 16, 1);
    blockInit(&lowerFullBlocks, 4, 1, 64, 32, 1);
    blockInit(&upperFullBlocks, 4, 1, 64, 32, 1);
    blockInit(&titleFullBlocks, 4, 1, 64, 32, 1);
    blockInit(&foldFullBlocks, 4, 1, 64, 32, 1);
    for (node = caseInfo; node != NULL; node = node->next) {
        /* Code points between the previous record and this one. */
        for (; cursor + 1 < node->rune; cursor++)
            blocksAppendUnmapped();
        blockInsert(&lowerBlocks,
                    node->simple.lower ? node->simple.lower - node->rune : 0, 0);
        blockInsert(&upperBlocks,
                    node->simple.upper ? node->simple.upper - node->rune : 0, 0);
        blockInsert(&titleBlocks,
                    node->simple.title ? node->simple.title - node->rune : 0, 0);
        blockInsert(&foldBlocks,
                    node->simple.fold ? node->simple.fold - node->rune : 0, 0);
        blockInsert(&lowerFullBlocks,
                    node->full.lower[0] ? longIndexGet(node->full.lower) : -1, 0);
        blockInsert(&upperFullBlocks,
                    node->full.upper[0] ? longIndexGet(node->full.upper) : -1, 0);
        blockInsert(&titleFullBlocks,
                    node->full.title[0] ? longIndexGet(node->full.title) : -1, 0);
        status = blockInsert(&foldFullBlocks,
                    node->full.fold[0] ? longIndexGet(node->full.fold) : -1, 0);
        cursor = node->rune;
    }
    /* Tail of the code space, plus whatever padding the tables still need. */
    while (cursor + 1 < 0x110000 || status == -1) {
        status = blocksAppendUnmapped();
        cursor++;
    }
 }
 /*
  * Writes the generated C source to `out`: the category and case tables, the
  * pool of multi-code-point sequences (case_data) and the accessor functions
  * CgeRuneCategory, CgeRune{Lower,Upper,Title,Fold} and their *Full variants.
  *
  * Notes on the generated code:
  *  - <stddef.h> is included because the *Full functions return size_t.
  *  - The last-level tables of the full mappings are emitted as int32_t: they
  *    hold -1 for "no full mapping", and the generated code reads them through
  *    a (long) cast and tests t>=0, which only works if the element type is
  *    signed when long is wider than 32 bits.
  *  - The copy loop checks the bound before reading case_data, whose rows have
  *    exactly three elements.
  */
 static void outputCode(void) {
 /* Dumps the four levels of BLOCK as arrays named NAME "1" .. NAME "4". */
 #define DUMP(NAME, BLOCK, TYPE1, TYPE2, TYPE3, TYPE4) do { \
    blockDump(&BLOCK, 0, out, NAME "1", TYPE1); \
    blockDump(&BLOCK, 1, out, NAME "2", TYPE2); \
    blockDump(&BLOCK, 2, out, NAME "3", TYPE3); \
    blockDump(&BLOCK, 3, out, NAME "4", TYPE4); \
 } while (0)
    fprintf(out, "/* Auto-generated case mapping tables */\n\n");
    fprintf(out, "#include <stddef.h>\n#include <stdint.h>\n\n");
    DUMP("cat", categoryBlocks, "uint8_t", "uint16_t", "uint16_t", "uint8_t");
    DUMP("low", lowerBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("upp", upperBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("tit", titleBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("fod", foldBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("lfx", lowerFullBlocks, "uint8_t", "uint8_t", "uint8_t", "int32_t");
    DUMP("ufx", upperFullBlocks, "uint8_t", "uint8_t", "uint8_t", "int32_t");
    DUMP("tfx", titleFullBlocks, "uint8_t", "uint8_t", "uint8_t", "int32_t");
    DUMP("ffx", foldFullBlocks, "uint8_t", "uint8_t", "uint8_t", "int32_t");
    fprintf(out, "static const int32_t case_data[][3] = {");
    {
        size_t i;
        for (i = 0; i < longIndexSize; ++i) {
            if (i % 4 == 0)
                fprintf(out, "\n    ");
            fprintf(out, "{%ld, %ld, %ld}, ",
                    longIndexData[i][0], longIndexData[i][1],
                    longIndexData[i][2]);
        }
    }
    fprintf(out, "\n};\n\n");
 /* Emits CgeRune<FUNC>(): table lookup, then "delta + r" unless delta is 0. */
 #define EMIT_SIMPLE(FUNC, BLOCKS, BASE) do { \
    fprintf(out, "uint32_t CgeRune" #FUNC "(uint32_t r){\n"); \
    fprintf(out, "    long t;\n    if(r>1114111ul)return r;\n"); \
    blockAccess(&BLOCKS, 0, out, "t", "r", BASE "1"); \
    blockAccess(&BLOCKS, 1, out, "t", "r", BASE "2"); \
    blockAccess(&BLOCKS, 2, out, "t", "r", BASE "3"); \
    blockAccess(&BLOCKS, 3, out, "t", "r", BASE "4"); \
    fprintf(out, "    return t?t+r:r;\n}\n\n"); \
 } while(0)
 /*
  * Emits CgeRune<FUNC>Full(): copies the case_data row selected by the full
  * table, or falls back to CgeRune<SIMPLE>() when the table holds -1.
  */
 #define EMIT_FULL(FUNC, SIMPLE, FULL_BLOCKS, FULL_BASE) do { \
    fprintf(out, "size_t CgeRune" #FUNC "Full(uint32_t r, uint32_t* out){\n"); \
    fprintf(out, "    long t;\n    if(r>1114111ul){\n        *out=r;\n        return 1;\n    }\n"); \
    blockAccess(&FULL_BLOCKS, 0, out, "t", "r", FULL_BASE "1"); \
    blockAccess(&FULL_BLOCKS, 1, out, "t", "r", FULL_BASE "2"); \
    blockAccess(&FULL_BLOCKS, 2, out, "t", "r", FULL_BASE "3"); \
    blockAccess(&FULL_BLOCKS, 3, out, "t", "r", FULL_BASE "4"); \
    fprintf(out, "    if(t>=0){\n"); \
    fprintf(out, "        const int32_t* p=case_data[t];\n"); \
    fprintf(out, "        size_t i=0;\n"); \
    fprintf(out, "        while(i<3 && p[i]){out[i]=p[i];i++;}\n"); \
    fprintf(out, "        return i;\n    }\n"); \
    fprintf(out, "    *out=CgeRune" #SIMPLE "(r);\n    return 1;\n}\n\n"); \
 } while(0)
    fprintf(out, "int CgeRuneCategory(uint32_t r){\n");
    fprintf(out, "    long t;\n    if(r>1114111ul)return %d;\n", (int)categoryClassify("Cn"));
    blockAccess(&categoryBlocks, 0, out, "t", "r", "cat1");
    blockAccess(&categoryBlocks, 1, out, "t", "r", "cat2");
    blockAccess(&categoryBlocks, 2, out, "t", "r", "cat3");
    blockAccess(&categoryBlocks, 3, out, "t", "r", "cat4");
    fprintf(out, "    return t;\n}\n\n");
    EMIT_SIMPLE(Lower, lowerBlocks, "low");
    EMIT_SIMPLE(Upper, upperBlocks, "upp");
    EMIT_SIMPLE(Title, titleBlocks, "tit");
    EMIT_SIMPLE(Fold, foldBlocks, "fod");
    EMIT_FULL(Lower, Lower, lowerFullBlocks, "lfx");
    EMIT_FULL(Upper, Upper, upperFullBlocks, "ufx");
    EMIT_FULL(Title, Title, titleFullBlocks, "tfx");
    EMIT_FULL(Fold, Fold, foldFullBlocks, "ffx");
 }
 #undef DUMP
 #undef EMIT_SIMPLE
 #undef EMIT_FULL
 /*
  * Opens the UCD file `name` from the current directory for reading.
  * On success logs "Processing <name>" to stderr and returns the stream; on
  * failure prints where to download the file and returns NULL.
  */
 static FILE* inputOpen(const char* name) {
    FILE* file = fopen(name, "r");
    if (!file) {
        fprintf(stderr, "%s not found. Download it from:\n", name);
        fprintf(stderr, "https://www.unicode.org/Public/UCD/latest/ucd/%s\n", name);
        return NULL;
    }
    fprintf(stderr, "Processing %s\n", name);
    return file;
 }
 /*
  * Reads UnicodeData.txt, CaseFolding.txt and SpecialCasing.txt, builds the
  * tables and writes the generated source to stdout.
  *
  * Returns 0 on success and -1 when an input file is missing or the generated
  * source could not be written.
  */
 int main(void) {
    out = stdout;
    if (!(in = inputOpen("UnicodeData.txt")))
        return -1;
    blockInit(&categoryBlocks, 4, 1, 16, 8, 8);
    entryProcess(in, entryUnicodeData, "<*, First>", "<*, Last>", 1, 0, 15);
    fclose(in);
    if (!(in = inputOpen("CaseFolding.txt")))
        return -1;
    entryProcess(in, entryCaseFolding, NULL, NULL, 0, 0, 3);
    fclose(in);
    if (!(in = inputOpen("SpecialCasing.txt")))
        return -1;
    entryProcess(in, entrySpecialCasing, NULL, NULL, 0, 0, 4);
    fclose(in);
    caseInfo = caseInfoSort(caseInfo);
    caseInfoReduce();
    blocksBuild();
    outputCode();
    /* The output is usually redirected to a file; report a failed write. */
    if (fflush(out) == EOF || ferror(out)) {
        fprintf(stderr, "Failed to write generated tables\n");
        return -1;
    }
    return 0;
 }
--- a/generator/TextProc.h
+++ b/generator/TextProc.h
@@ -0,0 +1,76 @@
 #ifndef TEXTPROC_H
 #define TEXTPROC_H
 #include <ctype.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <string.h>
 /* Upper bound on the number of fields fieldParse() stores for one line. */
 #define MAX_FIELDS 16
 /*
  * Matches `text` against `pattern` and returns 1 on a full match, 0
  * otherwise. In the pattern '*' matches any run of characters (including an
  * empty one) and '?' matches exactly one character; everything else matches
  * itself.
  *
  * The wildcard test comes first so that a '*' in the pattern is never
  * consumed as a literal match for a '*' that happens to occur in the text.
  */
 static int glob(const char* pattern, const char* text) {
    const char* star = NULL;     /* pattern position just after the last '*' */
    const char* restart = text;  /* text position that '*' currently covers up to */
    while (*text) {
        if (*pattern == '*') {
            star = ++pattern;
            restart = text;
        } else if (*pattern == *text || *pattern == '?') {
            pattern++;
            text++;
        } else if (star) {
            /* Mismatch: let the last '*' absorb one more character and retry. */
            pattern = star;
            text = ++restart;
        } else {
            return 0;
        }
    }
    /* Trailing stars match the empty remainder. */
    while (*pattern == '*')
        pattern++;
    return (*pattern == '\0');
 }
 /*
  * Returns a pointer to the first non-whitespace character of `line`, or to
  * its terminator when the string is empty or all whitespace. The string
  * itself is not modified.
  */
 static char* trimLeft(char* line) {
    /* isspace() requires a value representable as unsigned char (or EOF). */
    while (*line && isspace((unsigned char)*line))
        ++line;
    return line;
 }
 /*
  * Removes trailing whitespace from `line` in place by writing a terminator
  * right after the last non-whitespace character. A string that is empty or
  * all whitespace becomes empty.
  */
 static void trimRight(char* line) {
    char* last = line;
    for (; *line; ++line) {
        /* isspace() requires a value representable as unsigned char (or EOF). */
        if (!isspace((unsigned char)*line))
            last = line + 1;
    }
    *last = '\0';
 }
 /*
  * Cuts `line` at the first '#', dropping the comment that follows it.
  * Lines without '#' are left untouched.
  */
 static void trimComment(char* line) {
    for (; *line != '\0'; ++line) {
        if (*line == '#') {
            *line = '\0';
            return;
        }
    }
 }
 /*
  * Strips whitespace from both ends of `line`: the tail is cut in place and
  * a pointer to the first remaining non-whitespace character is returned.
  */
 static char* trim(char* line) {
    char* start;
    trimRight(line);
    start = trimLeft(line);
    return start;
 }
 /*
  * Splits `line` in place at every `separator`, trims each piece and stores
  * pointers to the pieces in `fields`. At most MAX_FIELDS pieces are stored;
  * the rest of the line is ignored. Returns the number of fields stored,
  * which is at least 1.
  */
 static size_t fieldParse(char* line, char** fields, char separator) {
    size_t count = 0;
    char* cursor = line;
    for (;;) {
        char* start = cursor;
        char* cut = strchr(start, separator);
        if (cut != NULL) {
            *cut = '\0';
            cursor = cut + 1;
        } else {
            cursor = NULL;
        }
        fields[count++] = trim(start);
        if (cursor == NULL || count >= MAX_FIELDS)
            break;
    }
    return count;
 }
 /*
  * Reads the next line from `in` into `line` (a buffer of `size` bytes),
  * removes a trailing '#' comment and splits the rest into ';'-separated
  * fields. Returns the number of fields, or 0 when nothing more can be read.
  * fgets() stores at most size-1 characters, so a longer input line is
  * delivered in pieces by successive calls.
  */
 static int processLine(FILE* in, char* line, size_t size, char** fields) {
    char* got = fgets(line, (int)size, in);
    if (got == NULL)
        return 0;
    trimComment(line);
    return (int)fieldParse(line, fields, ';');
 }
 #endif /* TEXTPROC_H */
--- a/generator/ValueList.h
+++ b/generator/ValueList.h
@@ -0,0 +1,41 @@
 #ifndef VALUELIST_H
 #define VALUELIST_H
 #include <stddef.h>
 #include <string.h>
 #include <stdlib.h>
 /* Node of a singly linked list of value arrays. */
 struct ValueList {
    /* Heap copy of the values (allocated by valueListIntern()). */
    long* data;
    /* Number of elements in `data`. */
    size_t size;
    /* Next node, or NULL at the end of the list. */
    struct ValueList* next;
 };
 /*
  * Searches `list` for a node holding exactly the `size` values in `data`.
  * Returns the zero-based position of the first such node, or -1 when there
  * is none.
  */
 static long valueListFind(struct ValueList* list, long* data, size_t size) {
    struct ValueList* node;
    long position = 0;
    for (node = list; node != NULL; node = node->next, position++) {
        if (node->size != size)
            continue;
        if (memcmp(node->data, data, size * sizeof(long)) == 0)
            return position;
    }
    return -1;
 }
 /*
  * Appends a copy of the `size` values in `data` to the end of `*list` and
  * returns the zero-based position of the new node. No search for an existing
  * equal node is performed here.
  *
  * Aborts when memory cannot be allocated. An empty value set (size == 0) is
  * stored with a one-element buffer, because a zero-sized allocation may
  * legitimately return NULL and would otherwise be mistaken for a failure.
  */
 static long valueListIntern(struct ValueList** list, long* data, size_t size) {
    long index = 0;
    while (*list) ++index, list = &(*list)->next;
    if (!(*list = malloc(sizeof(**list))))
        abort();
    /* calloc() also rejects element counts whose byte size would overflow. */
    if (!((*list)->data = calloc(size ? size : 1, sizeof(long))))
        abort();
    if (size)
        memcpy((*list)->data, data, size * sizeof(long));
    (*list)->size = size;
    (*list)->next = NULL;
    return index;
 }
 #endif /* VALUELIST_H */