Initial commit

This commit is contained in:
2026-06-14 22:51:45 +03:00
commit 78bf3c74b6
18 changed files with 4096 additions and 0 deletions

68
.gitignore vendored Normal file
View File

@@ -0,0 +1,68 @@
# ---> C
# Prerequisites
*.d
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf
# ---> CMake
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps
CMakeUserPresets.json

29
CMakeLists.txt Normal file
View File

@@ -0,0 +1,29 @@
# Build script for the CgeStr static library.
cmake_minimum_required(VERSION 3.10)
project(CgeStr LANGUAGES C)
# Compile as C99 and fail the configure step if the compiler cannot provide it.
set(CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD_REQUIRED ON)
# Translation units that make up the library.
set(SOURCES
Rune.c
Str.c
UCD.c
Utf8.c
Utf16.c
)
# Public header; also copied by the install step below.
set(HEADERS
CgeStr.h
)
# The library is always built as a static archive.
add_library(CgeStr STATIC ${SOURCES} ${HEADERS})
# PUBLIC: targets linking against CgeStr also get this directory on their include path.
target_include_directories(CgeStr PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
# Install the archive and the header under the install prefix.
install(TARGETS CgeStr
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
)
install(FILES ${HEADERS} DESTINATION include)

76
CgeStr.h Normal file
View File

@@ -0,0 +1,76 @@
#ifndef CGE_STR_H
#define CGE_STR_H

#include <stddef.h> /* size_t */
#include <stdint.h>

/*
 * Non-owning view of a byte string. `size` is a byte count; the library never
 * relies on a terminating NUL.
 */
typedef struct CgeStr {
    const char* data;
    size_t size;
} CgeStr;

/*
 * Brace initializer for a CgeStr from a string literal (the terminating NUL is
 * excluded from the size). Usable only as an initializer:
 *     CgeStr s = CGE_STR_LIT("text");
 */
#define CGE_STR_LIT(s) \
    {(s), sizeof(s) - 1}

/* Receives one decoded rune per call. */
typedef void (*CgeStrIterCb)(uint32_t rune, void* user);
/*
 * Receives a chunk of UTF-8 output. The buffer belongs to the caller of the
 * callback and is only valid for the duration of the call.
 */
typedef void (*CgeStrWriteCb)(const char* data, size_t size, void* user);

/* Unicode general categories, as returned by CgeRuneCategory. */
enum CgeCat {
    CGE_CAT_LU, CGE_CAT_LL, CGE_CAT_LT, CGE_CAT_LM, CGE_CAT_LO, CGE_CAT_MN,
    CGE_CAT_MC, CGE_CAT_ME, CGE_CAT_ND, CGE_CAT_NL, CGE_CAT_NO, CGE_CAT_PC,
    CGE_CAT_PD, CGE_CAT_PS, CGE_CAT_PE, CGE_CAT_PI, CGE_CAT_PF, CGE_CAT_PO,
    CGE_CAT_SM, CGE_CAT_SC, CGE_CAT_SK, CGE_CAT_SO, CGE_CAT_ZS, CGE_CAT_ZL,
    CGE_CAT_ZP, CGE_CAT_CC, CGE_CAT_CF, CGE_CAT_CS, CGE_CAT_CO, CGE_CAT_CN
};

/* General category of `rune` as an enum CgeCat value (generated UCD.c). */
int CgeRuneCategory(uint32_t rune);

/* Simple (one rune to one rune) case mappings (generated UCD.c). */
uint32_t CgeRuneLower(uint32_t rune);
uint32_t CgeRuneUpper(uint32_t rune);
uint32_t CgeRuneTitle(uint32_t rune);
uint32_t CgeRuneFold(uint32_t rune);

/*
 * Full (one rune to several runes) case mappings (generated UCD.c). The mapped
 * runes are written to `out` and their count is returned. The library itself
 * calls these with a 3-element buffer, so provide at least that much room.
 */
size_t CgeRuneLowerFull(uint32_t rune, uint32_t* out);
size_t CgeRuneUpperFull(uint32_t rune, uint32_t* out);
size_t CgeRuneTitleFull(uint32_t rune, uint32_t* out);
size_t CgeRuneFoldFull(uint32_t rune, uint32_t* out);

/* Classification predicates built on CgeRuneCategory; each returns 1 or 0. */
int CgeRuneIsControl(uint32_t rune);
int CgeRuneIsDigit(uint32_t rune);
int CgeRuneIsGraphic(uint32_t rune);
int CgeRuneIsLetter(uint32_t rune);
int CgeRuneIsLower(uint32_t rune);
int CgeRuneIsMark(uint32_t rune);
int CgeRuneIsNumber(uint32_t rune);
int CgeRuneIsPrint(uint32_t rune);
int CgeRuneIsPunct(uint32_t rune);
int CgeRuneIsSpace(uint32_t rune);
int CgeRuneIsSymbol(uint32_t rune);
int CgeRuneIsTitle(uint32_t rune);
int CgeRuneIsUpper(uint32_t rune);

/*
 * UTF-8 codec. Encode writes up to 4 bytes to `data` and returns the number
 * written, or -1 for surrogates and values above U+10FFFF. Decode reads one
 * sequence from at most `size` bytes and returns its length, or -1 when the
 * input is empty, truncated, or not well-formed. The Lax variants never fail:
 * encoding substitutes U+FFFD, decoding yields U+FFFD and consumes one byte.
 */
int CgeUtf8Encode(uint32_t rune, char* data);
int CgeUtf8EncodeLax(uint32_t rune, char* data);
int CgeUtf8Decode(const char* data, size_t size, uint32_t* rune);
int CgeUtf8DecodeLax(const char* data, size_t size, uint32_t* rune);

/*
 * UTF-16 codec with the same conventions as the UTF-8 one; sizes and return
 * values are counted in 16-bit units (1 or 2 per rune).
 */
int CgeUtf16Encode(uint32_t rune, uint16_t* data);
int CgeUtf16EncodeLax(uint32_t rune, uint16_t* data);
int CgeUtf16Decode(const uint16_t* data, size_t size, uint32_t* rune);
int CgeUtf16DecodeLax(const uint16_t* data, size_t size, uint32_t* rune);

/* Calls `cb` for every rune of `str`, decoding laxly. */
void CgeStrIter(CgeStr str, CgeStrIterCb cb, void* user);

/*
 * Case conversion using the full mappings. `cb` is called once per output
 * rune with that rune's UTF-8 bytes.
 */
void CgeStrToLower(CgeStr str, CgeStrWriteCb cb, void* user);
void CgeStrToUpper(CgeStr str, CgeStrWriteCb cb, void* user);
void CgeStrFold(CgeStr str, CgeStrWriteCb cb, void* user);

/* Byte-wise three-way comparison; returns -1, 0 or 1. */
int CgeStrCmp(CgeStr lhs, CgeStr rhs);
/* Three-way comparison of the full case foldings; returns -1, 0 or 1. */
int CgeStrICmp(CgeStr lhs, CgeStr rhs);

/* Searches. Each returns a byte offset into `str`, or (size_t)-1 if absent. */
size_t CgeStrIndexRune(CgeStr str, uint32_t rune);
size_t CgeStrLastIndexRune(CgeStr str, uint32_t rune);
size_t CgeStrIndexStr(CgeStr str, CgeStr substr);
size_t CgeStrLastIndexStr(CgeStr str, CgeStr substr);

/* Byte-wise prefix/suffix tests; each returns 1 or 0. */
int CgeStrHasPrefix(CgeStr str, CgeStr prefix);
int CgeStrHasSuffix(CgeStr str, CgeStr suffix);

/* Return a sub-view of `str` without leading and/or trailing space runes. */
CgeStr CgeStrTrimLeft(CgeStr str);
CgeStr CgeStrTrimRight(CgeStr str);
CgeStr CgeStrTrim(CgeStr str);

/*
 * Returns the part of *s before the first `delim` and advances *s past the
 * delimiter. When `delim` does not occur, returns all of *s and sets s->size
 * to 0.
 */
CgeStr CgeStrSplit(CgeStr *s, uint32_t delim);

#endif /* CGE_STR_H */

12
LICENSE Normal file
View File

@@ -0,0 +1,12 @@
Copyright (C) 2026 by blankhex me@blankhex.com
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

23
Makefile.mingw Normal file
View File

@@ -0,0 +1,23 @@
# MinGW Makefile for CgeStr
# Builds the static archive libCgeStr.a from the five library sources.
CC = gcc
AR = ar
CFLAGS = -std=c99 -O2 -Wall -Wextra
ARFLAGS = rcs
TARGET = libCgeStr.a
SOURCES = Rune.c Str.c UCD.c Utf8.c Utf16.c
OBJECTS = $(SOURCES:.c=.o)
.PHONY: all clean
all: $(TARGET)
# Archive all object files into the static library.
$(TARGET): $(OBJECTS)
$(AR) $(ARFLAGS) $@ $^
# Every object also depends on the public header, so editing it rebuilds everything.
%.o: %.c CgeStr.h
$(CC) $(CFLAGS) -c $< -o $@
# Uses the cmd.exe `del` command; errors (e.g. nothing to delete) are ignored.
clean:
del $(OBJECTS) $(TARGET) 2>nul || exit 0

33
Makefile.posix Normal file
View File

@@ -0,0 +1,33 @@
# POSIX Makefile for CgeStr
# Builds the static archive libCgeStr.a.
#
# Install location can be overridden on the command line, e.g.
#   make -f Makefile.posix install PREFIX=/opt/cgestr
#   make -f Makefile.posix install DESTDIR=/tmp/stage
CC = gcc
AR = ar
CFLAGS = -std=c99 -O2 -Wall -Wextra -fPIC
ARFLAGS = rcs
PREFIX = /usr/local
DESTDIR =
TARGET = libCgeStr.a
SOURCES = Rune.c Str.c UCD.c Utf8.c Utf16.c
OBJECTS = $(SOURCES:.c=.o)

# None of these names is a real file; `uninstall` was previously missing here.
.PHONY: all clean install uninstall

all: $(TARGET)

# Archive all object files into the static library.
$(TARGET): $(OBJECTS)
	$(AR) $(ARFLAGS) $@ $^

# Every object also depends on the public header.
%.o: %.c CgeStr.h
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -f $(OBJECTS) $(TARGET)

install: $(TARGET)
	mkdir -p $(DESTDIR)$(PREFIX)/lib $(DESTDIR)$(PREFIX)/include
	cp $(TARGET) $(DESTDIR)$(PREFIX)/lib/
	cp CgeStr.h $(DESTDIR)$(PREFIX)/include/
	ldconfig || echo "Run ldconfig manually if needed"

uninstall:
	rm -f $(DESTDIR)$(PREFIX)/lib/libCgeStr.a
	rm -f $(DESTDIR)$(PREFIX)/include/CgeStr.h
	ldconfig || true

23
Makefile.win32 Normal file
View File

@@ -0,0 +1,23 @@
# Makefile.win32 for MSVC (NMake)
# Usage: Open "x86 Native Tools Command Prompt", then:
# nmake -f Makefile.win32
# Builds the static library CgeStr.lib.
CC = cl
LIB = lib
CFLAGS = /c /nologo /W3 /O2
LIBFLAGS = /nologo
TARGET = CgeStr.lib
SOURCES = Rune.c Str.c UCD.c Utf8.c Utf16.c
OBJECTS = $(SOURCES:.c=.obj)
# First target in the file, so it is what a plain `nmake` builds.
$(TARGET): $(OBJECTS)
$(LIB) $(LIBFLAGS) /OUT:$(TARGET) $(OBJECTS)
# Inference rule: compile a .c file from the current directory into a .obj.
{.}.c{}.obj:
$(CC) $(CFLAGS) /Fo$@ $<
clean:
del $(OBJECTS) $(TARGET) 2>nul
# NOTE(review): .PHONY is a GNU make convention; confirm NMake treats it as intended.
.PHONY: clean

56
README.md Normal file
View File

@@ -0,0 +1,56 @@
# CgeStr - Unicode String Library for C
A lightweight, dependency-free C library for UTF-8 string processing with full
Unicode support.
## Features
- UTF-8 encoding and decoding
- UTF-16 encoding and decoding
- Unicode case mapping: lowercase, uppercase, titlecase, case folding
- Full case mapping functions returning multiple runes when needed
- Character classification: isControl, isDigit, isLetter, isSpace, and others
- Unicode category lookup via `CgeRuneCategory`
- Case-sensitive and case-insensitive string comparison
- Substring and rune search
- Prefix and suffix checking
- String trimming (left, right, both)
- String splitting by rune
- Iteration over Unicode code points using callback interface
## Build Systems
- CMake - supports Linux, macOS, Windows (MSVC, MinGW)
- Makefile.posix - for GCC/Clang on POSIX systems
- Makefile.mingw - for MinGW on Windows
- Makefile.win32 - for MSVC with NMake
Builds a static library. No shared library or external dependencies.
## Usage Example
```c
#include "CgeStr.h"
#include <stdio.h>
void print_rune(uint32_t rune, void* user) {
printf("U+%04X ", rune);
}
int main() {
CgeStr str = CGE_STR_LIT("Héllo, 世界!");
CgeStrIter(str, print_rune, NULL);
printf("\n");
return 0;
}
```
## Portability
- Written in C89+stdint.h
- No dynamic memory allocation
- No external dependencies
## License
0BSD - a permissive license with no attribution required.

176
Rune.c Normal file
View File

@@ -0,0 +1,176 @@
#include "CgeStr.h"
/* Returns 1 when `rune` has general category Cc (control), otherwise 0. */
int CgeRuneIsControl(uint32_t rune) {
    return CgeRuneCategory(rune) == CGE_CAT_CC;
}
/* Returns 1 when `rune` has general category Nd (decimal digit), otherwise 0. */
int CgeRuneIsDigit(uint32_t rune) {
    return CgeRuneCategory(rune) == CGE_CAT_ND;
}
/*
 * Returns 1 when `rune` is a letter, mark, number, punctuation, symbol or
 * space separator (categories L*, M*, N*, P*, S* and Zs), otherwise 0.
 */
int CgeRuneIsGraphic(uint32_t rune) {
    static const int graphic[] = {
        CGE_CAT_LL, CGE_CAT_LM, CGE_CAT_LO, CGE_CAT_LT, CGE_CAT_LU,
        CGE_CAT_MC, CGE_CAT_ME, CGE_CAT_MN,
        CGE_CAT_ND, CGE_CAT_NL, CGE_CAT_NO,
        CGE_CAT_PC, CGE_CAT_PD, CGE_CAT_PE, CGE_CAT_PF, CGE_CAT_PI,
        CGE_CAT_PO, CGE_CAT_PS,
        CGE_CAT_SC, CGE_CAT_SK, CGE_CAT_SM, CGE_CAT_SO,
        CGE_CAT_ZS
    };
    const int category = CgeRuneCategory(rune);
    size_t k;

    for (k = 0; k < sizeof(graphic) / sizeof(graphic[0]); k++) {
        if (graphic[k] == category)
            return 1;
    }
    return 0;
}
/* Returns 1 when `rune` is in one of the letter categories (Ll, Lm, Lo, Lt, Lu). */
int CgeRuneIsLetter(uint32_t rune) {
    const int category = CgeRuneCategory(rune);

    return category == CGE_CAT_LL || category == CGE_CAT_LM ||
           category == CGE_CAT_LO || category == CGE_CAT_LT ||
           category == CGE_CAT_LU;
}
/* Returns 1 when `rune` has general category Ll (lowercase letter), otherwise 0. */
int CgeRuneIsLower(uint32_t rune) {
    return CgeRuneCategory(rune) == CGE_CAT_LL;
}
/* Returns 1 when `rune` is in one of the mark categories (Mc, Me, Mn). */
int CgeRuneIsMark(uint32_t rune) {
    const int category = CgeRuneCategory(rune);

    return category == CGE_CAT_MC || category == CGE_CAT_ME ||
           category == CGE_CAT_MN;
}
/* Returns 1 when `rune` is in one of the number categories (Nd, Nl, No). */
int CgeRuneIsNumber(uint32_t rune) {
    const int category = CgeRuneCategory(rune);

    return category == CGE_CAT_ND || category == CGE_CAT_NL ||
           category == CGE_CAT_NO;
}
/*
 * Returns 1 when `rune` is a letter, mark, number, punctuation or symbol
 * (categories L*, M*, N*, P*, S*). The only space accepted is the ASCII
 * space character; other Zs runes are not printable here.
 */
int CgeRuneIsPrint(uint32_t rune) {
    static const int printable[] = {
        CGE_CAT_LL, CGE_CAT_LM, CGE_CAT_LO, CGE_CAT_LT, CGE_CAT_LU,
        CGE_CAT_MC, CGE_CAT_ME, CGE_CAT_MN,
        CGE_CAT_ND, CGE_CAT_NL, CGE_CAT_NO,
        CGE_CAT_PC, CGE_CAT_PD, CGE_CAT_PE, CGE_CAT_PF, CGE_CAT_PI,
        CGE_CAT_PO, CGE_CAT_PS,
        CGE_CAT_SC, CGE_CAT_SK, CGE_CAT_SM, CGE_CAT_SO
    };
    const int category = CgeRuneCategory(rune);
    size_t k;

    for (k = 0; k < sizeof(printable) / sizeof(printable[0]); k++) {
        if (printable[k] == category)
            return 1;
    }
    return rune == ' ';
}
/* Returns 1 when `rune` is in a punctuation category (Pc, Pd, Pe, Pf, Pi, Po, Ps). */
int CgeRuneIsPunct(uint32_t rune) {
    const int category = CgeRuneCategory(rune);

    return category == CGE_CAT_PC || category == CGE_CAT_PD ||
           category == CGE_CAT_PE || category == CGE_CAT_PF ||
           category == CGE_CAT_PI || category == CGE_CAT_PO ||
           category == CGE_CAT_PS;
}
/*
 * Returns 1 when `rune` is a separator (categories Zl, Zp, Zs) or one of the
 * control characters \f, \n, \r, \t, \v; otherwise 0.
 */
int CgeRuneIsSpace(uint32_t rune) {
    const int category = CgeRuneCategory(rune);

    if (category == CGE_CAT_ZL || category == CGE_CAT_ZP ||
        category == CGE_CAT_ZS)
        return 1;

    return rune == '\f' || rune == '\n' || rune == '\r' ||
           rune == '\t' || rune == '\v';
}
/* Returns 1 when `rune` is in one of the symbol categories (Sc, Sk, Sm, So). */
int CgeRuneIsSymbol(uint32_t rune) {
    const int category = CgeRuneCategory(rune);

    return category == CGE_CAT_SC || category == CGE_CAT_SK ||
           category == CGE_CAT_SM || category == CGE_CAT_SO;
}
/* Returns 1 when `rune` has general category Lt (titlecase letter), otherwise 0. */
int CgeRuneIsTitle(uint32_t rune) {
    return CgeRuneCategory(rune) == CGE_CAT_LT;
}
/* Returns 1 when `rune` has general category Lu (uppercase letter), otherwise 0. */
int CgeRuneIsUpper(uint32_t rune) {
    return CgeRuneCategory(rune) == CGE_CAT_LU;
}

307
Str.c Normal file
View File

@@ -0,0 +1,307 @@
#include "CgeStr.h"
#include <stdint.h>
#include <string.h>
#define INVALID_RUNE 0xFFFD
#define MAX_UNI_STREAM 4
/*
 * Small fixed-capacity FIFO of runes. CgeStrICmp keeps one per operand to hold
 * the folded runes of the character currently being compared.
 */
struct UniStream {
uint32_t data[MAX_UNI_STREAM]; /* storage; indices wrap using a power-of-two mask */
size_t head; /* index of the next rune to read */
size_t tail; /* index of the next free slot */
size_t size; /* number of runes currently stored */
};
/* Appends `rune` to the queue. Returns 1 on success, 0 when the queue is full. */
static int uniStreamPut(struct UniStream* stream, uint32_t rune) {
    size_t slot;

    if (stream->size >= MAX_UNI_STREAM)
        return 0;

    slot = stream->tail;
    stream->data[slot] = rune;
    /* MAX_UNI_STREAM is a power of two, so masking wraps the index. */
    stream->tail = (slot + 1) & (MAX_UNI_STREAM - 1);
    stream->size += 1;
    return 1;
}
/* Removes the oldest rune into *rune. Returns 1 on success, 0 when the queue is empty. */
static int uniStreamGet(struct UniStream* stream, uint32_t* rune) {
    size_t slot;

    if (stream->size == 0)
        return 0;

    slot = stream->head;
    *rune = stream->data[slot];
    /* MAX_UNI_STREAM is a power of two, so masking wraps the index. */
    stream->head = (slot + 1) & (MAX_UNI_STREAM - 1);
    stream->size -= 1;
    return 1;
}
/*
 * Decodes `str` rune by rune and passes each rune to `cb` together with
 * `user`. Invalid bytes are reported as U+FFFD, one byte at a time.
 */
void CgeStrIter(CgeStr str, CgeStrIterCb cb, void* user) {
    size_t offset = 0;

    while (offset < str.size) {
        uint32_t decoded;

        offset += CgeUtf8DecodeLax(str.data + offset, str.size - offset, &decoded);
        cb(decoded, user);
    }
}
/*
 * Lowercases `str` with the full case mapping. For every output rune `cb` is
 * called with its UTF-8 encoding; the buffer is only valid during the call.
 */
void CgeStrToLower(CgeStr str, CgeStrWriteCb cb, void* user) {
    size_t offset = 0;

    while (offset < str.size) {
        uint32_t decoded;
        uint32_t lowered[3];
        int produced, k;

        offset += CgeUtf8DecodeLax(str.data + offset, str.size - offset, &decoded);
        produced = (int)CgeRuneLowerFull(decoded, lowered);
        for (k = 0; k < produced; k++) {
            char encoded[4];
            const int length = CgeUtf8EncodeLax(lowered[k], encoded);

            cb(encoded, length, user);
        }
    }
}
/*
 * Uppercases `str` with the full case mapping. For every output rune `cb` is
 * called with its UTF-8 encoding; the buffer is only valid during the call.
 */
void CgeStrToUpper(CgeStr str, CgeStrWriteCb cb, void* user) {
    size_t offset = 0;

    while (offset < str.size) {
        uint32_t decoded;
        uint32_t raised[3];
        int produced, k;

        offset += CgeUtf8DecodeLax(str.data + offset, str.size - offset, &decoded);
        produced = (int)CgeRuneUpperFull(decoded, raised);
        for (k = 0; k < produced; k++) {
            char encoded[4];
            const int length = CgeUtf8EncodeLax(raised[k], encoded);

            cb(encoded, length, user);
        }
    }
}
/*
 * Case-folds `str` with the full folding. For every output rune `cb` is
 * called with its UTF-8 encoding; the buffer is only valid during the call.
 */
void CgeStrFold(CgeStr str, CgeStrWriteCb cb, void* user) {
    size_t offset = 0;

    while (offset < str.size) {
        uint32_t decoded;
        uint32_t folded[3];
        int produced, k;

        offset += CgeUtf8DecodeLax(str.data + offset, str.size - offset, &decoded);
        produced = (int)CgeRuneFoldFull(decoded, folded);
        for (k = 0; k < produced; k++) {
            char encoded[4];
            const int length = CgeUtf8EncodeLax(folded[k], encoded);

            cb(encoded, length, user);
        }
    }
}
/*
 * Byte-wise three-way comparison. Returns -1 when `lhs` sorts before `rhs`,
 * 1 when it sorts after, and 0 when both hold the same bytes. When one string
 * is a prefix of the other, the shorter one sorts first.
 */
int CgeStrCmp(CgeStr lhs, CgeStr rhs) {
    const size_t common = (lhs.size < rhs.size) ? lhs.size : rhs.size;

    /*
     * memcmp requires valid pointers even for a zero length, and an empty
     * CgeStr may legitimately carry a NULL data pointer, so skip the call.
     */
    if (common > 0) {
        const int order = memcmp(lhs.data, rhs.data, common);

        if (order != 0)
            return order < 0 ? -1 : 1;
    }

    if (lhs.size != rhs.size)
        return lhs.size < rhs.size ? -1 : 1;
    return 0;
}
/*
 * Decodes one rune at `cursor`, queues its full case folding in `queue` and
 * returns the position just past the decoded rune.
 */
static const char* icmpFill(struct UniStream* queue, const char* cursor,
                            const char* end) {
    uint32_t decoded, folded[3];
    int produced, k;

    cursor += CgeUtf8DecodeLax(cursor, end - cursor, &decoded);
    produced = (int)CgeRuneFoldFull(decoded, folded);
    for (k = 0; k < produced; k++)
        uniStreamPut(queue, folded[k]);
    return cursor;
}

/*
 * Case-insensitive three-way comparison. Both strings are case-folded on the
 * fly (full folding) and compared rune by rune. Returns -1, 0 or 1; a string
 * whose folding is a proper prefix of the other's sorts first.
 */
int CgeStrICmp(CgeStr lhs, CgeStr rhs) {
    struct UniStream left = {{0}, 0, 0, 0};
    struct UniStream right = {{0}, 0, 0, 0};
    const char* leftPos = lhs.data;
    const char* rightPos = rhs.data;
    const char* leftEnd = lhs.data + lhs.size;
    const char* rightEnd = rhs.data + rhs.size;

    for (;;) {
        uint32_t a, b;

        /* Refill a queue only once it has been drained completely. */
        if (left.size == 0 && leftPos < leftEnd)
            leftPos = icmpFill(&left, leftPos, leftEnd);
        if (right.size == 0 && rightPos < rightEnd)
            rightPos = icmpFill(&right, rightPos, rightEnd);

        if (left.size == 0 && right.size == 0)
            return 0;
        if (!uniStreamGet(&left, &a))
            return -1;
        if (!uniStreamGet(&right, &b))
            return 1;
        if (a != b)
            return a < b ? -1 : 1;
    }
}
/*
 * Returns the byte offset of the first occurrence of `rune` in `str`, or
 * (size_t)-1 when it does not occur. Invalid bytes decode as U+FFFD and can
 * therefore be found by searching for U+FFFD.
 */
size_t CgeStrIndexRune(CgeStr str, uint32_t rune) {
    size_t offset = 0;

    while (offset < str.size) {
        uint32_t decoded;
        const int width = CgeUtf8DecodeLax(str.data + offset, str.size - offset, &decoded);

        if (decoded == rune)
            return offset;
        offset += width;
    }
    return (size_t)-1;
}
/*
 * Returns the byte offset of the last occurrence of `rune` in `str`, or
 * (size_t)-1 when it does not occur.
 *
 * The string is walked backwards one rune at a time. A rune is located by
 * stepping back over at most three continuation bytes to its lead byte. If
 * the bytes found that way do not decode as exactly one sequence ending at
 * the current position, the final byte is treated as a single invalid byte
 * (U+FFFD), which matches how forward lax decoding reports stray bytes.
 */
size_t CgeStrLastIndexRune(CgeStr str, uint32_t rune) {
    size_t end = str.size;

    while (end > 0) {
        size_t start = end - 1;
        uint32_t decoded;
        int width;

        /* A UTF-8 sequence has at most 3 continuation bytes. */
        while (start > 0 && end - start < 4 &&
               (str.data[start] & 0xC0) == 0x80)
            start--;

        width = CgeUtf8DecodeLax(str.data + start, end - start, &decoded);
        if ((size_t)width != end - start) {
            /* Not one well-formed sequence: consume only the last byte. */
            start = end - 1;
            CgeUtf8DecodeLax(str.data + start, 1, &decoded);
        }

        if (decoded == rune)
            return start;
        /* start <= end - 1, so every iteration makes progress. */
        end = start;
    }
    return (size_t)-1;
}
/*
 * Returns the byte offset of the first occurrence of `substr` in `str`, or
 * (size_t)-1 when it does not occur. An empty `substr` is found at offset 0.
 */
size_t CgeStrIndexStr(CgeStr str, CgeStr substr) {
    size_t offset, lastStart;

    if (substr.size == 0)
        return 0;
    if (substr.size > str.size)
        return (size_t)-1;

    lastStart = str.size - substr.size;
    for (offset = 0; offset <= lastStart; ++offset) {
        if (memcmp(str.data + offset, substr.data, substr.size) == 0)
            return offset;
    }
    return (size_t)-1;
}
/*
 * Returns the byte offset of the last occurrence of `substr` in `str`, or
 * (size_t)-1 when it does not occur. An empty `substr` is found at str.size.
 */
size_t CgeStrLastIndexStr(CgeStr str, CgeStr substr) {
    size_t remaining;

    if (substr.size == 0)
        return str.size;
    if (substr.size > str.size)
        return (size_t)-1;

    /* `remaining` counts candidate start offsets still to be tried. */
    remaining = str.size - substr.size + 1;
    while (remaining-- > 0) {
        if (memcmp(str.data + remaining, substr.data, substr.size) == 0)
            return remaining;
    }
    return (size_t)-1;
}
/*
 * Returns 1 when `str` starts with the bytes of `prefix`, otherwise 0. An
 * empty prefix always matches.
 */
int CgeStrHasPrefix(CgeStr str, CgeStr prefix) {
    if (prefix.size > str.size)
        return 0;
    /* Avoid memcmp on an empty view: its data pointer may be NULL. */
    if (prefix.size == 0)
        return 1;
    return memcmp(str.data, prefix.data, prefix.size) == 0;
}
/*
 * Returns 1 when `str` ends with the bytes of `suffix`, otherwise 0. An empty
 * suffix always matches.
 */
int CgeStrHasSuffix(CgeStr str, CgeStr suffix) {
    if (suffix.size > str.size)
        return 0;
    /* Avoid memcmp on an empty view: its data pointer may be NULL. */
    if (suffix.size == 0)
        return 1;
    return memcmp(str.data + (str.size - suffix.size), suffix.data,
                  suffix.size) == 0;
}
/*
 * Returns a view of `str` with leading space runes (per CgeRuneIsSpace)
 * removed. The bytes themselves are not copied or modified.
 */
CgeStr CgeStrTrimLeft(CgeStr str) {
    size_t skipped = 0;

    while (skipped < str.size) {
        uint32_t decoded;
        const int width = CgeUtf8DecodeLax(str.data + skipped, str.size - skipped, &decoded);

        if (!CgeRuneIsSpace(decoded))
            break;
        skipped += width;
    }

    str.data += skipped;
    str.size -= skipped;
    return str;
}
/*
 * Returns a view of `str` with trailing space runes (per CgeRuneIsSpace)
 * removed. The bytes themselves are not copied or modified.
 *
 * The last rune is located by stepping back over at most three continuation
 * bytes. If the bytes found that way are not exactly one well-formed sequence
 * reaching the end of the string, only the final byte is examined on its own,
 * so stray trailing bytes are never trimmed together with a preceding space.
 */
CgeStr CgeStrTrimRight(CgeStr str) {
    while (str.size) {
        size_t start = str.size - 1;
        uint32_t decoded;
        int width;

        /* A UTF-8 sequence has at most 3 continuation bytes. */
        while (start > 0 && str.size - start < 4 &&
               (str.data[start] & 0xC0) == 0x80)
            start--;

        width = CgeUtf8DecodeLax(str.data + start, str.size - start, &decoded);
        if ((size_t)width != str.size - start) {
            /* Malformed tail: judge the last byte by itself. */
            start = str.size - 1;
            CgeUtf8DecodeLax(str.data + start, 1, &decoded);
        }

        if (!CgeRuneIsSpace(decoded))
            break;
        str.size = start;
    }
    return str;
}
/* Returns a view of `str` with both leading and trailing space runes removed. */
CgeStr CgeStrTrim(CgeStr str) {
    const CgeStr withoutLeading = CgeStrTrimLeft(str);

    return CgeStrTrimRight(withoutLeading);
}
/*
 * Splits off the part of *s that precedes the first `delim` and returns it.
 * *s is advanced to the byte following the delimiter. When `delim` does not
 * occur, the whole of *s is returned and s->size is set to 0 (s->data is left
 * unchanged).
 */
CgeStr CgeStrSplit(CgeStr *s, uint32_t delim) {
    CgeStr head = *s;
    const size_t at = CgeStrIndexRune(head, delim);
    uint32_t ignored;
    size_t consumed;

    if (at == (size_t)-1) {
        s->size = 0;
        return head;
    }

    /* Decode the delimiter again only to learn how many bytes it occupies. */
    consumed = at + CgeUtf8DecodeLax(head.data + at, head.size - at, &ignored);
    head.size = at;
    s->data += consumed;
    s->size -= consumed;
    return head;
}

2350
UCD.c Normal file

File diff suppressed because it is too large Load Diff

64
Utf16.c Normal file
View File

@@ -0,0 +1,64 @@
#include "CgeStr.h"
#define INVALID_RUNE 0xFFFD
/*
 * Encodes `rune` as UTF-16 into `data` (room for 2 units required).
 * Returns the number of 16-bit units written (1 or 2), or -1 when `rune` is a
 * surrogate (U+D800..U+DFFF) or above U+10FFFF; nothing is written then.
 */
int CgeUtf16Encode(uint32_t rune, uint16_t* data) {
    uint32_t offset;

    if (rune > 0x10FFFF)
        return -1;
    if (rune >= 0xD800 && rune <= 0xDFFF)
        return -1;

    if (rune < 0x10000) {
        data[0] = (uint16_t)rune;
        return 1;
    }

    /* Supplementary plane: split the 20-bit offset into a surrogate pair. */
    offset = rune - 0x10000;
    data[0] = (uint16_t)(0xD800 | (offset >> 10));
    data[1] = (uint16_t)(0xDC00 | (offset & 0x3FF));
    return 2;
}
/*
 * Like CgeUtf16Encode, but never fails: a rune that cannot be encoded is
 * replaced by U+FFFD. Returns the number of 16-bit units written.
 */
int CgeUtf16EncodeLax(uint32_t rune, uint16_t* data) {
    const int written = CgeUtf16Encode(rune, data);

    if (written != -1)
        return written;
    return CgeUtf16Encode(INVALID_RUNE, data);
}
/*
 * Decodes one rune from the UTF-16 sequence `data` of `size` 16-bit units.
 * Returns the number of units consumed (1 or 2) and stores the rune in *rune.
 * Returns -1 when `size` is 0, when the input starts with a trail surrogate,
 * or when a lead surrogate is not followed by a trail surrogate.
 */
int CgeUtf16Decode(const uint16_t* data, size_t size, uint32_t* rune) {
    uint16_t lead, trail;

    /* Check the length before touching data[0]; an empty input has no units. */
    if (size == 0)
        return -1;

    lead = data[0];
    if (lead < 0xD800 || lead > 0xDFFF) {
        *rune = lead;
        return 1;
    }
    if (lead > 0xDBFF)
        return -1; /* lone trail surrogate */
    if (size < 2)
        return -1; /* lead surrogate at the very end */

    trail = data[1];
    if (trail < 0xDC00 || trail > 0xDFFF)
        return -1;

    *rune = 0x10000 + (((uint32_t)lead & 0x3FF) << 10) + (trail & 0x3FF);
    return 2;
}
/*
 * Like CgeUtf16Decode, but never fails: undecodable input yields U+FFFD and
 * is reported as one consumed unit. Returns the number of units consumed.
 */
int CgeUtf16DecodeLax(const uint16_t* data, size_t size, uint32_t* rune) {
    const int consumed = CgeUtf16Decode(data, size, rune);

    if (consumed != -1)
        return consumed;

    *rune = INVALID_RUNE;
    return 1;
}

101
Utf8.c Normal file
View File

@@ -0,0 +1,101 @@
#include "CgeStr.h"
#define INVALID_RUNE 0xFFFD
/*
 * Encodes `rune` as UTF-8 into `data` (room for 4 bytes required).
 * Returns the number of bytes written (1 to 4), or -1 when `rune` is a
 * surrogate (U+D800..U+DFFF) or above U+10FFFF; nothing is written then.
 */
int CgeUtf8Encode(uint32_t rune, char* data) {
    if (rune > 0x10FFFF || (rune >= 0xD800 && rune <= 0xDFFF))
        return -1;

    if (rune <= 0x7F) {
        data[0] = (char)rune;
        return 1;
    }
    if (rune <= 0x7FF) {
        data[0] = (char)(0xC0 | (rune >> 6));
        data[1] = (char)(0x80 | (rune & 0x3F));
        return 2;
    }
    if (rune <= 0xFFFF) {
        data[0] = (char)(0xE0 | (rune >> 12));
        data[1] = (char)(0x80 | ((rune >> 6) & 0x3F));
        data[2] = (char)(0x80 | (rune & 0x3F));
        return 3;
    }

    data[0] = (char)(0xF0 | (rune >> 18));
    data[1] = (char)(0x80 | ((rune >> 12) & 0x3F));
    data[2] = (char)(0x80 | ((rune >> 6) & 0x3F));
    data[3] = (char)(0x80 | (rune & 0x3F));
    return 4;
}
/*
 * Like CgeUtf8Encode, but never fails: a rune that cannot be encoded is
 * replaced by U+FFFD. Returns the number of bytes written.
 */
int CgeUtf8EncodeLax(uint32_t rune, char* data) {
    const int written = CgeUtf8Encode(rune, data);

    if (written != -1)
        return written;
    return CgeUtf8Encode(INVALID_RUNE, data);
}
/*
 * Decodes one rune from the UTF-8 bytes `data` (at most `size` bytes).
 * Returns the length of the sequence (1 to 4) and stores the rune in *rune.
 * Returns -1 when `size` is 0, the lead byte is invalid, the sequence is
 * truncated, a continuation byte is missing, the encoding is overlong, or
 * the value is a surrogate or above U+10FFFF. On failure *rune may hold a
 * partially decoded value.
 */
int CgeUtf8Decode(const char* data, size_t size, uint32_t* rune) {
    unsigned char byte;
    int i, n;

    /* Check the length before touching data[0]; an empty input has no bytes. */
    if (size == 0)
        return -1;

    byte = (unsigned char)data[0];
    if (byte < 0x80) {
        *rune = byte;
        return 1;
    }

    /* The lead byte determines the sequence length and the first payload bits. */
    if ((byte & 0xE0) == 0xC0) {
        n = 2;
        *rune = byte & 0x1F;
    } else if ((byte & 0xF0) == 0xE0) {
        n = 3;
        *rune = byte & 0x0F;
    } else if ((byte & 0xF8) == 0xF0) {
        n = 4;
        *rune = byte & 0x07;
    } else {
        return -1;
    }

    if (size < (size_t)n)
        return -1;

    for (i = 1; i < n; i++) {
        byte = (unsigned char)data[i];
        if ((byte & 0xC0) != 0x80)
            return -1;
        *rune = (*rune << 6) | (byte & 0x3F);
    }

    /* Reject overlong forms: the value must need all n bytes. */
    if ((n == 2 && *rune < 0x80) ||
        (n == 3 && *rune < 0x800) ||
        (n == 4 && *rune < 0x10000)) {
        return -1;
    }
    if (*rune > 0x10FFFF || (*rune >= 0xD800 && *rune <= 0xDFFF)) {
        return -1;
    }
    return n;
}
/*
 * Like CgeUtf8Decode, but never fails: undecodable input yields U+FFFD and
 * is reported as one consumed byte. Returns the number of bytes consumed.
 */
int CgeUtf8DecodeLax(const char* data, size_t size, uint32_t* rune) {
    const int consumed = CgeUtf8Decode(data, size, rune);

    if (consumed != -1)
        return consumed;

    *rune = INVALID_RUNE;
    return 1;
}

168
generator/Blocks.h Normal file
View File

@@ -0,0 +1,168 @@
#ifndef BLOCKS_H
#define BLOCKS_H
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include "ValueList.h"
/* One level of a multi-level lookup table under construction. */
struct BlockLevel {
long* data; /* block currently being filled; holds `capacity` entries */
size_t size; /* number of entries already stored in `data` */
size_t capacity; /* entries per block at this level (set by blockInit) */
struct ValueList* list; /* completed blocks of this level, as managed by ValueList.h */
};
/* A multi-level lookup table: `depth` levels, index 0 being the top level. */
struct Blocks {
struct BlockLevel* levels; /* array of `depth` levels, allocated by blockInit */
size_t depth; /* number of levels */
};
/*
 * Returns the index of the highest set bit of `value` (floor of log2), or -1
 * when `value` is 0.
 */
static int ilog2(unsigned long value) {
    int highest;

    for (highest = -1; value != 0; value >>= 1)
        highest++;
    return highest;
}
/*
 * Appends `value` to the table, starting at level `depth` (callers pass 0).
 * The value is first pushed down to the deepest level; each level stores what
 * the level below it returned. Returns -1 while the block of this level is
 * still being filled, otherwise the value obtained from the ValueList for the
 * block that was just completed.
 */
static long blockInsert(struct Blocks* blocks, long value, size_t depth) {
struct BlockLevel* level = blocks->levels + depth;
/* Not the deepest level: recurse first and store whatever the deeper level yields. */
if (blocks->depth - 1 != depth) {
/* -1 from below means the deeper block is not complete yet, so there is nothing to store here. */
if ((value = blockInsert(blocks, value, depth + 1)) == -1)
return -1;
}
level->data[level->size++] = value;
if (level->size >= level->capacity) {
/* Block complete: start a fresh one and hand the finished block to the list. */
level->size = 0;
/* Level 0 always interns; deeper levels first look for an identical block that already exists. */
if (depth == 0 || (value = valueListFind(level->list, level->data, level->capacity)) == -1) {
value = valueListIntern(&level->list, level->data, level->capacity);
}
return value;
}
return -1;
}
/*
 * Looks up `value` starting at level `depth`. `offset` selects the block
 * (list node) of this level to read from; the entry inside that block is
 * chosen by the bits of `value` that belong to this level. The entry read is
 * the block number for the next level, or the final result at the last level.
 */
static long blockFindR(struct Blocks* blocks, long value, long offset, size_t depth) {
    struct BlockLevel* level = blocks->levels + depth;
    struct ValueList* node = level->list;
    size_t lowerBits = 0;
    size_t k;
    long entry;

    /* Bits consumed by all deeper levels; this level's bits sit above them. */
    for (k = depth + 1; k < blocks->depth; k++)
        lowerBits += ilog2(blocks->levels[k].capacity);

    /* Walk to the `offset`-th block of this level. */
    while (offset--)
        node = node->next;

    entry = (value >> lowerBits) & ((unsigned long)level->capacity - 1);
    entry = node->data[entry];

    if (depth == blocks->depth - 1)
        return entry;
    return blockFindR(blocks, value, entry, depth + 1);
}
/*
 * Looks up the entry stored for `value` by descending through all levels.
 * The bits of `value` above those consumed by the levels select the starting
 * block of level 0.
 */
static long blockFind(struct Blocks* blocks, long value) {
    size_t totalBits = 0;
    size_t k;

    for (k = 0; k < blocks->depth; k++)
        totalBits += ilog2(blocks->levels[k].capacity);

    return blockFindR(blocks, value, value >> totalBits, 0);
}
/*
 * Initializes `blocks` with `depth` levels. The variadic arguments are
 * `depth` values of type int giving the block capacity of each level, top
 * level first. Aborts the program when memory cannot be allocated.
 */
static void blockInit(struct Blocks* blocks, size_t depth, ...) {
    va_list args;
    size_t k;

    blocks->depth = 0;
    blocks->levels = malloc(sizeof(struct BlockLevel) * depth);
    if (!blocks->levels)
        abort();

    va_start(args, depth);
    for (k = 0; k < depth; k++) {
        struct BlockLevel* level = blocks->levels + k;

        level->list = NULL;
        level->size = 0;
        level->capacity = va_arg(args, int);
        level->data = malloc(level->capacity * sizeof(long));
        if (!level->data)
            abort();
        blocks->depth = k + 1;
    }
    va_end(args);
}
/*
 * Writes level `depth` of `blocks` to `out` as a C array definition
 * `static const <type> <name>[] = { ... };`. All blocks of the level are
 * emitted back to back, 15 values per line.
 */
static void blockDump(struct Blocks* blocks, size_t depth, FILE* out,
                      const char* name, const char* type) {
    struct BlockLevel* level = &blocks->levels[depth];
    struct ValueList* current;
    size_t i, printed = 0;

    fprintf(out, "static const %s %s[] = {\n ", type, name);
    for (current = level->list; current; current = current->next) {
        for (i = 0; i < level->capacity; i++) {
            /* Start a new line once 15 values have been written on this one. */
            if (printed++ >= 15) {
                fprintf(out, "\n ");
                printed = 1;
            }
            fprintf(out, "%ld, ", current->data[i]);
        }
    }
    fprintf(out, "\n};\n\n");
}
/*
 * Emits one C statement to `out` that performs the lookup for level `depth`:
 *     <var> = (long)<name>[<index expression>];
 * At level 0 the index is taken from `arg` alone. At deeper levels it combines
 * the previous result in `var` (scaled by this level's block size) with the
 * bits of `arg` that belong to this level.
 */
static void blockAccess(struct Blocks* blocks, size_t depth, FILE* out,
const char* var, const char* arg, const char* name) {
struct BlockLevel* level = &blocks->levels[depth];
long i, bits = 0, offset, mask;
/* Number of bits of `arg` consumed by all deeper levels. */
for (i = depth + 1; i < blocks->depth; i++) {
bits += ilog2(blocks->levels[i].capacity);
}
/* log2 of this level's block size, and the mask selecting an entry inside a block. */
offset = ilog2(blocks->levels[depth].capacity);
mask = level->capacity - 1;
fprintf(out, " %s = (long)%s", var, name);
if (depth) {
/* Deeper levels: the previous result is a block number; scale it to an entry index. */
if (offset) {
fprintf(out, "[(%s<<%ld)", var, offset);
} else {
fprintf(out, "[%s", var);
}
}
else
fprintf(out, "[");
/* Add the in-block part taken from `arg`; skipped for single-entry blocks below level 0. */
if (mask || !depth) {
if (depth)
fprintf(out, "+");
if (mask)
fprintf(out, "(");
if (bits)
fprintf(out, "(%s>>%ld)", arg, (long)bits);
else
fprintf(out, "%s", arg);
if (depth)
fprintf(out, "&%ld", mask);
if (mask)
fprintf(out, ")");
}
fprintf(out, "];\n");
}
#endif /* BLOCKS_H */

28
generator/README.md Normal file
View File

@@ -0,0 +1,28 @@
# Generator
Use this utility to regenerate the lookup tables from the Unicode Character
Database (UCD).
Current version of this utility builds tables and functions for the following
properties:
- Case mappings for lower, upper, title cases (1:1 and 1:M)
- Case folding (1:1 and 1:M)
- General category
## Usage
Compile `Tables.c`
```
gcc Tables.c -o Tables
```
Download and put `UnicodeData.txt`, `CaseFolding.txt`, `SpecialCasing.txt` near
the compiled `Tables` program, run it and pipe output into `UCD.c`.
```
Tables > ../UCD.c
```
You can download required files from [here](https://www.unicode.org/Public/UCD/latest/ucd)

465
generator/Tables.c Normal file
View File

@@ -0,0 +1,465 @@
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "Blocks.h"
#include "TextProc.h"
#define MAX_LINE 512
/* Case data collected for one code point; nodes form a doubly linked list. */
struct CaseInfo {
long rune; /* code point this record describes */
/* One-to-one mappings; 0 means no mapping was recorded. */
struct {
long lower;
long upper;
long title;
long fold;
} simple;
/* One-to-many mappings, each a 0-terminated sequence of code points. */
struct {
long lower[4];
long upper[4];
long title[4];
long fold[4];
} full;
struct CaseInfo* prev;
struct CaseInfo* next;
};
/*
 * Per-code-point callback used by entryProcess. `fill` is non-zero when the
 * code point has no line of its own in the input (a gap or padding); `fields`
 * and `size` then refer to whatever line was read last and should not be
 * interpreted. The return value is fed back into entryProcess, which keeps
 * padding for as long as the last call returned -1.
 */
typedef int (*EntryCb)(long rune, int fill, char** fields, size_t size);

/*
 * Reads a semicolon-style data file line by line (via processLine) and calls
 * `cb` exactly once for every code point from 0 up to 0x10FFFF, in order.
 *
 *   globStart, globEnd  patterns (matched with glob) that mark the first and
 *                       last line of a range; pass NULL to disable ranges
 *   globField           index of the field the patterns are matched against
 *   codeField           index of the field holding the hexadecimal code point
 *   minFields           lines with fewer fields are skipped
 *
 * Code points missing from the file are reported with fill = 1. After the
 * last code point, further fill calls are made while `cb` returns -1.
 * Aborts when a range start is not followed by a matching range end.
 */
static void entryProcess(FILE* in, EntryCb cb, const char* globStart,
                         const char* globEnd, size_t globField,
                         size_t codeField, size_t minFields) {
    /* `code` is read by the final padding loop even if no line was parsed. */
    long code = 0, startCode, prevCode = -1;
    char line[MAX_LINE];
    char* fields[MAX_FIELDS];
    int emitted = 0;
    size_t columns;

    while ((columns = processLine(in, line, sizeof(line), fields))) {
        if (columns < minFields)
            continue;
        code = strtol(fields[codeField], NULL, 16);

        /* Report every code point skipped since the previous line. */
        while (prevCode + 1 < code)
            emitted = cb(++prevCode, 1, fields, columns);

        /* fields[globField] only exists when there are more than globField columns. */
        if (globStart && globEnd && columns > globField &&
            glob(globStart, fields[globField])) {
            startCode = code;

            /* The next usable line must be the end of the range. */
            while ((columns = processLine(in, line, sizeof(line), fields))) {
                if (columns >= minFields)
                    break;
            }
            /* columns == 0 here means the file ended inside the range. */
            if (columns <= globField || !glob(globEnd, fields[globField])) {
                fprintf(stderr, "Abnormal input - can't find last element\n");
                abort();
            }

            code = strtol(fields[codeField], NULL, 16);
            while (startCode <= code) {
                emitted = cb(startCode, 0, fields, columns);
                startCode++;
            }
        } else {
            emitted = cb(code, 0, fields, columns);
        }
        prevCode = code;
    }

    /* Fill the remainder of the code space. */
    while (prevCode + 1 < 0x110000)
        emitted = cb(++prevCode, 1, fields, columns);
    /* Keep padding for as long as the callback asks for more (-1). */
    while (emitted == -1)
        emitted = cb(code++, 1, fields, columns);
}
/*
 * Sorts the doubly linked list starting at `head` by ascending rune using
 * repeated passes that swap adjacent out-of-order nodes. Returns the new head.
 */
static struct CaseInfo* caseInfoSort(struct CaseInfo* head) {
struct CaseInfo* current;
struct CaseInfo* next;
int swapped;
/* Lists with zero or one node are already sorted. */
if (!head || !head->next)
return head;
do {
swapped = 0;
current = head;
while (current->next) {
next = current->next;
if (current->rune > next->rune) {
swapped = 1;
/* Relink the neighbours first, then exchange `current` and `next`. */
if (current->prev)
current->prev->next = next;
if (next->next)
next->next->prev = current;
current->next = next->next;
next->prev = current->prev;
current->prev = next;
next->next = current;
if (current == head)
head = next;
/* `current` has moved one position forward, so it is not advanced here. */
} else
current = current->next;
}
} while (swapped);
return head;
}
/*
 * Returns the record for `rune` from the list at *head. When no record
 * exists yet, a zero-initialized one is created and pushed to the front of
 * the list. Never returns NULL: the program is aborted when memory cannot be
 * allocated, as callers use the result without checking it.
 */
static struct CaseInfo* caseInfoGet(struct CaseInfo** head, long rune) {
    struct CaseInfo* node;

    for (node = *head; node != NULL; node = node->next) {
        if (node->rune == rune)
            return node;
    }

    node = calloc(1, sizeof(*node));
    if (!node)
        abort();

    node->rune = rune;
    node->prev = NULL;
    node->next = *head;
    if (*head)
        (*head)->prev = node;
    *head = node;
    return node;
}
/*
 * Maps a two-letter general category abbreviation (only the first two
 * characters of `name` are compared) to its index in the table below.
 * Unknown names map to the index of "Cn".
 */
static size_t categoryClassify(const char* name) {
    static const char* const categories[] = {
        "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Mc", "Me", "Nd", "Nl", "No", "Pc",
        "Pd", "Ps", "Pe", "Pi", "Pf", "Po", "Sm", "Sc", "Sk", "So", "Zs", "Zl",
        "Zp", "Cc", "Cf", "Cs", "Co", "Cn"
    };
    const size_t total = sizeof(categories) / sizeof(categories[0]);
    size_t k;

    for (k = 0; k < total; k++) {
        if (strncmp(categories[k], name, 2) == 0)
            return k;
    }
    /* "Cn" is the last entry of the table. */
    return total - 1;
}
/* Streams used by the generator; presumably opened by code outside this excerpt. */
FILE* in;
FILE* out;
/* Multi-level table receiving one general-category index per code point. */
struct Blocks categoryBlocks;
/* Head of the list of case records; NULL while the list is empty. */
struct CaseInfo* caseInfo = NULL;
/*
 * EntryCb for UnicodeData.txt. Records the general category of `rune` in
 * categoryBlocks ("Cn" for fill entries) and, when the line carries simple
 * case mappings (fields 12, 13, 14: upper, lower, title), stores them in the
 * rune's case record. Returns the result of blockInsert.
 */
static int entryUnicodeData(long rune, int fill, char** fields, size_t size) {
    long upper, lower, title;

    (void)size;

    if (fill)
        return blockInsert(&categoryBlocks, categoryClassify("Cn"), 0);

    upper = strtol(fields[12], NULL, 16);
    lower = strtol(fields[13], NULL, 16);
    title = strtol(fields[14], NULL, 16);

    if (lower || upper || title) {
        struct CaseInfo* record = caseInfoGet(&caseInfo, rune);

        record->simple.lower = lower;
        record->simple.upper = upper;
        record->simple.title = title;
    }

    return blockInsert(&categoryBlocks, categoryClassify(fields[2]), 0);
}
/*
 * Parses a whitespace-separated list of hexadecimal code points from `field`
 * into `array`, which must have room for 4 elements. At most 3 values are
 * stored and the sequence is always terminated with 0, so the destination is
 * never overrun; any further values in `field` are ignored. Parsing stops at
 * the first token that converts to 0 (including the end of the string).
 */
static void arrayParseFromStr(const char* field, long* array) {
    enum { SLOTS = 4 }; /* size of every mapping array in struct CaseInfo */
    char* cursor = (char*)field; /* strtol needs a non-const end pointer */
    size_t written = 0;

    while (written < SLOTS - 1) {
        const long value = strtol(cursor, &cursor, 16);

        if (!value)
            break;
        array[written++] = value;
    }
    array[written] = 0;
}
/*
 * EntryCb for CaseFolding.txt. Field 1 is the status: "T" lines are ignored,
 * "F" lines provide the full folding (field 2, several code points), every
 * other status provides the simple folding (field 2, one code point).
 * Always returns 1.
 */
static int entryCaseFolding(long rune, int fill, char** fields, size_t size) {
    struct CaseInfo* record;
    const char* status;

    (void)size;

    if (fill)
        return 1;

    status = fields[1];
    if (strcmp("T", status) == 0)
        return 1;

    record = caseInfoGet(&caseInfo, rune);
    if (strcmp("F", status) == 0)
        arrayParseFromStr(fields[2], record->full.fold);
    else
        record->simple.fold = strtol(fields[2], NULL, 16);
    return 1;
}
/*
 * EntryCb for SpecialCasing.txt. Lines whose field 4 is not empty are
 * skipped; for the rest, fields 1, 2 and 3 are stored as the full lower,
 * title and upper mappings of `rune`. Always returns 1.
 */
static int entrySpecialCasing(long rune, int fill, char** fields, size_t size) {
    struct CaseInfo* record;

    (void)size;

    if (fill)
        return 1;
    if (strcmp("", fields[4]) != 0)
        return 1;

    record = caseInfoGet(&caseInfo, rune);
    arrayParseFromStr(fields[1], record->full.lower);
    arrayParseFromStr(fields[2], record->full.title);
    arrayParseFromStr(fields[3], record->full.upper);
    return 1;
}
/*
 * Clears a "full" mapping that consists of exactly one code point, leaving
 * only genuine one-to-many sequences in the array.
 */
static void mappingRemoveSingle(long* array) {
    const int single = array[0] != 0 && array[1] == 0;

    if (single)
        array[0] = 0;
}
/*
 * Normalizes every case record: a missing title mapping defaults to the
 * upper mapping (simple and full), and full mappings that hold a single code
 * point are dropped.
 */
static void caseInfoReduce(void) {
    struct CaseInfo* record;

    for (record = caseInfo; record; record = record->next) {
        if (record->simple.upper && !record->simple.title)
            record->simple.title = record->simple.upper;
        if (record->full.upper[0] && !record->full.title[0])
            memcpy(record->full.title, record->full.upper, 4 * sizeof(long));

        mappingRemoveSingle(record->full.lower);
        mappingRemoveSingle(record->full.upper);
        mappingRemoveSingle(record->full.title);
        mappingRemoveSingle(record->full.fold);
    }
}
/* Tables for the simple mappings; blocksBuild stores the difference (mapped - rune), 0 for none. */
struct Blocks lowerBlocks, upperBlocks, titleBlocks, foldBlocks;
/* Tables for the full mappings; blocksBuild stores an index into longIndexData, -1 for none. */
struct Blocks lowerFullBlocks, upperFullBlocks, titleFullBlocks, foldFullBlocks;
/* Pool of distinct full-mapping sequences, addressed by the indices above. */
long longIndexData[1024][4];
/* Number of sequences currently stored in longIndexData. */
size_t longIndexSize = 0;
/*
 * Returns the index of the 4-element sequence `array` in longIndexData,
 * adding it to the pool when it is not present yet. Aborts with a diagnostic
 * when the pool is full instead of writing past its end.
 */
static long longIndexGet(long* array) {
    const size_t capacity = sizeof(longIndexData) / sizeof(longIndexData[0]);
    size_t i;

    for (i = 0; i < longIndexSize; i++) {
        if (!memcmp(array, longIndexData[i], 4 * sizeof(long)))
            return (long)i;
    }

    if (longIndexSize >= capacity) {
        fprintf(stderr, "Too many distinct full case mappings (limit %lu)\n",
                (unsigned long)capacity);
        abort();
    }

    memcpy(longIndexData[longIndexSize], array, 4 * sizeof(long));
    return (long)longIndexSize++;
}
/*
 * Builds the eight case-mapping tables from the case record list, which is
 * walked in list order and expected to be sorted by rune. Every code point
 * from 0 to 0x10FFFF gets one entry per table: simple tables store the
 * difference to the mapped rune (0 for none), full tables store an index
 * into longIndexData (-1 for none). After the last code point, entries are
 * appended while the last insert into foldFullBlocks returned -1.
 */
static void blocksBuild(void) {
    struct CaseInfo* record;
    int emitted;
    long last = -1;

    blockInit(&lowerBlocks, 4, 1, 64, 16, 1);
    blockInit(&upperBlocks, 4, 1, 64, 16, 1);
    blockInit(&titleBlocks, 4, 1, 64, 16, 1);
    blockInit(&foldBlocks, 4, 1, 64, 16, 1);
    blockInit(&lowerFullBlocks, 4, 1, 64, 32, 1);
    blockInit(&upperFullBlocks, 4, 1, 64, 32, 1);
    blockInit(&titleFullBlocks, 4, 1, 64, 32, 1);
    blockInit(&foldFullBlocks, 4, 1, 64, 32, 1);

    for (record = caseInfo; record; record = record->next) {
        /* Code points between two records have no mappings at all. */
        for (; last + 1 < record->rune; last++) {
            blockInsert(&lowerBlocks, 0, 0);
            blockInsert(&upperBlocks, 0, 0);
            blockInsert(&titleBlocks, 0, 0);
            blockInsert(&foldBlocks, 0, 0);
            blockInsert(&lowerFullBlocks, -1, 0);
            blockInsert(&upperFullBlocks, -1, 0);
            blockInsert(&titleFullBlocks, -1, 0);
            blockInsert(&foldFullBlocks, -1, 0);
        }

        /* Simple mappings are stored relative to the rune itself. */
        blockInsert(&lowerBlocks,
                    record->simple.lower ? record->simple.lower - record->rune : 0, 0);
        blockInsert(&upperBlocks,
                    record->simple.upper ? record->simple.upper - record->rune : 0, 0);
        blockInsert(&titleBlocks,
                    record->simple.title ? record->simple.title - record->rune : 0, 0);
        blockInsert(&foldBlocks,
                    record->simple.fold ? record->simple.fold - record->rune : 0, 0);

        /* Full mappings are stored as indices into the shared sequence pool. */
        blockInsert(&lowerFullBlocks,
                    record->full.lower[0] ? longIndexGet(record->full.lower) : -1, 0);
        blockInsert(&upperFullBlocks,
                    record->full.upper[0] ? longIndexGet(record->full.upper) : -1, 0);
        blockInsert(&titleFullBlocks,
                    record->full.title[0] ? longIndexGet(record->full.title) : -1, 0);
        emitted = blockInsert(&foldFullBlocks,
                              record->full.fold[0] ? longIndexGet(record->full.fold) : -1, 0);

        last = record->rune;
    }

    while (last + 1 < 0x110000 || emitted == -1) {
        blockInsert(&lowerBlocks, 0, 0);
        blockInsert(&upperBlocks, 0, 0);
        blockInsert(&titleBlocks, 0, 0);
        blockInsert(&foldBlocks, 0, 0);
        blockInsert(&lowerFullBlocks, -1, 0);
        blockInsert(&upperFullBlocks, -1, 0);
        blockInsert(&titleFullBlocks, -1, 0);
        emitted = blockInsert(&foldFullBlocks, -1, 0);
        last++;
    }
}
/*
 * Writes the generated C source to the global `out` stream, in this order:
 *   1. a banner comment and the standard includes,
 *   2. the four-level lookup tables for the category table and for each of
 *      the simple/full lower, upper, title and fold tables,
 *   3. the `case_data` array holding the multi-rune ("full") mappings,
 *   4. CgeRuneCategory, CgeRune{Lower,Upper,Title,Fold} and
 *      CgeRune{Lower,Upper,Title,Fold}Full.
 *
 * The helper macros below are local to this function and are undefined again
 * right after it.
 */
static void outputCode(void) {
/* Calls blockDump for levels 0..3 of BLOCK; the outputs are named NAME1..NAME4
 * and use the given element type names. */
#define DUMP(NAME, BLOCK, TYPE1, TYPE2, TYPE3, TYPE4) do { \
        blockDump(&BLOCK, 0, out, NAME "1", TYPE1); \
        blockDump(&BLOCK, 1, out, NAME "2", TYPE2); \
        blockDump(&BLOCK, 2, out, NAME "3", TYPE3); \
        blockDump(&BLOCK, 3, out, NAME "4", TYPE4); \
    } while (0)

/* Emits `uint32_t CgeRune<FUNC>(uint32_t r)`: runes above 0x10FFFF are
 * returned unchanged, otherwise the looked-up value `t` is treated as a delta
 * that is added to `r` (0 means "no mapping"). */
#define EMIT_SIMPLE(FUNC, BLOCKS, BASE) do { \
        fprintf(out, "uint32_t CgeRune" #FUNC "(uint32_t r){\n"); \
        fprintf(out, " long t;\n if(r>1114111ul)return r;\n"); \
        blockAccess(&BLOCKS, 0, out, "t", "r", BASE "1"); \
        blockAccess(&BLOCKS, 1, out, "t", "r", BASE "2"); \
        blockAccess(&BLOCKS, 2, out, "t", "r", BASE "3"); \
        blockAccess(&BLOCKS, 3, out, "t", "r", BASE "4"); \
        fprintf(out, " return t?t+r:r;\n}\n\n"); \
    } while (0)

/* Emits `size_t CgeRune<FUNC>Full(uint32_t r, uint32_t* out)`: when the
 * looked-up value `t` is non-negative it indexes `case_data` and up to three
 * non-zero runes are copied to `out`; otherwise the simple mapping
 * CgeRune<SIMPLE> supplies a single rune. The emitted copy loop checks the
 * bound `i<3` BEFORE reading `p[i]`, so a row with three non-zero runes never
 * reads past its end. SIMPLE_FUNC is accepted but not used.
 * NOTE(review): the full tables are filled with -1 for "no mapping" but their
 * last level is dumped as "uint32_t"; confirm that blockAccess loads the value
 * into the signed `t` so that the emitted `t>=0` test still rejects it. */
#define EMIT_FULL(FUNC, SIMPLE, FULL_BLOCKS, FULL_BASE, SIMPLE_FUNC) do { \
        fprintf(out, "size_t CgeRune" #FUNC "Full(uint32_t r, uint32_t* out){\n"); \
        fprintf(out, " long t;\n if(r>1114111ul){\n *out=r;\n return 1;\n }\n"); \
        blockAccess(&FULL_BLOCKS, 0, out, "t", "r", FULL_BASE "1"); \
        blockAccess(&FULL_BLOCKS, 1, out, "t", "r", FULL_BASE "2"); \
        blockAccess(&FULL_BLOCKS, 2, out, "t", "r", FULL_BASE "3"); \
        blockAccess(&FULL_BLOCKS, 3, out, "t", "r", FULL_BASE "4"); \
        fprintf(out, " if(t>=0){\n"); \
        fprintf(out, " const int32_t* p=case_data[t];\n"); \
        fprintf(out, " size_t i=0;\n"); \
        fprintf(out, " while(i<3 && p[i]){out[i]=p[i];i++;}\n"); \
        fprintf(out, " return i;\n }\n"); \
        fprintf(out, " *out=CgeRune" #SIMPLE "(r);\n return 1;\n}\n\n"); \
    } while (0)

    /* File prologue. <stddef.h> is required because the emitted *Full
     * functions use size_t, which <stdint.h> is not guaranteed to declare. */
    fprintf(out, "/* Auto-generated case mapping tables */\n\n");
    fprintf(out, "#include <stddef.h>\n#include <stdint.h>\n\n");

    /* Lookup tables. */
    DUMP("cat", categoryBlocks, "uint8_t", "uint16_t", "uint16_t", "uint8_t");
    DUMP("low", lowerBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("upp", upperBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("tit", titleBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("fod", foldBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("lfx", lowerFullBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("ufx", upperFullBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("tfx", titleFullBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
    DUMP("ffx", foldFullBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");

    /* Multi-rune mappings: one row of three values per entry, four rows per
     * output line. */
    fprintf(out, "static const int32_t case_data[][3] = {");
    {
        size_t i;
        for (i = 0; i < longIndexSize; ++i) {
            if (i % 4 == 0)
                fprintf(out, "\n ");
            fprintf(out, "{%ld, %ld, %ld}, ",
                    longIndexData[i][0], longIndexData[i][1],
                    longIndexData[i][2]);
        }
    }
    fprintf(out, "\n};\n\n");

    /* Category lookup: runes above 0x10FFFF report the value that
     * categoryClassify returns for "Cn". */
    fprintf(out, "int CgeRuneCategory(uint32_t r){\n");
    fprintf(out, " long t;\n if(r>1114111ul)return %d;\n", (int)categoryClassify("Cn"));
    blockAccess(&categoryBlocks, 0, out, "t", "r", "cat1");
    blockAccess(&categoryBlocks, 1, out, "t", "r", "cat2");
    blockAccess(&categoryBlocks, 2, out, "t", "r", "cat3");
    blockAccess(&categoryBlocks, 3, out, "t", "r", "cat4");
    fprintf(out, " return t;\n}\n\n");

    /* Simple mappings first: the *Full functions call them as a fallback. */
    EMIT_SIMPLE(Lower, lowerBlocks, "low");
    EMIT_SIMPLE(Upper, upperBlocks, "upp");
    EMIT_SIMPLE(Title, titleBlocks, "tit");
    EMIT_SIMPLE(Fold, foldBlocks, "fod");
    EMIT_FULL(Lower, Lower, lowerFullBlocks, "lfx", Lower);
    EMIT_FULL(Upper, Upper, upperFullBlocks, "ufx", Upper);
    EMIT_FULL(Title, Title, titleFullBlocks, "tfx", Title);
    EMIT_FULL(Fold, Fold, foldFullBlocks, "ffx", Fold);
}
#undef DUMP
#undef EMIT_SIMPLE
#undef EMIT_FULL
/*
 * Opens the Unicode Character Database file `name` (relative to the current
 * working directory) for reading. On failure a "not found" message with the
 * download URL for that file is printed to stderr and NULL is returned.
 */
static FILE* ucdFileOpen(const char* name) {
    FILE* file = fopen(name, "r");
    if (!file) {
        fprintf(stderr, "%s not found. Download it from:\n", name);
        fprintf(stderr, "https://www.unicode.org/Public/UCD/latest/ucd/%s\n", name);
    }
    return file;
}

/*
 * Generator entry point. Reads UnicodeData.txt, CaseFolding.txt and
 * SpecialCasing.txt from the current directory, builds the lookup blocks and
 * writes the generated C source to stdout; progress goes to stderr.
 *
 * Returns 0 on success and -1 when an input file cannot be opened or the
 * generated output could not be written completely.
 */
int main(void) {
    if (!(in = ucdFileOpen("UnicodeData.txt")))
        return -1;
    out = stdout;
    fprintf(stderr, "Processing UnicodeData.txt\n");
    blockInit(&categoryBlocks, 4, 1, 16, 8, 8);
    entryProcess(in, entryUnicodeData, "<*, First>", "<*, Last>", 1, 0, 15);
    fclose(in);

    if (!(in = ucdFileOpen("CaseFolding.txt")))
        return -1;
    fprintf(stderr, "Processing CaseFolding.txt\n");
    entryProcess(in, entryCaseFolding, NULL, NULL, 0, 0, 3);
    fclose(in);

    /* Announce processing only once the file is actually open, like the
     * other two inputs. */
    if (!(in = ucdFileOpen("SpecialCasing.txt")))
        return -1;
    fprintf(stderr, "Processing SpecialCasing.txt\n");
    entryProcess(in, entrySpecialCasing, NULL, NULL, 0, 0, 4);
    fclose(in);

    caseInfo = caseInfoSort(caseInfo);
    caseInfoReduce();
    blocksBuild();
    outputCode();

    /* stdout is usually redirected into a source file; do not report success
     * if the generated code was truncated by a write error. */
    if (fflush(out) != 0 || ferror(out)) {
        fprintf(stderr, "Failed to write the generated output\n");
        return -1;
    }
    return 0;
}

76
generator/TextProc.h Normal file
View File

@@ -0,0 +1,76 @@
#ifndef TEXTPROC_H
#define TEXTPROC_H
#include <ctype.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#define MAX_FIELDS 16
/*
 * Matches the whole of `text` against the wildcard `pattern`.
 *
 * Pattern syntax: '*' matches any run of characters (including none), '?'
 * matches exactly one character, every other character matches itself.
 * There is no escape syntax, so '*' and '?' in the pattern are always
 * wildcards.
 *
 * Returns 1 when the pattern matches the entire text, 0 otherwise.
 */
static int glob(const char* pattern, const char* text) {
    const char* star = NULL;    /* pattern position just past the latest '*' */
    const char* restart = text; /* text position where that '*' began matching */
    while (*text) {
        if (*pattern == '*') {
            /* Test the wildcard before the literal comparison: otherwise a
             * '*' in the text would consume the pattern's '*' as a plain
             * character and no backtracking point would be recorded. */
            star = ++pattern;
            restart = text;
        } else if (*pattern == '?' || *pattern == *text) {
            pattern++;
            text++;
        } else if (star) {
            /* Mismatch: let the latest '*' swallow one more character. */
            pattern = star;
            text = ++restart;
        } else {
            return 0;
        }
    }
    /* Text is exhausted; only trailing '*' may remain in the pattern. */
    while (*pattern == '*')
        pattern++;
    return *pattern == '\0';
}
/*
 * Returns a pointer to the first character of `line` that is not whitespace
 * (or to its terminating NUL when the line is all whitespace). The string
 * itself is not modified.
 */
static char* trimLeft(char* line) {
    /* <ctype.h> functions require an unsigned char value; a plain char may be
     * negative for bytes >= 0x80, which is undefined behavior. */
    while (*line && isspace((unsigned char)*line))
        ++line;
    return line;
}
/*
 * Removes trailing whitespace from `line` in place by writing a NUL right
 * after its last non-whitespace character. A line made only of whitespace
 * becomes the empty string.
 */
static void trimRight(char* line) {
    char* end = line; /* one past the last non-whitespace character seen */
    for (; *line; ++line) {
        /* <ctype.h> functions require an unsigned char value; a plain char
         * may be negative for bytes >= 0x80, which is undefined behavior. */
        if (!isspace((unsigned char)*line))
            end = line + 1;
    }
    *end = '\0';
}
/*
 * Cuts `line` at its first '#' character, discarding the '#' and everything
 * after it. Lines without a '#' are left untouched.
 */
static void trimComment(char* line) {
    char* hash = strchr(line, '#');
    if (hash == NULL)
        return;
    *hash = '\0';
}
/*
 * Strips whitespace from both ends of `line`. Trailing whitespace is removed
 * in place first; the returned pointer then skips the leading whitespace, so
 * it may point past the start of `line`.
 */
static char* trim(char* line) {
    char* start;
    trimRight(line);
    start = trimLeft(line);
    return start;
}
/*
 * Splits `line` in place at every `separator` and stores a pointer to each
 * trimmed field in `fields`.
 *
 * Each separator is overwritten with NUL before the field in front of it is
 * trimmed. At least one field is always produced (a line without separators
 * is a single field), and splitting stops after MAX_FIELDS fields, leaving
 * the remainder of the line unparsed.
 *
 * Returns the number of entries written to `fields`.
 */
static size_t fieldParse(char* line, char** fields, char separator) {
    size_t count = 0;
    for (;;) {
        char* field = line;
        char* split = strchr(field, separator);
        if (split != NULL) {
            *split = '\0';
            line = split + 1;
        } else {
            line = NULL;
        }
        fields[count++] = trim(field);
        if (line == NULL || count >= MAX_FIELDS)
            break;
    }
    return count;
}
/*
 * Reads the next line from `in` into the buffer `line` of capacity `size`,
 * removes a trailing '#' comment and splits the rest into ';'-separated
 * fields stored in `fields`.
 *
 * Returns the number of fields, or 0 when no more input could be read.
 */
static int processLine(FILE* in, char* line, size_t size, char** fields) {
    size_t count = 0;
    if (fgets(line, size, in) != NULL) {
        trimComment(line);
        count = fieldParse(line, fields, ';');
    }
    return count;
}
#endif /* TEXTPROC_H */

41
generator/ValueList.h Normal file
View File

@@ -0,0 +1,41 @@
#ifndef VALUELIST_H
#define VALUELIST_H
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
/*
 * Node of a singly linked list of `long` arrays. A node's position in the
 * list (counted from 0) is the index reported by valueListFind and
 * valueListIntern.
 */
struct ValueList {
    long* data;             /* heap-allocated array holding `size` values */
    size_t size;            /* number of elements in `data` */
    struct ValueList* next; /* following node, or NULL at the end of the list */
};
/*
 * Searches `list` for a node whose array has exactly `size` elements equal to
 * those in `data`.
 *
 * Returns the zero-based position of the first matching node, or -1 when no
 * node matches.
 */
static long valueListFind(struct ValueList* list, long* data, size_t size) {
    struct ValueList* node;
    long position = 0;
    for (node = list; node != NULL; node = node->next, ++position) {
        if (node->size != size)
            continue;
        if (memcmp(node->data, data, size * sizeof(long)) == 0)
            return position;
    }
    return -1;
}
/*
 * Appends a private copy of the `size` values in `data` to the end of the
 * list rooted at `*list` and returns the zero-based position of the new node.
 *
 * The function always appends; it does not look for an existing equal entry,
 * so callers that want deduplication must consult valueListFind first.
 * Aborts the program when memory cannot be allocated.
 */
static long valueListIntern(struct ValueList** list, long* data, size_t size) {
    struct ValueList* node;
    long index = 0;

    /* Walk to the terminating NULL link, counting the nodes passed. */
    while (*list) {
        ++index;
        list = &(*list)->next;
    }

    /* Reject element counts whose byte size would overflow size_t. */
    if (size > (size_t)-1 / sizeof *node->data)
        abort();
    if (!(node = malloc(sizeof *node)))
        abort();
    /* malloc(0) may legitimately return NULL, which must not be mistaken for
     * an out-of-memory condition: always request at least one element. */
    if (!(node->data = malloc((size ? size : 1) * sizeof *node->data)))
        abort();
    /* Skip the copy for empty arrays, where `data` may be NULL. */
    if (size)
        memcpy(node->data, data, size * sizeof *node->data);
    node->size = size;
    node->next = NULL;

    /* Link the node only once it is fully initialized. */
    *list = node;
    return index;
}
#endif /* VALUELIST_H */