Initial commit
This commit is contained in:
168
generator/Blocks.h
Normal file
168
generator/Blocks.h
Normal file
@@ -0,0 +1,168 @@
|
||||
#ifndef BLOCKS_H
|
||||
#define BLOCKS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include "ValueList.h"
|
||||
|
||||
/*
 * One stage of a multi-level lookup table.
 *
 * Values are collected in `data` until `capacity` entries are present; the
 * completed block is then copied into `list` (see blockInsert) and the
 * buffer is reused from the start.
 */
struct BlockLevel {
    long *data;             /* scratch buffer holding the block being filled */
    size_t size;            /* number of entries currently stored in `data`  */
    size_t capacity;        /* number of entries that make up one full block */
    struct ValueList *list; /* blocks already stored for this level          */
};
|
||||
|
||||
/*
 * A multi-level lookup table: `depth` levels stored in `levels`, where
 * index 0 is the outermost level and index `depth - 1` holds the values
 * passed to blockInsert.
 */
struct Blocks {
    struct BlockLevel *levels; /* array of `depth` levels                 */
    size_t depth;              /* number of initialised entries in levels */
};
|
||||
|
||||
/*
 * Returns the index of the highest set bit of `value`, i.e. floor(log2(value)).
 * Returns -1 when `value` is zero.
 */
static int ilog2(unsigned long value) {
    int exponent;

    for (exponent = -1; value != 0; value >>= 1)
        ++exponent;

    return exponent;
}
|
||||
|
||||
static long blockInsert(struct Blocks* blocks, long value, size_t depth) {
|
||||
struct BlockLevel* level = blocks->levels + depth;
|
||||
|
||||
if (blocks->depth - 1 != depth) {
|
||||
if ((value = blockInsert(blocks, value, depth + 1)) == -1)
|
||||
return -1;
|
||||
}
|
||||
|
||||
level->data[level->size++] = value;
|
||||
if (level->size >= level->capacity) {
|
||||
level->size = 0;
|
||||
|
||||
if (depth == 0 || (value = valueListFind(level->list, level->data, level->capacity)) == -1) {
|
||||
value = valueListIntern(&level->list, level->data, level->capacity);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static long blockFindR(struct Blocks* blocks, long value, long offset, size_t depth) {
|
||||
struct BlockLevel* level = &blocks->levels[depth];
|
||||
struct ValueList* current;
|
||||
size_t i, bits = 0;
|
||||
|
||||
for (i = depth + 1; i < blocks->depth; i++) {
|
||||
bits += ilog2(blocks->levels[i].capacity);
|
||||
}
|
||||
|
||||
current = level->list;
|
||||
while (offset--) current = current->next;
|
||||
|
||||
offset = (value >> bits) & ((unsigned long)level->capacity - 1);
|
||||
offset = current->data[offset];
|
||||
|
||||
if (depth != blocks->depth - 1)
|
||||
return blockFindR(blocks, value, offset, depth + 1);
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static long blockFind(struct Blocks* blocks, long value) {
|
||||
size_t i, bits = 0;
|
||||
long offset;
|
||||
|
||||
for (i = 0; i < blocks->depth; i++) {
|
||||
bits += ilog2(blocks->levels[i].capacity);
|
||||
}
|
||||
|
||||
offset = value >> bits;
|
||||
return blockFindR(blocks, value, offset, 0);
|
||||
}
|
||||
|
||||
static void blockInit(struct Blocks* blocks, size_t depth, ...) {
|
||||
va_list args;
|
||||
|
||||
blocks->depth = 0;
|
||||
if (!(blocks->levels = malloc(sizeof(struct BlockLevel) * depth)))
|
||||
abort();
|
||||
|
||||
va_start(args, depth);
|
||||
|
||||
while (blocks->depth < depth) {
|
||||
struct BlockLevel* level = &blocks->levels[blocks->depth];
|
||||
|
||||
level->list = NULL;
|
||||
level->size = 0;
|
||||
level->capacity = va_arg(args, int);
|
||||
if (!(level->data = malloc(level->capacity * sizeof(long))))
|
||||
abort();
|
||||
blocks->depth++;
|
||||
}
|
||||
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
static void blockDump(struct Blocks* blocks, size_t depth, FILE* out,
|
||||
const char* name, const char* type) {
|
||||
struct BlockLevel* level = &blocks->levels[depth];
|
||||
struct ValueList* current;
|
||||
size_t i, j, printed = 0;
|
||||
|
||||
fprintf(out, "static const %s %s[] = {\n ", type, name);
|
||||
current = level->list;
|
||||
for (current = level->list; current; current = current->next) {
|
||||
for (i = 0; i < level->capacity; i++) {
|
||||
if (printed++ >= 15) {
|
||||
fprintf(out, "\n ");
|
||||
printed = 1;
|
||||
}
|
||||
fprintf(out, "%ld, ", current->data[i]);
|
||||
}
|
||||
}
|
||||
fprintf(out, "\n};\n\n");
|
||||
}
|
||||
|
||||
static void blockAccess(struct Blocks* blocks, size_t depth, FILE* out,
|
||||
const char* var, const char* arg, const char* name) {
|
||||
struct BlockLevel* level = &blocks->levels[depth];
|
||||
long i, bits = 0, offset, mask;
|
||||
|
||||
for (i = depth + 1; i < blocks->depth; i++) {
|
||||
bits += ilog2(blocks->levels[i].capacity);
|
||||
}
|
||||
|
||||
offset = ilog2(blocks->levels[depth].capacity);
|
||||
mask = level->capacity - 1;
|
||||
|
||||
fprintf(out, " %s = (long)%s", var, name);
|
||||
if (depth) {
|
||||
if (offset) {
|
||||
fprintf(out, "[(%s<<%ld)", var, offset);
|
||||
} else {
|
||||
fprintf(out, "[%s", var);
|
||||
}
|
||||
}
|
||||
else
|
||||
fprintf(out, "[");
|
||||
|
||||
if (mask || !depth) {
|
||||
if (depth)
|
||||
fprintf(out, "+");
|
||||
|
||||
if (mask)
|
||||
fprintf(out, "(");
|
||||
|
||||
if (bits)
|
||||
fprintf(out, "(%s>>%ld)", arg, (long)bits);
|
||||
else
|
||||
fprintf(out, "%s", arg);
|
||||
|
||||
if (depth)
|
||||
fprintf(out, "&%ld", mask);
|
||||
|
||||
if (mask)
|
||||
fprintf(out, ")");
|
||||
}
|
||||
|
||||
fprintf(out, "];\n");
|
||||
}
|
||||
|
||||
#endif /* BLOCKS_H */
|
||||
28
generator/README.md
Normal file
28
generator/README.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# Generator
|
||||
|
||||
Use this utility to regenerate the lookup tables derived from the Unicode
Character Database (UCD).
|
||||
|
||||
The current version of this utility builds tables and functions for the
following properties:
|
||||
|
||||
- Case mappings for lower, upper, title cases (1:1 and 1:M)
|
||||
- Case folding (1:1 and 1:M)
|
||||
- General category
|
||||
|
||||
## Usage
|
||||
|
||||
Compile `Tables.c`
|
||||
|
||||
```
|
||||
gcc Tables.c -o Tables
|
||||
```
|
||||
|
||||
Download `UnicodeData.txt`, `CaseFolding.txt`, and `SpecialCasing.txt` and place
them in the directory you run the compiled `Tables` program from (it opens them
by file name only). Then run it and redirect its standard output into `UCD.c`.
|
||||
|
||||
```
|
||||
Tables > ../UCD.c
|
||||
```
|
||||
|
||||
You can download the required files from the [latest UCD directory](https://www.unicode.org/Public/UCD/latest/ucd).
|
||||
465
generator/Tables.c
Normal file
465
generator/Tables.c
Normal file
@@ -0,0 +1,465 @@
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "Blocks.h"
|
||||
#include "TextProc.h"
|
||||
|
||||
/* Size in bytes of the buffer entryProcess uses to read one input line. */
#define MAX_LINE 512
|
||||
|
||||
/*
 * Case mapping data collected for one code point.
 *
 * `simple` holds single code point mappings, `full` holds mappings stored as
 * zero-terminated sequences in 4-element arrays. A value of 0 means
 * "no mapping recorded". Nodes form a doubly linked list.
 */
struct CaseInfo {
    long rune; /* code point this entry describes */

    struct {
        long lower;
        long upper;
        long title;
        long fold;
    } simple;

    struct {
        long lower[4];
        long upper[4];
        long title[4];
        long fold[4];
    } full;

    struct CaseInfo *prev;
    struct CaseInfo *next;
};
|
||||
|
||||
/*
 * Callback invoked by entryProcess once per code point.
 *
 * rune   - code point being reported
 * fill   - non-zero when the code point has no entry of its own in the input
 *          (gap or trailing padding), zero when it comes from a parsed line
 * fields - fields of the most recently parsed line
 * size   - number of entries in `fields`
 *
 * entryProcess keeps issuing padding calls while the last return value is -1.
 */
typedef int (*EntryCb)(long rune, int fill, char** fields, size_t size);
|
||||
|
||||
/*
 * Reads a semicolon-separated UCD file from `in` and reports every code
 * point from 0 to 0x10FFFF to `cb`, in increasing order as long as the
 * input is sorted.
 *
 * cb         - receives each code point; see EntryCb
 * globStart  - glob pattern marking the first line of a range, or NULL
 * globEnd    - glob pattern the line closing a range must match, or NULL
 * globField  - index of the field the two patterns are matched against
 * codeField  - index of the field holding the hexadecimal code point
 * minFields  - lines with fewer fields (comments, blank lines) are skipped
 *
 * Code points missing from the input are reported with fill = 1. A range
 * (a line matching `globStart` followed by one matching `globEnd`) is
 * expanded into one fill = 0 call per code point, all carrying the fields
 * of the closing line. After 0x10FFFF, padding calls continue for as long
 * as the callback returns -1.
 *
 * Aborts when a range is opened but its closing line is missing.
 */
static void entryProcess(FILE* in, EntryCb cb, const char* globStart,
                         const char* globEnd, size_t globField,
                         size_t codeField, size_t minFields) {
    long code, startCode, prevCode = -1;
    char line[MAX_LINE];
    char* fields[MAX_FIELDS];
    int emitted = 0;
    size_t columns;

    while ((columns = processLine(in, line, sizeof(line), fields))) {
        /* Skip lines that cannot hold the code point field. */
        if (columns < minFields || columns <= codeField)
            continue;

        code = strtol(fields[codeField], NULL, 16);

        /* Report the gap between the previous entry and this one. */
        while (prevCode + 1 < code)
            emitted = cb(++prevCode, 1, fields, columns);

        if (globStart && globEnd && columns > globField &&
            glob(globStart, fields[globField])) {
            startCode = code;

            /* The next usable line must close the range. */
            while ((columns = processLine(in, line, sizeof(line), fields))) {
                if (columns >= minFields && columns > codeField)
                    break;
            }

            if (!columns || columns <= globField ||
                !glob(globEnd, fields[globField])) {
                fprintf(stderr, "Abnormal input - can't find last element\n");
                abort();
            }

            code = strtol(fields[codeField], NULL, 16);
            while (startCode <= code) {
                emitted = cb(startCode, 0, fields, columns);
                startCode++;
            }
        } else {
            emitted = cb(code, 0, fields, columns);
        }

        prevCode = code;
    }

    /* Pad up to the last valid code point. */
    while (prevCode + 1 < 0x110000)
        emitted = cb(++prevCode, 1, fields, columns);

    /*
     * Keep padding until the callback stops returning -1. Continue from
     * prevCode so no code point is reported twice and no uninitialised
     * value is passed when the input had no usable line.
     */
    while (emitted == -1)
        emitted = cb(++prevCode, 1, fields, columns);
}
|
||||
|
||||
static struct CaseInfo* caseInfoSort(struct CaseInfo* head) {
|
||||
struct CaseInfo* current;
|
||||
struct CaseInfo* next;
|
||||
int swapped;
|
||||
|
||||
if (!head || !head->next)
|
||||
return head;
|
||||
|
||||
do {
|
||||
swapped = 0;
|
||||
current = head;
|
||||
while (current->next) {
|
||||
next = current->next;
|
||||
if (current->rune > next->rune) {
|
||||
swapped = 1;
|
||||
|
||||
if (current->prev)
|
||||
current->prev->next = next;
|
||||
if (next->next)
|
||||
next->next->prev = current;
|
||||
|
||||
current->next = next->next;
|
||||
next->prev = current->prev;
|
||||
current->prev = next;
|
||||
next->next = current;
|
||||
|
||||
if (current == head)
|
||||
head = next;
|
||||
} else
|
||||
current = current->next;
|
||||
}
|
||||
} while (swapped);
|
||||
|
||||
return head;
|
||||
}
|
||||
|
||||
static struct CaseInfo* caseInfoGet(struct CaseInfo** head, long rune) {
|
||||
struct CaseInfo* current = *head;
|
||||
struct CaseInfo* node;
|
||||
|
||||
while (current != NULL) {
|
||||
if (current->rune == rune)
|
||||
return current;
|
||||
current = current->next;
|
||||
}
|
||||
|
||||
if (!(node = malloc(sizeof(*node))))
|
||||
return NULL;
|
||||
|
||||
memset(node, 0, sizeof(*node));
|
||||
node->rune = rune;
|
||||
node->next = *head;
|
||||
node->prev = NULL;
|
||||
if (*head)
|
||||
(*head)->prev = node;
|
||||
*head = node;
|
||||
return node;
|
||||
}
|
||||
|
||||
/*
 * Maps a general category abbreviation to its numeric index.
 *
 * Only the first two characters of `name` are compared. Unknown names are
 * reported as "Cn".
 */
static size_t categoryClassify(const char* name) {
    static const char* const known[] = {
        "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Mc", "Me", "Nd", "Nl",
        "No", "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po", "Sm", "Sc",
        "Sk", "So", "Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Cn",
    };
    const size_t count = sizeof(known) / sizeof(known[0]);
    size_t i;

    for (i = 0; i < count; i++) {
        if (strncmp(known[i], name, 2) == 0)
            return i;
    }

    return categoryClassify("Cn");
}
|
||||
|
||||
FILE* in;
|
||||
FILE* out;
|
||||
|
||||
struct Blocks categoryBlocks;
|
||||
struct CaseInfo* caseInfo = NULL;
|
||||
|
||||
static int entryUnicodeData(long rune, int fill, char** fields, size_t size) {
|
||||
long lowercase, uppercase, titlecase;
|
||||
struct CaseInfo* node;
|
||||
|
||||
if (fill) {
|
||||
return blockInsert(&categoryBlocks, categoryClassify("Cn"), 0);
|
||||
} else {
|
||||
lowercase = strtol(fields[13], NULL, 16);
|
||||
uppercase = strtol(fields[12], NULL, 16);
|
||||
titlecase = strtol(fields[14], NULL, 16);
|
||||
|
||||
if (lowercase || uppercase || titlecase) {
|
||||
node = caseInfoGet(&caseInfo, rune);
|
||||
node->simple.lower = lowercase;
|
||||
node->simple.upper = uppercase;
|
||||
node->simple.title = titlecase;
|
||||
}
|
||||
|
||||
return blockInsert(&categoryBlocks, categoryClassify(fields[2]), 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Parses a whitespace-separated list of hexadecimal numbers from `field`
 * into `array` and terminates the list with 0.
 *
 * `array` must have room for 4 elements (every mapping array in this file
 * does). At most 3 values are stored so that the terminator always fits;
 * further values are ignored instead of being written past the end of the
 * array. Parsing stops at the first value that is 0 or cannot be parsed.
 * Elements after the terminator are left untouched.
 */
static void arrayParseFromStr(const char* field, long* array) {
    enum { MAX_VALUES = 3 };
    char* cursor = (char*)field;
    size_t written = 0;

    while (written < MAX_VALUES) {
        long value = strtol(cursor, &cursor, 16);

        if (!value)
            break;
        array[written++] = value;
    }

    array[written] = 0;
}
|
||||
|
||||
static int entryCaseFolding(long rune, int fill, char** fields, size_t size) {
|
||||
struct CaseInfo* node;
|
||||
|
||||
if (fill || !strcmp("T", fields[1]))
|
||||
return 1;
|
||||
|
||||
node = caseInfoGet(&caseInfo, rune);
|
||||
if (strcmp("F", fields[1])) {
|
||||
node->simple.fold = strtol(fields[2], NULL, 16);
|
||||
} else {
|
||||
arrayParseFromStr(fields[2], node->full.fold);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int entrySpecialCasing(long rune, int fill, char** fields, size_t size) {
|
||||
struct CaseInfo* node;
|
||||
|
||||
if (fill || strcmp("", fields[4]))
|
||||
return 1;
|
||||
|
||||
node = caseInfoGet(&caseInfo, rune);
|
||||
arrayParseFromStr(fields[1], node->full.lower);
|
||||
arrayParseFromStr(fields[3], node->full.upper);
|
||||
arrayParseFromStr(fields[2], node->full.title);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Clears a zero-terminated mapping that consists of exactly one value
 * (first element non-zero, second element zero). Longer and empty mappings
 * are left unchanged.
 */
static void mappingRemoveSingle(long* array) {
    const int isSingle = (array[0] != 0) && (array[1] == 0);

    if (isSingle)
        array[0] = 0;
}
|
||||
|
||||
static void caseInfoReduce(void) {
|
||||
struct CaseInfo* current = caseInfo;
|
||||
while (current) {
|
||||
if (!current->simple.title && current->simple.upper)
|
||||
current->simple.title = current->simple.upper;
|
||||
if (!current->full.title[0] && current->full.upper[0])
|
||||
memcpy(current->full.title, current->full.upper, 4 * sizeof(long));
|
||||
|
||||
mappingRemoveSingle(current->full.lower);
|
||||
mappingRemoveSingle(current->full.upper);
|
||||
mappingRemoveSingle(current->full.title);
|
||||
mappingRemoveSingle(current->full.fold);
|
||||
current = current->next;
|
||||
}
|
||||
}
|
||||
|
||||
struct Blocks lowerBlocks, upperBlocks, titleBlocks, foldBlocks;
|
||||
struct Blocks lowerFullBlocks, upperFullBlocks, titleFullBlocks, foldFullBlocks;
|
||||
|
||||
/* Distinct full case mappings, each stored as a 4-element row. */
long longIndexData[1024][4];
/* Number of rows of longIndexData currently in use. */
size_t longIndexSize = 0;

/*
 * Returns the row index of the 4-element mapping `array` in longIndexData,
 * appending it first when no identical row exists yet.
 *
 * Aborts with a diagnostic when the table is full instead of writing past
 * its end.
 */
static long longIndexGet(long* array) {
    const size_t capacity = sizeof(longIndexData) / sizeof(longIndexData[0]);
    size_t i;

    for (i = 0; i < longIndexSize; i++) {
        if (!memcmp(array, longIndexData[i], sizeof(longIndexData[i])))
            return (long)i;
    }

    if (longIndexSize >= capacity) {
        fprintf(stderr, "Too many distinct full case mappings (limit %lu)\n",
                (unsigned long)capacity);
        abort();
    }

    memcpy(longIndexData[longIndexSize], array, sizeof(longIndexData[0]));
    return (long)longIndexSize++;
}
|
||||
|
||||
static void blocksBuild(void) {
|
||||
struct CaseInfo* current = caseInfo;
|
||||
int emitted;
|
||||
long last = -1;
|
||||
|
||||
blockInit(&lowerBlocks, 4, 1, 64, 16, 1);
|
||||
blockInit(&upperBlocks, 4, 1, 64, 16, 1);
|
||||
blockInit(&titleBlocks, 4, 1, 64, 16, 1);
|
||||
blockInit(&foldBlocks, 4, 1, 64, 16, 1);
|
||||
blockInit(&lowerFullBlocks, 4, 1, 64, 32, 1);
|
||||
blockInit(&upperFullBlocks, 4, 1, 64, 32, 1);
|
||||
blockInit(&titleFullBlocks, 4, 1, 64, 32, 1);
|
||||
blockInit(&foldFullBlocks, 4, 1, 64, 32, 1);
|
||||
|
||||
while (current) {
|
||||
while (last + 1 < current->rune) {
|
||||
blockInsert(&lowerBlocks, 0, 0);
|
||||
blockInsert(&upperBlocks, 0, 0);
|
||||
blockInsert(&titleBlocks, 0, 0);
|
||||
blockInsert(&foldBlocks, 0, 0);
|
||||
blockInsert(&lowerFullBlocks, -1, 0);
|
||||
blockInsert(&upperFullBlocks, -1, 0);
|
||||
blockInsert(&titleFullBlocks, -1, 0);
|
||||
blockInsert(&foldFullBlocks, -1, 0);
|
||||
last++;
|
||||
}
|
||||
|
||||
if (current->simple.lower)
|
||||
blockInsert(&lowerBlocks, current->simple.lower - current->rune, 0);
|
||||
else
|
||||
blockInsert(&lowerBlocks, 0, 0);
|
||||
|
||||
if (current->simple.upper)
|
||||
blockInsert(&upperBlocks, current->simple.upper - current->rune, 0);
|
||||
else
|
||||
blockInsert(&upperBlocks, 0, 0);
|
||||
|
||||
if (current->simple.title)
|
||||
blockInsert(&titleBlocks, current->simple.title - current->rune, 0);
|
||||
else
|
||||
blockInsert(&titleBlocks, 0, 0);
|
||||
|
||||
if (current->simple.fold)
|
||||
blockInsert(&foldBlocks, current->simple.fold - current->rune, 0);
|
||||
else
|
||||
blockInsert(&foldBlocks, 0, 0);
|
||||
|
||||
if (current->full.lower[0])
|
||||
blockInsert(&lowerFullBlocks, longIndexGet(current->full.lower), 0);
|
||||
else
|
||||
blockInsert(&lowerFullBlocks, -1, 0);
|
||||
|
||||
if (current->full.upper[0])
|
||||
blockInsert(&upperFullBlocks, longIndexGet(current->full.upper), 0);
|
||||
else
|
||||
blockInsert(&upperFullBlocks, -1, 0);
|
||||
|
||||
if (current->full.title[0])
|
||||
blockInsert(&titleFullBlocks, longIndexGet(current->full.title), 0);
|
||||
else
|
||||
blockInsert(&titleFullBlocks, -1, 0);
|
||||
|
||||
if (current->full.fold[0])
|
||||
emitted = blockInsert(&foldFullBlocks, longIndexGet(current->full.fold), 0);
|
||||
else
|
||||
emitted = blockInsert(&foldFullBlocks, -1, 0);
|
||||
|
||||
last = current->rune;
|
||||
current = current->next;
|
||||
}
|
||||
|
||||
while (last + 1 < 0x110000 || emitted == -1) {
|
||||
blockInsert(&lowerBlocks, 0, 0);
|
||||
blockInsert(&upperBlocks, 0, 0);
|
||||
blockInsert(&titleBlocks, 0, 0);
|
||||
blockInsert(&foldBlocks, 0, 0);
|
||||
blockInsert(&lowerFullBlocks, -1, 0);
|
||||
blockInsert(&upperFullBlocks, -1, 0);
|
||||
blockInsert(&titleFullBlocks, -1, 0);
|
||||
emitted = blockInsert(&foldFullBlocks, -1, 0);
|
||||
last++;
|
||||
}
|
||||
}
|
||||
|
||||
static void outputCode(void) {
|
||||
#define DUMP(NAME, BLOCK, TYPE1, TYPE2, TYPE3, TYPE4) \
|
||||
blockDump(&BLOCK, 0, out, NAME "1", TYPE1); \
|
||||
blockDump(&BLOCK, 1, out, NAME "2", TYPE2); \
|
||||
blockDump(&BLOCK, 2, out, NAME "3", TYPE3); \
|
||||
blockDump(&BLOCK, 3, out, NAME "4", TYPE4)
|
||||
|
||||
fprintf(out, "/* Auto-generated case mapping tables */\n\n");
|
||||
fprintf(out, "#include <stdint.h>\n\n");
|
||||
|
||||
DUMP("cat", categoryBlocks, "uint8_t", "uint16_t", "uint16_t", "uint8_t");
|
||||
DUMP("low", lowerBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
|
||||
DUMP("upp", upperBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
|
||||
DUMP("tit", titleBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
|
||||
DUMP("fod", foldBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
|
||||
DUMP("lfx", lowerFullBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
|
||||
DUMP("ufx", upperFullBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
|
||||
DUMP("tfx", titleFullBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
|
||||
DUMP("ffx", foldFullBlocks, "uint8_t", "uint8_t", "uint8_t", "uint32_t");
|
||||
|
||||
fprintf(out, "static const int32_t case_data[][3] = {");
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < longIndexSize; ++i) {
|
||||
if (i % 4 == 0)
|
||||
fprintf(out, "\n ");
|
||||
fprintf(out, "{%ld, %ld, %ld}, ",
|
||||
longIndexData[i][0], longIndexData[i][1],
|
||||
longIndexData[i][2]);
|
||||
}
|
||||
}
|
||||
fprintf(out, "\n};\n\n");
|
||||
|
||||
#define EMIT_SIMPLE(FUNC, BLOCKS, BASE) do { \
|
||||
fprintf(out, "uint32_t CgeRune" #FUNC "(uint32_t r){\n"); \
|
||||
fprintf(out, " long t;\n if(r>1114111ul)return r;\n"); \
|
||||
blockAccess(&BLOCKS, 0, out, "t", "r", BASE "1"); \
|
||||
blockAccess(&BLOCKS, 1, out, "t", "r", BASE "2"); \
|
||||
blockAccess(&BLOCKS, 2, out, "t", "r", BASE "3"); \
|
||||
blockAccess(&BLOCKS, 3, out, "t", "r", BASE "4"); \
|
||||
fprintf(out, " return t?t+r:r;\n}\n\n"); \
|
||||
} while(0)
|
||||
|
||||
#define EMIT_FULL(FUNC, SIMPLE, FULL_BLOCKS, FULL_BASE, SIMPLE_FUNC) do { \
|
||||
fprintf(out, "size_t CgeRune" #FUNC "Full(uint32_t r, uint32_t* out){\n"); \
|
||||
fprintf(out, " long t;\n if(r>1114111ul){\n *out=r;\n return 1;\n }\n"); \
|
||||
blockAccess(&FULL_BLOCKS, 0, out, "t", "r", FULL_BASE "1"); \
|
||||
blockAccess(&FULL_BLOCKS, 1, out, "t", "r", FULL_BASE "2"); \
|
||||
blockAccess(&FULL_BLOCKS, 2, out, "t", "r", FULL_BASE "3"); \
|
||||
blockAccess(&FULL_BLOCKS, 3, out, "t", "r", FULL_BASE "4"); \
|
||||
fprintf(out, " if(t>=0){\n"); \
|
||||
fprintf(out, " const int32_t* p=case_data[t];\n"); \
|
||||
fprintf(out, " size_t i=0;\n"); \
|
||||
fprintf(out, " while(p[i] && i<3){out[i]=p[i];i++;}\n"); \
|
||||
fprintf(out, " return i;\n }\n"); \
|
||||
fprintf(out, " *out=CgeRune" #SIMPLE "(r);\n return 1;\n}\n\n"); \
|
||||
} while(0)
|
||||
|
||||
fprintf(out, "int CgeRuneCategory(uint32_t r){\n");
|
||||
fprintf(out, " long t;\n if(r>1114111ul)return %d;\n", (int)categoryClassify("Cn"));
|
||||
blockAccess(&categoryBlocks, 0, out, "t", "r", "cat1");
|
||||
blockAccess(&categoryBlocks, 1, out, "t", "r", "cat2");
|
||||
blockAccess(&categoryBlocks, 2, out, "t", "r", "cat3");
|
||||
blockAccess(&categoryBlocks, 3, out, "t", "r", "cat4");
|
||||
fprintf(out, " return t;\n}\n\n"); \
|
||||
|
||||
EMIT_SIMPLE(Lower, lowerBlocks, "low");
|
||||
EMIT_SIMPLE(Upper, upperBlocks, "upp");
|
||||
EMIT_SIMPLE(Title, titleBlocks, "tit");
|
||||
EMIT_SIMPLE(Fold, foldBlocks, "fod");
|
||||
|
||||
EMIT_FULL(Lower, Lower, lowerFullBlocks, "lfx", Lower);
|
||||
EMIT_FULL(Upper, Upper, upperFullBlocks, "ufx", Upper);
|
||||
EMIT_FULL(Title, Title, titleFullBlocks, "tfx", Title);
|
||||
EMIT_FULL(Fold, Fold, foldFullBlocks, "ffx", Fold);
|
||||
}
|
||||
|
||||
#undef DUMP
|
||||
#undef EMIT_SIMPLE
|
||||
#undef EMIT_FULL
|
||||
|
||||
int main() {
|
||||
if (!(in = fopen("UnicodeData.txt", "r"))) {
|
||||
fprintf(stderr, "UnicodeData.txt not found. Download it from:\n");
|
||||
fprintf(stderr, "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt\n");
|
||||
return -1;
|
||||
}
|
||||
out = stdout;
|
||||
|
||||
fprintf(stderr, "Processing UnicodeData.txt\n");
|
||||
blockInit(&categoryBlocks, 4, 1, 16, 8, 8);
|
||||
entryProcess(in, entryUnicodeData, "<*, First>", "<*, Last>", 1, 0, 15);
|
||||
fclose(in);
|
||||
|
||||
if (!(in = fopen("CaseFolding.txt", "r"))) {
|
||||
fprintf(stderr, "CaseFolding.txt not found. Download it from:\n");
|
||||
fprintf(stderr, "https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt\n");
|
||||
return -1;
|
||||
}
|
||||
fprintf(stderr, "Processing CaseFolding.txt\n");
|
||||
entryProcess(in, entryCaseFolding, NULL, NULL, 0, 0, 3);
|
||||
fclose(in);
|
||||
|
||||
fprintf(stderr, "Processing SpecialCasing.txt\n");
|
||||
if (!(in = fopen("SpecialCasing.txt", "r"))) {
|
||||
fprintf(stderr, "SpecialCasing.txt not found. Download it from:\n");
|
||||
fprintf(stderr, "https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt\n");
|
||||
return -1;
|
||||
}
|
||||
entryProcess(in, entrySpecialCasing, NULL, NULL, 0, 0, 4);
|
||||
fclose(in);
|
||||
|
||||
caseInfo = caseInfoSort(caseInfo);
|
||||
caseInfoReduce();
|
||||
blocksBuild();
|
||||
outputCode();
|
||||
|
||||
return 0;
|
||||
}
|
||||
76
generator/TextProc.h
Normal file
76
generator/TextProc.h
Normal file
@@ -0,0 +1,76 @@
|
||||
#ifndef TEXTPROC_H
|
||||
#define TEXTPROC_H
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Maximum number of fields fieldParse stores for one line. */
#define MAX_FIELDS 16
|
||||
|
||||
/*
 * Matches `text` against `pattern`, where '?' matches any single character
 * and '*' matches any run of characters (including an empty one). The whole
 * text must match. A pattern character equal to the current text character
 * is consumed as a literal before the wildcard rules apply.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int glob(const char* pattern, const char* text) {
    const char* afterStar = NULL; /* pattern position following the last '*' */
    const char* retryText = text; /* text position that '*' currently covers */

    while (*text != '\0') {
        if (*pattern == *text || *pattern == '?') {
            ++pattern;
            ++text;
            continue;
        }

        if (*pattern == '*') {
            afterStar = ++pattern;
            retryText = text;
            continue;
        }

        if (afterStar == NULL)
            return 0;

        /* Mismatch: let the last '*' swallow one more character and retry. */
        pattern = afterStar;
        text = ++retryText;
    }

    /* Trailing stars match the empty remainder. */
    while (*pattern == '*')
        ++pattern;

    return *pattern == '\0';
}
|
||||
|
||||
/*
 * Returns a pointer to the first character of `line` that is not
 * whitespace (or to the terminator when there is none). The string itself
 * is not modified.
 */
static char* trimLeft(char* line) {
    /* <ctype.h> functions require a value representable as unsigned char. */
    while (*line && isspace((unsigned char)*line))
        ++line;
    return line;
}
|
||||
|
||||
/*
 * Removes trailing whitespace from `line` in place by writing the
 * terminator right after the last non-whitespace character.
 */
static void trimRight(char* line) {
    char* end = line;

    for (; *line; ++line) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        if (!isspace((unsigned char)*line))
            end = line + 1;
    }

    *end = '\0';
}
|
||||
|
||||
/*
 * Cuts `line` at the first '#', removing the comment in place. Lines
 * without a '#' are left unchanged.
 */
static void trimComment(char* line) {
    char* hash = strchr(line, '#');

    if (hash != NULL)
        *hash = '\0';
}
|
||||
|
||||
/*
 * Strips trailing whitespace from `line` in place and returns a pointer to
 * its first non-whitespace character.
 */
static char* trim(char* line) {
    char* start;

    trimRight(line);
    start = trimLeft(line);
    return start;
}
|
||||
|
||||
static size_t fieldParse(char* line, char** fields, char separator) {
|
||||
size_t index = 0;
|
||||
|
||||
do {
|
||||
fields[index] = line;
|
||||
if ((line = strchr(line, separator)))
|
||||
*(line++) = '\0';
|
||||
fields[index] = trim(fields[index]);
|
||||
index++;
|
||||
} while (line && index < MAX_FIELDS);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
/*
 * Reads the next line from `in` into `line` (a buffer of `size` bytes),
 * removes a trailing '#' comment and splits the rest into ';'-separated
 * fields.
 *
 * Returns the number of fields stored in `fields`, or 0 when no further
 * line could be read.
 */
static int processLine(FILE* in, char* line, size_t size, char** fields) {
    char* text = fgets(line, size, in);

    if (text == NULL)
        return 0;

    trimComment(text);
    return fieldParse(text, fields, ';');
}
|
||||
|
||||
#endif /* TEXTPROC_H */
|
||||
41
generator/ValueList.h
Normal file
41
generator/ValueList.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#ifndef VALUELIST_H
|
||||
#define VALUELIST_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * Node of a singly linked list of value blocks. Each node holds its own
 * copy of `size` values.
 */
struct ValueList {
    long *data;             /* the stored values             */
    size_t size;            /* number of values in `data`    */
    struct ValueList *next; /* next node, or NULL at the end */
};
|
||||
|
||||
static long valueListFind(struct ValueList* list, long* data, size_t size) {
|
||||
long index = 0;
|
||||
|
||||
while (list) {
|
||||
if (list->size == size && !memcmp(list->data, data, size * sizeof(long)))
|
||||
return index;
|
||||
index++, list = list->next;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static long valueListIntern(struct ValueList** list, long* data, size_t size) {
|
||||
long index = 0;
|
||||
|
||||
while (*list) ++index, list = &(*list)->next;
|
||||
if (!(*list = malloc(sizeof(struct ValueList))))
|
||||
abort();
|
||||
if (!((*list)->data = malloc(size * sizeof(long))))
|
||||
abort();
|
||||
|
||||
memcpy((*list)->data, data, size * sizeof(long));
|
||||
(*list)->size = size;
|
||||
(*list)->next = NULL;
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
#endif /* VALUELIST_H */
|
||||
Reference in New Issue
Block a user