Initial commit
This commit is contained in:
101
Utf8.c
Normal file
101
Utf8.c
Normal file
@@ -0,0 +1,101 @@
|
||||
#include "CgeStr.h"
|
||||
|
||||
/* Code point substituted by the Lax encode/decode variants in this file when
 * strict encoding or decoding fails (U+FFFD, the Unicode replacement
 * character). */
#define INVALID_RUNE 0xFFFD
|
||||
|
||||
/*
 * Encode a single code point as UTF-8.
 *
 * rune: code point to encode.
 * data: output buffer; between 1 and 4 bytes are written starting at data[0].
 *       No terminator is appended.
 *
 * Returns the number of bytes written (1..4), or -1 when rune is a surrogate
 * (0xD800..0xDFFF) or lies above 0x10FFFF. Nothing is written on failure.
 */
int CgeUtf8Encode(uint32_t rune, char* data) {
    uint32_t lead;
    int length;
    int pos;

    /* Validate up front so the buffer stays untouched for unencodable input. */
    if (rune > 0x10FFFF || (rune >= 0xD800 && rune <= 0xDFFF))
        return -1;

    /* Single-byte form: the value is stored as-is. */
    if (rune <= 0x7F) {
        data[0] = (char)rune;
        return 1;
    }

    /* Pick the sequence length and the marker bits of its lead byte. */
    if (rune <= 0x7FF) {
        length = 2;
        lead = 0xC0;
    } else if (rune <= 0xFFFF) {
        length = 3;
        lead = 0xE0;
    } else {
        length = 4;
        lead = 0xF0;
    }

    /* Fill continuation bytes back to front, six payload bits at a time. */
    for (pos = length - 1; pos > 0; pos--) {
        data[pos] = (char)(0x80 | (rune & 0x3F));
        rune >>= 6;
    }

    /* Whatever bits remain belong in the lead byte. */
    data[0] = (char)(lead | rune);
    return length;
}
|
||||
|
||||
int CgeUtf8EncodeLax(uint32_t rune, char* data) {
|
||||
int result;
|
||||
|
||||
result = CgeUtf8Encode(rune, data);
|
||||
if (result == -1)
|
||||
result = CgeUtf8Encode(INVALID_RUNE, data);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Decode one UTF-8 encoded code point from the start of a byte buffer.
 *
 * data: bytes to decode; not dereferenced when size is 0.
 * size: number of readable bytes in data.
 * rune: receives the decoded code point; written only on success.
 *
 * Returns the number of bytes consumed (1..4), or -1 when the buffer is
 * empty, the lead byte is not a valid sequence start, the sequence is
 * truncated, a continuation byte is malformed, the encoding is overlong,
 * or the decoded value is a surrogate or lies above 0x10FFFF.
 */
int CgeUtf8Decode(const char* data, size_t size, uint32_t* rune) {
    unsigned char byte;
    uint32_t value;
    uint32_t minimum;
    int i, n;

    /* Check the length before touching data[0]: an empty buffer has no
     * readable bytes at all. */
    if (size == 0)
        return -1;

    byte = (unsigned char)data[0];

    if (byte < 0x80) {
        *rune = byte;
        return 1;
    }

    /* Classify the lead byte: sequence length, payload bits, and the
     * smallest value that legitimately needs that many bytes. */
    if ((byte & 0xE0) == 0xC0) {
        n = 2;
        value = byte & 0x1F;
        minimum = 0x80;
    } else if ((byte & 0xF0) == 0xE0) {
        n = 3;
        value = byte & 0x0F;
        minimum = 0x800;
    } else if ((byte & 0xF8) == 0xF0) {
        n = 4;
        value = byte & 0x07;
        minimum = 0x10000;
    } else {
        return -1;
    }

    if (size < (size_t)n)
        return -1;

    for (i = 1; i < n; i++) {
        byte = (unsigned char)data[i];
        /* Continuation bytes must have the form 10xxxxxx. */
        if ((byte & 0xC0) != 0x80)
            return -1;
        value = (value << 6) | (byte & 0x3F);
    }

    /* Reject overlong encodings. */
    if (value < minimum)
        return -1;

    /* Reject values outside the Unicode range and surrogates. */
    if (value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))
        return -1;

    *rune = value;
    return n;
}
|
||||
|
||||
int CgeUtf8DecodeLax(const char* data, size_t size, uint32_t* rune) {
|
||||
int result;
|
||||
|
||||
result = CgeUtf8Decode(data, size, rune);
|
||||
if (result == -1) {
|
||||
*rune = INVALID_RUNE;
|
||||
result = 1;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
Reference in New Issue
Block a user