Files
CgeStr/Utf8.c

102 lines
2.2 KiB
C
Raw Normal View History

2026-06-14 22:51:45 +03:00
#include "CgeStr.h"
/* U+FFFD (REPLACEMENT CHARACTER): the code point the Lax encode/decode
 * variants in this file substitute when strict processing reports -1. */
#define INVALID_RUNE 0xFFFD
/*
 * Encodes a single Unicode code point as UTF-8.
 *
 * rune: code point to encode.
 * data: output buffer; up to 4 bytes are written, no terminator is appended.
 *
 * Returns the number of bytes written (1..4), or -1 when rune is a surrogate
 * (0xD800..0xDFFF) or greater than 0x10FFFF. Nothing is written on failure.
 */
int CgeUtf8Encode(uint32_t rune, char* data) {
    uint32_t rest = rune;
    uint32_t lead;
    int len;
    int k;

    /* ASCII is stored verbatim. */
    if (rune <= 0x7F) {
        data[0] = (char)rune;
        return 1;
    }

    /* Select the sequence length and the marker bits of the lead byte. */
    if (rune <= 0x7FF) {
        len = 2;
        lead = 0xC0;
    } else if (rune <= 0xFFFF) {
        if (rune >= 0xD800 && rune <= 0xDFFF)
            return -1;
        len = 3;
        lead = 0xE0;
    } else if (rune <= 0x10FFFF) {
        len = 4;
        lead = 0xF0;
    } else {
        return -1;
    }

    /* Emit continuation bytes from last to first, six payload bits each. */
    for (k = len - 1; k > 0; k--) {
        data[k] = (char)(0x80 | (rest & 0x3F));
        rest >>= 6;
    }

    /* The remaining high bits share the lead byte with the length marker. */
    data[0] = (char)(lead | rest);
    return len;
}
/*
 * Encodes a code point as UTF-8, substituting INVALID_RUNE when the strict
 * encoder rejects it.
 *
 * rune: code point to encode.
 * data: output buffer, filled by CgeUtf8Encode.
 *
 * Returns the result of CgeUtf8Encode for rune, or, if that was -1, the
 * result of encoding INVALID_RUNE instead.
 */
int CgeUtf8EncodeLax(uint32_t rune, char* data) {
    int written = CgeUtf8Encode(rune, data);

    if (written != -1)
        return written;

    /* Strict encoding refused the value: emit the replacement instead. */
    return CgeUtf8Encode(INVALID_RUNE, data);
}
/*
 * Decodes one UTF-8 encoded code point from the start of a buffer.
 *
 * data: bytes to decode; never dereferenced when size is 0.
 * size: number of readable bytes at data.
 * rune: receives the decoded code point on success.
 *
 * Returns the number of bytes consumed (1..4), or -1 when the buffer is
 * empty, the lead byte is not a valid sequence start, the sequence is
 * truncated, a continuation byte is malformed, the encoding is overlong,
 * or the value is a surrogate or above 0x10FFFF. On failure *rune is left
 * untouched.
 */
int CgeUtf8Decode(const char* data, size_t size, uint32_t* rune) {
    unsigned char byte;
    uint32_t value;
    int i, n;

    /* Validate the length before reading data[0]: with an empty buffer the
     * pointer may be NULL or one past the end of an array. */
    if (size == 0)
        return -1;

    byte = (unsigned char)data[0];
    if (byte < 0x80) {
        *rune = byte;
        return 1;
    }

    /* The lead byte fixes the sequence length and carries the top bits. */
    if ((byte & 0xE0) == 0xC0) {
        n = 2;
        value = byte & 0x1F;
    } else if ((byte & 0xF0) == 0xE0) {
        n = 3;
        value = byte & 0x0F;
    } else if ((byte & 0xF8) == 0xF0) {
        n = 4;
        value = byte & 0x07;
    } else {
        /* Stray continuation byte or a 0xF8..0xFF lead. */
        return -1;
    }

    if (size < (size_t)n)
        return -1;

    /* Each continuation byte must look like 10xxxxxx and adds six bits. */
    for (i = 1; i < n; i++) {
        byte = (unsigned char)data[i];
        if ((byte & 0xC0) != 0x80)
            return -1;
        value = (value << 6) | (byte & 0x3F);
    }

    /* Reject overlong forms: the value must need all n bytes. */
    if ((n == 2 && value < 0x80) ||
        (n == 3 && value < 0x800) ||
        (n == 4 && value < 0x10000)) {
        return -1;
    }

    /* Reject values outside the Unicode range and UTF-16 surrogates. */
    if (value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) {
        return -1;
    }

    /* Publish the result only once the whole sequence has been validated. */
    *rune = value;
    return n;
}
/*
 * Decodes one code point from a UTF-8 buffer, substituting INVALID_RUNE
 * when strict decoding fails.
 *
 * data: bytes to decode.
 * size: number of readable bytes at data.
 * rune: receives the decoded code point, or INVALID_RUNE on bad input.
 *
 * Returns the result of CgeUtf8Decode when it is not -1; otherwise stores
 * INVALID_RUNE in *rune and returns 1. Note that 1 is also returned when
 * size is 0, because CgeUtf8Decode reports an empty buffer as -1.
 */
int CgeUtf8DecodeLax(const char* data, size_t size, uint32_t* rune) {
    int consumed = CgeUtf8Decode(data, size, rune);

    if (consumed != -1)
        return consumed;

    /* Strict decoding failed: report the replacement and a length of 1. */
    *rune = INVALID_RUNE;
    return 1;
}