#include #include #include #include void printUsage(void) { printf("Utf8Test \n"); exit(1); } int main(int argc, char **argv) { BH_IO *inFile, *outFile; char inBuffer[8], outBuffer[8]; uint32_t unit; size_t i, inSize, outSize; if (argc < 2) printUsage(); inFile = BH_FileNew(argv[1], BH_FILE_READ | BH_FILE_EXIST, NULL); outFile = BH_FileNew(argv[2], BH_FILE_WRITE | BH_FILE_TRUNCATE, NULL); if (!inFile || !outFile) return -1; inSize = 0; while (1) { /* Read one byte and try to decode */ if (!inSize || !(outSize = BH_UnicodeDecodeUtf8(inBuffer, inSize, &unit))) { BH_IOPeek(inFile, inBuffer + inSize, 1, &outSize); BH_IORead(inFile, inBuffer + inSize, 1, &outSize); inSize += outSize; if (!outSize) break; continue; } /* Remove readed amount */ for (i = 0; i < inSize - outSize; i++) inBuffer[i] = inBuffer[i + outSize]; inSize -= outSize; /* Change unit if incorrect and write to output */ if (unit == -1) unit = 0xFFFD; outSize = BH_UnicodeEncodeUtf8(unit, outBuffer); BH_IOWrite(outFile, outBuffer, outSize, NULL); } /* Incomplete UTF-8 sequence */ if (inSize) { outSize = BH_UnicodeEncodeUtf8(0xFFFD, outBuffer); BH_IOWrite(outFile, outBuffer, outSize, NULL); } BH_IOFree(inFile); BH_IOFree(outFile); return 0; }