/**
 * Convert a UTF-8 string through the given OpenCC handle.
 *
 * The input is decoded to UCS-4, fed to opencc_convert() in as many rounds as
 * it takes to consume it, and each round's output is re-encoded to UTF-8 and
 * appended to a growing heap buffer.
 *
 * @param t_opencc  Conversion handle, passed unchanged to opencc_convert().
 * @param inbuf     NUL-terminated UTF-8 input.
 * @param length    Number of bytes of inbuf to convert. (size_t)-1, or any
 *                  value larger than strlen(inbuf), selects the whole string.
 * @return A heap-allocated, NUL-terminated UTF-8 string that the caller must
 *         release with free(), or (char*)-1 on failure. errnum is set to
 *         OPENCC_ERROR_ENCODING when either encoding conversion fails.
 *         On allocation failure (char*)-1 is returned and errnum is left
 *         untouched, because no out-of-memory code is visible from here.
 */
char* opencc_convert_utf8(opencc_t t_opencc, const char* inbuf, size_t length) {
  if (!lib_initialized) {
    lib_initialize();
  }

  size_t actual_length = strlen(inbuf);
  if ((length == (size_t)-1) || (length > actual_length)) {
    length = actual_length;
  }

  ucs4_t* winbuf = utf8_to_ucs4(inbuf, length);
  if (winbuf == (ucs4_t*)-1) {
    /* Can not convert input UTF8 to UCS4 */
    errnum = OPENCC_ERROR_ENCODING;
    return (char*)-1;
  }

  /*
   * UTF-8 output buffer. The allocation always holds out_cap payload bytes
   * plus one byte for the terminator; out_used counts payload bytes written.
   */
  size_t out_cap = length;
  size_t out_used = 0;
  char* out = (char*)malloc(out_cap + 1);

  /* UCS-4 scratch buffer that opencc_convert() writes into on each round. */
  size_t wbufsize = length + 64;
  ucs4_t* woutbuf = (ucs4_t*)malloc(sizeof(ucs4_t) * (wbufsize + 1));

  if (out == NULL || woutbuf == NULL) {
    free(out);
    free(woutbuf);
    free(winbuf);
    return (char*)-1;
  }
  out[0] = '\0';

  ucs4_t* pinbuf = winbuf;
  ucs4_t* poutbuf = woutbuf;
  size_t inbuf_left = ucs4len(winbuf);
  size_t outbuf_left = wbufsize;

  while (inbuf_left > 0) {
    size_t retval = opencc_convert(t_opencc, &pinbuf, &inbuf_left,
                                   &poutbuf, &outbuf_left);
    if (retval == (size_t)-1) {
      /* Free the allocation base, never a cursor inside the buffer. */
      free(out);
      free(winbuf);
      free(woutbuf);
      return (char*)-1;
    }
    *poutbuf = L'\0';

    char* ubuff = ucs4_to_utf8(woutbuf, (size_t)-1);
    if (ubuff == (char*)-1) {
      free(out);
      free(winbuf);
      free(woutbuf);
      errnum = OPENCC_ERROR_ENCODING;
      return (char*)-1;
    }

    size_t ubuff_len = strlen(ubuff);

    /* Grow against the space that is still free, not the total capacity. */
    if (ubuff_len > out_cap - out_used) {
      size_t needed = out_used + ubuff_len;
      size_t new_cap = (out_cap > 0) ? out_cap : 1;
      while (new_cap < needed) {
        new_cap *= 2;
      }
      /* Keep the old pointer until realloc succeeds so it can be freed. */
      char* grown = (char*)realloc(out, new_cap + 1);
      if (grown == NULL) {
        free(ubuff);
        free(out);
        free(winbuf);
        free(woutbuf);
        return (char*)-1;
      }
      out = grown;
      out_cap = new_cap;
    }

    memcpy(out + out_used, ubuff, ubuff_len);
    free(ubuff);
    out_used += ubuff_len;
    out[out_used] = '\0';

    /* Rewind the scratch buffer for the next round. */
    outbuf_left = wbufsize;
    poutbuf = woutbuf;
  }

  free(winbuf);
  free(woutbuf);

  /* Trim to the exact size; if that fails the larger buffer is still valid. */
  char* trimmed = (char*)realloc(out, out_used + 1);
  return (trimmed != NULL) ? trimmed : out;
}
/**
 * Convert a UTF-8 string through the given OpenCC handle.
 *
 * The input is decoded into a ucs4_t string, run through opencc_convert()
 * until all of it is consumed, and the output of every round is encoded back
 * to UTF-8 and appended to a heap buffer that grows on demand.
 *
 * @param t_opencc  Conversion handle, passed unchanged to opencc_convert().
 * @param inbuf     NUL-terminated UTF-8 input.
 * @param length    Number of bytes of inbuf to convert. (size_t) -1, or any
 *                  value larger than strlen(inbuf), selects the whole string.
 * @return A heap-allocated, NUL-terminated UTF-8 string that the caller must
 *         release with free(), or (char *) -1 on failure. errnum is set to
 *         OPENCC_ERROR_ENCODIND when either encoding conversion fails.
 *         On allocation failure (char *) -1 is returned and errnum is left
 *         untouched, because no out-of-memory code is visible from here.
 */
char * opencc_convert_utf8(opencc_t t_opencc, const char * inbuf, size_t length)
{
	if (!lib_initialized)
		lib_initialize();

	/* Measure the input once and clamp the requested length to it. */
	size_t input_len = strlen(inbuf);
	if (length == (size_t) -1 || length > input_len)
		length = input_len;

	/* Convert the input data into a ucs4_t string. */
	ucs4_t * winbuf = utf8_to_ucs4(inbuf, length);
	if (winbuf == (ucs4_t *) -1)
	{
		/* The input data could not be converted. */
		errnum = OPENCC_ERROR_ENCODIND;
		return (char *) -1;
	}

	/*
	 * Set up the UTF-8 output buffer. The allocation always holds `capacity`
	 * payload bytes plus one terminator byte; `used` counts payload bytes.
	 */
	size_t capacity = length;
	size_t used = 0;
	char * result = (char *) malloc(capacity + 1);

	/* Set up the ucs4_t scratch buffer that each conversion round fills. */
	size_t wbufsize = length + 64;
	ucs4_t * woutbuf = (ucs4_t *) malloc(sizeof(ucs4_t) * (wbufsize + 1));

	if (result == NULL || woutbuf == NULL)
	{
		free(result);
		free(woutbuf);
		free(winbuf);
		return (char *) -1;
	}
	result[0] = '\0';

	ucs4_t * pinbuf = winbuf;
	ucs4_t * poutbuf = woutbuf;
	size_t inbuf_left = ucs4len(winbuf);
	size_t outbuf_left = wbufsize;

	while (inbuf_left > 0)
	{
		size_t retval = opencc_convert(t_opencc, &pinbuf, &inbuf_left,
				&poutbuf, &outbuf_left);
		if (retval == (size_t) -1)
		{
			/* Release the allocation base, not a pointer into its middle. */
			free(result);
			free(winbuf);
			free(woutbuf);
			return (char *) -1;
		}
		*poutbuf = L'\0';

		char * ubuff = ucs4_to_utf8(woutbuf, (size_t) -1);
		if (ubuff == (char *) -1)
		{
			free(result);
			free(winbuf);
			free(woutbuf);
			errnum = OPENCC_ERROR_ENCODIND;
			return (char *) -1;
		}

		size_t ubuff_len = strlen(ubuff);

		/* Compare against the room still available, not the total size. */
		if (ubuff_len > capacity - used)
		{
			size_t wanted = used + ubuff_len;
			size_t bigger = (capacity > 0) ? capacity : 1;
			while (bigger < wanted)
				bigger *= 2;

			/* Hold on to the old block until realloc is known to succeed. */
			char * moved = (char *) realloc(result, bigger + 1);
			if (moved == NULL)
			{
				free(ubuff);
				free(result);
				free(winbuf);
				free(woutbuf);
				return (char *) -1;
			}
			result = moved;
			capacity = bigger;
		}

		memcpy(result + used, ubuff, ubuff_len);
		free(ubuff);
		used += ubuff_len;
		result[used] = '\0';

		/* Reset the scratch buffer before the next round. */
		outbuf_left = wbufsize;
		poutbuf = woutbuf;
	}

	free(winbuf);
	free(woutbuf);

	/* Shrink to fit; on failure the oversized buffer remains usable. */
	char * fitted = (char *) realloc(result, used + 1);
	return (fitted != NULL) ? fitted : result;
}