Esempio n. 1
0
char* opencc_convert_utf8(opencc_t t_opencc, const char* inbuf, size_t length) {
  if (!lib_initialized) {
    lib_initialize();
  }
	size_t actual_length = strlen(inbuf);
  if ((length == (size_t)-1) || (length > actual_length)) {
    length = actual_length;
  }
  ucs4_t* winbuf = utf8_to_ucs4(inbuf, length);
  if (winbuf == (ucs4_t*)-1) {
    /* Can not convert input UTF8 to UCS4 */
    errnum = OPENCC_ERROR_ENCODING;
    return (char*)-1;
  }
  /* Set up UTF8 buffer */
  size_t outbuf_len = length;
  size_t outsize = outbuf_len;
  char* original_outbuf = (char*)malloc(sizeof(char) * (outbuf_len + 1));
  char* outbuf = original_outbuf;
  original_outbuf[0] = '\0';
  /* Set conversion buffer */
  size_t wbufsize = length + 64;
  ucs4_t* woutbuf = (ucs4_t*)malloc(sizeof(ucs4_t) * (wbufsize + 1));
  ucs4_t* pinbuf = winbuf;
  ucs4_t* poutbuf = woutbuf;
  size_t inbuf_left, outbuf_left;
  inbuf_left = ucs4len(winbuf);
  outbuf_left = wbufsize;
  while (inbuf_left > 0) {
    size_t retval = opencc_convert(t_opencc,
                                   &pinbuf,
                                   &inbuf_left,
                                   &poutbuf,
                                   &outbuf_left);
    if (retval == (size_t)-1) {
      free(outbuf);
      free(winbuf);
      free(woutbuf);
      return (char*)-1;
    }
    *poutbuf = L'\0';
    char* ubuff = ucs4_to_utf8(woutbuf, (size_t)-1);
    if (ubuff == (char*)-1) {
      free(outbuf);
      free(winbuf);
      free(woutbuf);
      errnum = OPENCC_ERROR_ENCODING;
      return (char*)-1;
    }
    size_t ubuff_len = strlen(ubuff);
    while (ubuff_len > outsize) {
      size_t outbuf_offset = outbuf - original_outbuf;
      outsize += outbuf_len;
      outbuf_len += outbuf_len;
      original_outbuf =
        (char*)realloc(original_outbuf, sizeof(char) * outbuf_len);
      outbuf = original_outbuf + outbuf_offset;
    }
    strncpy(outbuf, ubuff, ubuff_len);
    free(ubuff);
    outbuf += ubuff_len;
    *outbuf = '\0';
    outbuf_left = wbufsize;
    poutbuf = woutbuf;
  }
  free(winbuf);
  free(woutbuf);
  original_outbuf = (char*)realloc(original_outbuf,
                                   sizeof(char) * (strlen(original_outbuf) + 1));
  return original_outbuf;
}
Esempio n. 2
0
char * opencc_convert_utf8(opencc_t t_opencc, const char * inbuf, size_t length)
{
    if (!lib_initialized)
        lib_initialize();

    if (length == (size_t) -1 || length > strlen(inbuf))
        length = strlen(inbuf);

    /* 將輸入數據轉換爲ucs4_t字符串 */
    ucs4_t * winbuf = utf8_to_ucs4(inbuf, length);
    if (winbuf == (ucs4_t *) -1)
    {
        /* 輸入數據轉換失敗 */
        errnum = OPENCC_ERROR_ENCODIND;
        return (char *) -1;
    }

    /* 設置輸出UTF8文本緩衝區空間 */
    size_t outbuf_len = length;
    size_t outsize = outbuf_len;
    char * original_outbuf = (char *) malloc(sizeof(char) * (outbuf_len + 1));
    char * outbuf = original_outbuf;
    original_outbuf[0] = '\0';

    /* 設置轉換緩衝區空間 */
    size_t wbufsize = length + 64;
    ucs4_t * woutbuf = (ucs4_t *) malloc(sizeof(ucs4_t) * (wbufsize + 1));

    ucs4_t * pinbuf = winbuf;
    ucs4_t * poutbuf = woutbuf;
    size_t inbuf_left, outbuf_left;

    inbuf_left = ucs4len(winbuf);
    outbuf_left = wbufsize;

    while (inbuf_left > 0)
    {
        size_t retval = opencc_convert(t_opencc, &pinbuf, &inbuf_left, &poutbuf, &outbuf_left);
        if (retval == (size_t) -1)
        {
            free(outbuf);
            free(winbuf);
            free(woutbuf);
            return (char *) -1;
        }

        *poutbuf = L'\0';

        char * ubuff = ucs4_to_utf8(woutbuf, (size_t) -1);

        if (ubuff == (char *) -1)
        {
            free(outbuf);
            free(winbuf);
            free(woutbuf);
            errnum = OPENCC_ERROR_ENCODIND;
            return (char *) -1;
        }

        size_t ubuff_len = strlen(ubuff);

        while (ubuff_len > outsize)
        {
            size_t outbuf_offset = outbuf - original_outbuf;
            outsize += outbuf_len;
            outbuf_len += outbuf_len;
            original_outbuf = (char *) realloc(original_outbuf, sizeof(char) * outbuf_len);
            outbuf = original_outbuf + outbuf_offset;
        }

        strncpy(outbuf, ubuff, ubuff_len);
        free(ubuff);

        outbuf += ubuff_len;
        *outbuf = '\0';

        outbuf_left = wbufsize;
        poutbuf = woutbuf;
    }

    free(winbuf);
    free(woutbuf);

    original_outbuf = (char *) realloc(original_outbuf,
                                       sizeof(char) * (strlen(original_outbuf) + 1));

    return original_outbuf;
}