コード例 #1
0
ファイル: zconv.c プロジェクト: huxiaomin/tstools
/*
 * Convert a GB-encoded byte string to a NUL-terminated UTF-8 string.
 *
 * gb   : input bytes
 * utf8 : output buffer; its size is not checked here, so the caller must
 *        supply enough room for the converted text plus the terminator
 * cnt  : maximum number of input bytes to consume
 *
 * A byte in 0x01..0x7F is copied through unchanged. Any other non-zero byte
 * is treated as the lead byte of a two-byte code, which is looked up in
 * GB_UCS through half_search() (DFLT_UCS is handed to the search; presumably
 * the value used for unmapped codes -- confirm against half_search()).
 *
 * Conversion stops at a 0x00 byte, when cnt is used up, or when a lead byte
 * has no complete trail byte left (either the budget or the string ends).
 *
 * return: number of characters converted
 */
int gb_utf8(const char *gb, char *utf8, size_t cnt)
{
        int wc = 0; /* word count */
        char *putf = utf8;
        size_t max = sizeof(GB_UCS) / 4 - 1; /* high index */

        while(cnt > 0) {
                uint8_t lead = (uint8_t)*gb++;

                if(lead == 0x00) {
                        break;
                }

                if(lead <= 0x7F) {
                        *putf++ = (char)lead;
                        cnt -= 1;
                }
                else {
                        uint8_t trail;
                        uint16_t gb2;  /* GB 2-byte data */
                        uint16_t ucs2; /* UCS-2 data */

                        /* cnt is unsigned: subtracting 2 from 1 would wrap
                         * around and let the loop run far past the input */
                        if(cnt < 2) {
                                break;
                        }

                        /* a terminator in the trail position means the string
                         * ended mid-character; do not step over it */
                        trail = (uint8_t)*gb;
                        if(trail == 0x00) {
                                break;
                        }
                        gb++;

                        gb2 = (uint16_t)(((uint16_t)lead << 8) | trail);
                        ucs2 = half_search(gb2, DFLT_UCS, max, GB_UCS);
                        ucs4_to_utf8((uint32_t)ucs2, &putf);
                        cnt -= 2;
                }
                wc++;
        }
        *putf = 0x00;

        return wc;
}
コード例 #2
0
ファイル: unicode.cpp プロジェクト: djmitche/lyx
/// Convert a whole UCS-4 vector to UTF-8 by delegating to the
/// pointer/length overload. An empty input returns an empty vector
/// directly, so element 0 is never addressed on an empty container.
vector<char>
ucs4_to_utf8(vector<char_type> const & ucs4str)
{
	if (!ucs4str.empty())
		return ucs4_to_utf8(&ucs4str[0], ucs4str.size());

	return vector<char>();
}
コード例 #3
0
ファイル: zconv.c プロジェクト: huxiaomin/tstools
int latin_utf8(const uint8_t *latin, char *utf8, size_t cnt, int coding)
{
        int wc = 0; /* word count */
        const uint8_t *platin = latin;
        uint8_t lt; /* latin data */
        char *putf8 = utf8;
        uint32_t ucs4; /* UCS-4 data */
        const uint16_t *tab; /* DVB coding table */

        switch(coding) {
                case CODING_DVB6937   : tab = DVB6937_UCS; break;
                case CODING_ISO6937   : tab = ISO6937_UCS; break;
                case CODING_DVB8859_1 :
                case CODING_ISO8859_1 : tab = ISO8859_1_UCS;  break;
                case CODING_DVB8859_2 :
                case CODING_ISO8859_2 : tab = ISO8859_2_UCS;  break;
                case CODING_DVB8859_3 :
                case CODING_ISO8859_3 : tab = ISO8859_3_UCS;  break;
                case CODING_DVB8859_4 :
                case CODING_ISO8859_4 : tab = ISO8859_4_UCS;  break;
                case CODING_DVB8859_5 : tab = DVB8859_5_UCS;  break;
                case CODING_ISO8859_5 : tab = ISO8859_5_UCS;  break;
                case CODING_DVB8859_6 :
                case CODING_ISO8859_6 : tab = ISO8859_6_UCS;  break;
                case CODING_DVB8859_7 : /* ISO8859_7 omit 0xA4, 0xA5, 0xAA */
                case CODING_ISO8859_7 : tab = ISO8859_7_UCS;  break;
                case CODING_DVB8859_8 : /* ISO8859_8 omit 0xFD, 0xFE */
                case CODING_ISO8859_8 : tab = ISO8859_8_UCS;  break;
                case CODING_DVB8859_9 :
                case CODING_ISO8859_9 : tab = ISO8859_9_UCS;  break;
                case CODING_DVB8859_10:
                case CODING_ISO8859_10: tab = ISO8859_10_UCS; break;
                case CODING_DVB8859_11:
                case CODING_ISO8859_11: tab = ISO8859_11_UCS; break;
                case CODING_DVB8859_13:
                case CODING_ISO8859_13: tab = ISO8859_13_UCS; break;
                case CODING_DVB8859_14:
                case CODING_ISO8859_14: tab = ISO8859_14_UCS; break;
                case CODING_DVB8859_15: tab = DVB8859_15_UCS; break;
                case CODING_ISO8859_15: tab = ISO8859_15_UCS; break;
                case CODING_ISO8859_16: tab = ISO8859_16_UCS; break;
                default:                tab = ISO8859_15_UCS; break; /* improvement of Latin1 */
        }

        while(cnt > 0) {
                lt = *platin++;
                ucs4 = (lt < 0xA0) ? lt : *(tab + lt - 0xA0);
                if(0x00000000 == ucs4) {
                        break;
                }
                ucs4_to_utf8(ucs4, &putf8);
                cnt--;
                wc++;
        }
        *putf8 = 0x00;

        return wc;
}
コード例 #4
0
ファイル: decompose.c プロジェクト: now/ned
/* {{{1
 * Same as utf_normalize(), except that at most ‘len’ bytes of ‘str’ are
 * normalized.  The intermediate wide-character buffer is released here; the
 * caller receives whatever ucs4_to_utf8() produced from it.
 */
char *
utf_normalize_n(const char *str, NormalizeMode mode, size_t len)
{
	unichar *normalized = _utf_normalize_wc(str, len, true, mode);
	char *result = ucs4_to_utf8(normalized, NULL, NULL);

	free(normalized);

	return result;
}
コード例 #5
0
ファイル: decompose.c プロジェクト: now/ned
/* {{{1
 * Normalize (compose/decompose) the characters in ‘str’ so that strings
 * containing the same characters are recognized as equal, for example when
 * comparing them.  The intermediate wide-character buffer is released here;
 * the caller receives whatever ucs4_to_utf8() produced from it.
 */
char *
utf_normalize(const char *str, NormalizeMode mode)
{
	unichar *normalized = _utf_normalize_wc(str, 0, false, mode);
	char *result = ucs4_to_utf8(normalized, NULL, NULL);

	free(normalized);

	return result;
}
コード例 #6
0
ファイル: zconv.c プロジェクト: huxiaomin/tstools
/*
 * Convert UTF-16 text to a NUL-terminated UTF-8 string.
 *
 * utf16  : input code units
 * utf8   : output buffer; its size is not checked here
 * cnt    : input budget, reduced by 2 for every decoded character
 *          (presumably a byte count -- confirm against callers)
 * endian : passed through to utf16_to_ucs4()
 *
 * Conversion stops when the budget no longer covers a full 16-bit unit or
 * when a decoded character is 0.
 *
 * return: number of characters converted
 */
int utf16_utf8(const uint16_t *utf16, char *utf8, size_t cnt, int endian)
{
        int wc = 0; /* word count */
        const uint16_t *putf16 = utf16;
        char *putf8 = utf8;
        uint32_t ucs4; /* UCS-4 data */

        /* cnt is unsigned, so an odd budget must not reach "cnt -= 2" with
         * only 1 left: it would wrap around and the loop would keep reading
         * far beyond the caller's limit. */
        while(cnt >= 2) {
                utf16_to_ucs4(&putf16, &ucs4, endian);
                if(0x00000000 == ucs4) {
                        break;
                }
                ucs4_to_utf8(ucs4, &putf8);
                cnt -= 2;
                wc++;
        }
        *putf8 = 0x00;

        return wc;
}
コード例 #7
0
ファイル: opencc_dict.c プロジェクト: tornadory/OpenCC
/*
 * Dump the lexicon and the double-array trie to "datrie.txt" as text.
 *
 * Layout written:
 *   - one line with lexicon_count
 *   - one line per lexicon entry: its value converted to UTF-8
 *   - one line per used trie slot: "index base parent word"
 *
 * If the file cannot be opened, the error is reported with perror() and
 * nothing is written.
 */
void write_text_file()
{
	FILE * fp;
	int i;

	fp = fopen("datrie.txt","w");
	if (fp == NULL)
	{
		/* Without this check the fprintf() calls below would be handed a
		 * NULL stream. */
		perror("datrie.txt");
		return;
	}

	fprintf(fp, "%d\n", lexicon_count);

	for (i = 0; i < lexicon_count; i ++)
	{
		/* The converted string is heap-allocated by ucs4_to_utf8() and
		 * released right after it is written. */
		char * buff = ucs4_to_utf8(lexicon[i].value, (size_t) -1);
		fprintf(fp, "%s\n", buff);
		free(buff);
	}
	
	for (i = 0; i < DATRIE_SIZE; i ++)
	{
		/* Only slots that are in use are written. */
		if (dat[i].parent != DATRIE_UNUSED)
		{
			fprintf(fp,"%d %d %d %d\n", i, dat[i].base, dat[i].parent, dat[i].word);
		}
	}
	
	fclose(fp);
}
コード例 #8
0
ファイル: console-input.c プロジェクト: philippe-nuaa/xboot
static ssize_t console_input_read(struct console_t * console, unsigned char * buf, size_t count)
{
	struct console_input_data_t * dat = (struct console_input_data_t *)console->priv;
	struct event_t event;
	u32_t key;
	u8_t sym[16];
	size_t len;

	if(pump_event(runtime_get()->__event_base, &event) && (event.type == EVENT_TYPE_KEY_DOWN) && (event.device == dat->input))
	{
		key = event.e.key_down.key;
		switch(key)
		{
		case KEY_BACKSPACE:
			sym[0] = 0x7f;
			len = 1;
			break;

		case KEY_TAB:
			sym[0] = 0x9;
			len = 1;
			break;

		case KEY_ENTER:
			sym[0] = 0xd;
			len = 1;
			break;

		case KEY_UP:
			sym[0] = '\e';
			sym[1] = '[';
			sym[2] = 'A';
			len = 3;
			break;

		case KEY_DOWN:
			sym[0] = '\e';
			sym[1] = '[';
			sym[2] = 'B';
			len = 3;
			break;

		case KEY_LEFT:
			sym[0] = '\e';
			sym[1] = '[';
			sym[2] = 'D';
			len = 3;
			break;

		case KEY_RIGHT:
			sym[0] = '\e';
			sym[1] = '[';
			sym[2] = 'C';
			len = 3;
			break;

		case KEY_PAGE_UP:
			sym[0] = '\e';
			sym[1] = '[';
			sym[2] = '5';
			sym[3] = '~';
			len = 4;
			break;

		case KEY_PAGE_DOWN:
			sym[0] = '\e';
			sym[1] = '[';
			sym[2] = '6';
			sym[3] = '~';
			len = 4;
			break;

		case KEY_HOME:
			sym[0] = '\e';
			sym[1] = '[';
			sym[2] = '1';
			sym[3] = '~';
			len = 4;
			break;

		case KEY_END:
			sym[0] = '\e';
			sym[1] = '[';
			sym[2] = '4';
			sym[3] = '~';
			len = 4;
			break;

		case KEY_VOLUME_UP:
		case KEY_VOLUME_DOWN:
		case KEY_MENU:
		case KEY_BACK:
		case KEY_POWER:
			len = 0;
			break;

		default:
			ucs4_to_utf8(&key, 1, (char *)sym, sizeof(sym));
			len = strlen((const char *)sym);
			break;
		}
		fifo_put(dat->fifo, sym, len);
	}

	return fifo_get(dat->fifo, buf, count);
}
コード例 #9
0
ファイル: opencc.c プロジェクト: johnnywjy/OpenCC
/*
 * Convert UTF-8 text with the given converter and return the result as a
 * newly allocated, NUL-terminated UTF-8 string.
 *
 * t_opencc : converter handle passed through to opencc_convert()
 * inbuf    : NUL-terminated UTF-8 input
 * length   : number of input bytes to convert; (size_t)-1 or any value
 *            larger than strlen(inbuf) means "the whole string"
 *
 * Returns the converted string (caller frees it), or (char*)-1 on failure.
 * errnum is set to OPENCC_ERROR_ENCODING when a UTF-8 <-> UCS-4 conversion
 * fails; it is not set here when opencc_convert() fails or when memory
 * cannot be allocated.
 */
char* opencc_convert_utf8(opencc_t t_opencc, const char* inbuf, size_t length) {
  if (!lib_initialized) {
    lib_initialize();
  }
  size_t actual_length = strlen(inbuf);
  if ((length == (size_t)-1) || (length > actual_length)) {
    length = actual_length;
  }
  ucs4_t* winbuf = utf8_to_ucs4(inbuf, length);
  if (winbuf == (ucs4_t*)-1) {
    /* Can not convert input UTF8 to UCS4 */
    errnum = OPENCC_ERROR_ENCODING;
    return (char*)-1;
  }
  /* Set up UTF8 buffer: out_cap bytes are allocated, out_len bytes are in
   * use (the terminator is not counted in out_len). */
  size_t out_cap = length + 1;
  size_t out_len = 0;
  char* original_outbuf = (char*)malloc(sizeof(char) * out_cap);
  /* Set conversion buffer */
  size_t wbufsize = length + 64;
  ucs4_t* woutbuf = (ucs4_t*)malloc(sizeof(ucs4_t) * (wbufsize + 1));
  if (original_outbuf == NULL || woutbuf == NULL) {
    free(original_outbuf);
    free(winbuf);
    free(woutbuf);
    return (char*)-1;
  }
  original_outbuf[0] = '\0';
  ucs4_t* pinbuf = winbuf;
  ucs4_t* poutbuf = woutbuf;
  size_t inbuf_left = ucs4len(winbuf);
  size_t outbuf_left = wbufsize;
  while (inbuf_left > 0) {
    size_t retval = opencc_convert(t_opencc,
                                   &pinbuf,
                                   &inbuf_left,
                                   &poutbuf,
                                   &outbuf_left);
    if (retval == (size_t)-1) {
      /* Always release the start of the allocation, never a cursor into it. */
      free(original_outbuf);
      free(winbuf);
      free(woutbuf);
      return (char*)-1;
    }
    *poutbuf = L'\0';
    char* ubuff = ucs4_to_utf8(woutbuf, (size_t)-1);
    if (ubuff == (char*)-1) {
      free(original_outbuf);
      free(winbuf);
      free(woutbuf);
      errnum = OPENCC_ERROR_ENCODING;
      return (char*)-1;
    }
    size_t ubuff_len = strlen(ubuff);
    /* Room needed for everything appended so far, this chunk, and the
     * terminator. Measuring against the bytes already used (rather than a
     * counter that is never reduced) keeps later chunks inside the buffer. */
    size_t needed = out_len + ubuff_len + 1;
    if (needed > out_cap) {
      size_t new_cap = out_cap;
      while (new_cap < needed) {
        /* Double, but fall back to the exact size if doubling would wrap. */
        new_cap = (new_cap > (size_t)-1 / 2) ? needed : new_cap * 2;
      }
      char* grown = (char*)realloc(original_outbuf, sizeof(char) * new_cap);
      if (grown == NULL) {
        free(ubuff);
        free(original_outbuf);
        free(winbuf);
        free(woutbuf);
        return (char*)-1;
      }
      original_outbuf = grown;
      out_cap = new_cap;
    }
    /* Copies the chunk together with its terminator. */
    memcpy(original_outbuf + out_len, ubuff, ubuff_len + 1);
    out_len += ubuff_len;
    free(ubuff);
    /* Reuse the whole conversion buffer for the next chunk. */
    outbuf_left = wbufsize;
    poutbuf = woutbuf;
  }
  free(winbuf);
  free(woutbuf);
  /* Trim the result to its exact size; if trimming fails the untrimmed
   * buffer is still valid, so return that instead of losing it. */
  char* trimmed = (char*)realloc(original_outbuf, sizeof(char) * (out_len + 1));
  return (trimmed != NULL) ? trimmed : original_outbuf;
}
コード例 #10
0
ファイル: opencc.c プロジェクト: izenecloud/icma
/*
 * Convert UTF-8 text with the given converter and return the result as a
 * newly allocated, NUL-terminated UTF-8 string.
 *
 * t_opencc : converter handle passed through to opencc_convert()
 * inbuf    : NUL-terminated UTF-8 input
 * length   : number of input bytes to convert; (size_t) -1 or any value
 *            larger than strlen(inbuf) means "the whole string"
 *
 * Returns the converted string (caller frees it), or (char *) -1 on failure.
 * errnum is set to OPENCC_ERROR_ENCODIND when a UTF-8 <-> UCS-4 conversion
 * fails; it is not set here when opencc_convert() fails or when memory
 * cannot be allocated.
 */
char * opencc_convert_utf8(opencc_t t_opencc, const char * inbuf, size_t length)
{
    if (!lib_initialized)
        lib_initialize();

    size_t actual_length = strlen(inbuf);
    if (length == (size_t) -1 || length > actual_length)
        length = actual_length;

    /* Convert the input data to a ucs4_t string */
    ucs4_t * winbuf = utf8_to_ucs4(inbuf, length);
    if (winbuf == (ucs4_t *) -1)
    {
        /* Input data conversion failed */
        errnum = OPENCC_ERROR_ENCODIND;
        return (char *) -1;
    }

    /* Set up the output UTF-8 text buffer: out_cap bytes are allocated,
     * out_len bytes are in use (the terminator is not counted in out_len). */
    size_t out_cap = length + 1;
    size_t out_len = 0;
    char * original_outbuf = (char *) malloc(sizeof(char) * out_cap);

    /* Set up the conversion buffer */
    size_t wbufsize = length + 64;
    ucs4_t * woutbuf = (ucs4_t *) malloc(sizeof(ucs4_t) * (wbufsize + 1));

    if (original_outbuf == NULL || woutbuf == NULL)
    {
        free(original_outbuf);
        free(winbuf);
        free(woutbuf);
        return (char *) -1;
    }
    original_outbuf[0] = '\0';

    ucs4_t * pinbuf = winbuf;
    ucs4_t * poutbuf = woutbuf;
    size_t inbuf_left = ucs4len(winbuf);
    size_t outbuf_left = wbufsize;

    while (inbuf_left > 0)
    {
        size_t retval = opencc_convert(t_opencc, &pinbuf, &inbuf_left, &poutbuf, &outbuf_left);
        if (retval == (size_t) -1)
        {
            /* Always release the start of the allocation, never a cursor
             * into it. */
            free(original_outbuf);
            free(winbuf);
            free(woutbuf);
            return (char *) -1;
        }

        *poutbuf = L'\0';

        char * ubuff = ucs4_to_utf8(woutbuf, (size_t) -1);

        if (ubuff == (char *) -1)
        {
            free(original_outbuf);
            free(winbuf);
            free(woutbuf);
            errnum = OPENCC_ERROR_ENCODIND;
            return (char *) -1;
        }

        size_t ubuff_len = strlen(ubuff);

        /* Room needed for everything appended so far, this chunk, and the
         * terminator. Measuring against the bytes already used (rather than
         * a counter that is never reduced) keeps later chunks inside the
         * buffer. */
        size_t needed = out_len + ubuff_len + 1;
        if (needed > out_cap)
        {
            size_t new_cap = out_cap;
            while (new_cap < needed)
            {
                /* Double, but fall back to the exact size if doubling
                 * would wrap. */
                new_cap = (new_cap > (size_t) -1 / 2) ? needed : new_cap * 2;
            }

            char * grown = (char *) realloc(original_outbuf, sizeof(char) * new_cap);
            if (grown == NULL)
            {
                free(ubuff);
                free(original_outbuf);
                free(winbuf);
                free(woutbuf);
                return (char *) -1;
            }
            original_outbuf = grown;
            out_cap = new_cap;
        }

        /* Copies the chunk together with its terminator. */
        memcpy(original_outbuf + out_len, ubuff, ubuff_len + 1);
        out_len += ubuff_len;
        free(ubuff);

        /* Reuse the whole conversion buffer for the next chunk. */
        outbuf_left = wbufsize;
        poutbuf = woutbuf;
    }

    free(winbuf);
    free(woutbuf);

    /* Trim the result to its exact size; if trimming fails the untrimmed
     * buffer is still valid, so return that instead of losing it. */
    char * trimmed = (char *) realloc(original_outbuf, sizeof(char) * (out_len + 1));

    return (trimmed != NULL) ? trimmed : original_outbuf;
}
コード例 #11
0
/*
 * Render (or only measure) a PLplot UCS-4 string.
 *
 * ucs4     : array of code points, font-change markers and escape sequences
 * ucs4Len  : number of elements in ucs4
 * drawText : forwarded unchanged to PSDrawTextToDC()
 *
 * Displayable characters are collected as UTF-8 in a local buffer. Whenever
 * the font, scale, offset or underline state changes, the text collected so
 * far is handed to PSDrawTextToDC() before the change is applied.
 * NOTE(review): the buffer is not cleared here after PSDrawTextToDC();
 * presumably that function resets it -- confirm.
 *
 * Elements at or above PL_FCI_MARK select a new font. The PLplot escape
 * character introduces: a second escape (literal escape character),
 * 'u' (superscript), 'd' (subscript), '-' (toggle underline),
 * '+' (overline, not implemented).
 */
void wxPLDevBase::PSDrawText( PLUNICODE* ucs4, int ucs4Len, bool drawText )
{
  int i = 0;

  char utf8_string[max_string_length];
  char utf8[5];
  memset( utf8_string, '\0', max_string_length );

  /* Get PLplot escape character */
  char plplotEsc;
  plgesc( &plplotEsc );

  /* Get the current font */
  fontScale = 1.0;
  yOffset = 0.0;
  PLUNICODE fci;
  plgfci( &fci );
  PSSetFont( fci );
  textWidth=0;
  textHeight=0;

  while( i < ucs4Len ) {
    if( ucs4[i] < PL_FCI_MARK ) {  /* not a font change */
      if( ucs4[i] != (PLUNICODE)plplotEsc ) {  /* a character to display */
        ucs4_to_utf8( ucs4[i], utf8 );
        /* strncat's bound is the number of characters it may append, so it
           must be the space still free in the buffer, not the buffer size */
        strncat( utf8_string, utf8, sizeof( utf8_string ) - strlen( utf8_string ) - 1 );
        i++;
        continue;
      }
      i++;
      /* The escape character was the last element: there is no command
         after it, and reading ucs4[i] would run past the array. */
      if( i >= ucs4Len )
        break;
      if( ucs4[i] == (PLUNICODE)plplotEsc ) {   /* an escape character to display */
        ucs4_to_utf8( ucs4[i], utf8 );
        strncat( utf8_string, utf8, sizeof( utf8_string ) - strlen( utf8_string ) - 1 );
        i++;
        continue;
      } else {
        if( ucs4[i] == (PLUNICODE)'u' ) {  /* Superscript */
          // draw string so far
          PSDrawTextToDC( utf8_string, drawText );

          // change font scale
          if( yOffset<0.0 )
            fontScale *= 1.25;  /* currently below the baseline: scale back up */
          else
            fontScale *= 0.8;   /* superscript scaling parameter */
          PSSetFont( fci );

          /* the upward shift uses the already updated font scale */
          yOffset += scaley * fontSize * fontScale / 2.;
        }
        if( ucs4[i] == (PLUNICODE)'d' ) {  /* Subscript */
          // draw string so far
          PSDrawTextToDC( utf8_string, drawText );

          // change font scale
          double old_fontScale=fontScale;
          if( yOffset>0.0 )
            fontScale *= 1.25;  /* currently above the baseline: scale back up */
          else
            fontScale *= 0.8;   /* subscript scaling parameter */
          PSSetFont( fci );

          /* the downward shift uses the font scale from before the change */
          yOffset -= scaley * fontSize * old_fontScale / 2.;
        }
        if( ucs4[i] == (PLUNICODE)'-' ) {  /* underline */
          // draw string so far
          PSDrawTextToDC( utf8_string, drawText );

          underlined = !underlined;
          PSSetFont( fci );
        }
        if( ucs4[i] == (PLUNICODE)'+' ) {  /* overline */
          /* not implemented yet */
        }
        i++;
      }
    } else { /* a font change */
      // draw string so far
      PSDrawTextToDC( utf8_string, drawText );

      // get new font
      fci = ucs4[i];
      PSSetFont( fci );
      i++;
    }
  }

  PSDrawTextToDC( utf8_string, drawText );
}