/*
 * Convert a GB-encoded byte string to UTF-8.
 *
 *   gb    input bytes; a 0x00 byte ends the conversion early
 *   utf8  output buffer, always NUL-terminated on return; its size is not
 *         checked here, so the caller must provide enough room
 *   cnt   number of input bytes available in gb
 *
 * Bytes <= 0x7F are copied through unchanged.  Any other byte is taken as the
 * high byte of a 2-byte code that is looked up in GB_UCS with half_search()
 * (DFLT_UCS is presumably the value returned when the code is not found) and
 * then written with ucs4_to_utf8().
 *
 * Returns the number of characters converted.
 */
int gb_utf8(const char *gb, char *utf8, size_t cnt)
{
        int wc = 0; /* word count */
        char *putf = utf8;
        uint32_t ucs4; /* UCS-4 data */
        uint16_t ucs2; /* UCS-2 data */
        uint16_t gb2; /* GB 2-byte data */
        size_t max = sizeof(GB_UCS) / 4 - 1; /* high index */

        while(cnt > 0) {
                /* go through uint8_t so the byte is never sign-extended */
                uint8_t hi = (uint8_t)*gb++;

                if(hi == 0x00) {
                        break;
                }

                if(hi <= 0x7F) {
                        *putf++ = (char)hi;
                        cnt -= 1;
                }
                else {
                        uint8_t lo;

                        /*
                         * A lead byte needs a trail byte.  Without this check
                         * "cnt -= 2" would wrap the unsigned counter and the
                         * loop would keep reading past the end of the input.
                         */
                        if(cnt < 2) {
                                break;
                        }

                        lo = (uint8_t)*gb++;
                        gb2 = (uint16_t)(((uint16_t)hi << 8) | lo);
                        ucs2 = half_search(gb2, DFLT_UCS, max, GB_UCS);
                        ucs4 = (uint32_t)ucs2;
                        ucs4_to_utf8(ucs4, &putf);
                        cnt -= 2;
                }
                wc++;
        }
        *putf = 0x00;

        return wc;
}
/// Convenience overload: convert a whole vector of UCS-4 code points by
/// forwarding to the (pointer, length) overload.  An empty input yields an
/// empty result without touching element 0.
vector<char> ucs4_to_utf8(vector<char_type> const & ucs4str)
{
	if (!ucs4str.empty())
		return ucs4_to_utf8(&ucs4str[0], ucs4str.size());

	return vector<char>();
}
int latin_utf8(const uint8_t *latin, char *utf8, size_t cnt, int coding) { int wc = 0; /* word count */ const uint8_t *platin = latin; uint8_t lt; /* latin data */ char *putf8 = utf8; uint32_t ucs4; /* UCS-4 data */ const uint16_t *tab; /* DVB coding table */ switch(coding) { case CODING_DVB6937 : tab = DVB6937_UCS; break; case CODING_ISO6937 : tab = ISO6937_UCS; break; case CODING_DVB8859_1 : case CODING_ISO8859_1 : tab = ISO8859_1_UCS; break; case CODING_DVB8859_2 : case CODING_ISO8859_2 : tab = ISO8859_2_UCS; break; case CODING_DVB8859_3 : case CODING_ISO8859_3 : tab = ISO8859_3_UCS; break; case CODING_DVB8859_4 : case CODING_ISO8859_4 : tab = ISO8859_4_UCS; break; case CODING_DVB8859_5 : tab = DVB8859_5_UCS; break; case CODING_ISO8859_5 : tab = ISO8859_5_UCS; break; case CODING_DVB8859_6 : case CODING_ISO8859_6 : tab = ISO8859_6_UCS; break; case CODING_DVB8859_7 : /* ISO8859_7 omit 0xA4, 0xA5, 0xAA */ case CODING_ISO8859_7 : tab = ISO8859_7_UCS; break; case CODING_DVB8859_8 : /* ISO8859_8 omit 0xFD, 0xFE */ case CODING_ISO8859_8 : tab = ISO8859_8_UCS; break; case CODING_DVB8859_9 : case CODING_ISO8859_9 : tab = ISO8859_9_UCS; break; case CODING_DVB8859_10: case CODING_ISO8859_10: tab = ISO8859_10_UCS; break; case CODING_DVB8859_11: case CODING_ISO8859_11: tab = ISO8859_11_UCS; break; case CODING_DVB8859_13: case CODING_ISO8859_13: tab = ISO8859_13_UCS; break; case CODING_DVB8859_14: case CODING_ISO8859_14: tab = ISO8859_14_UCS; break; case CODING_DVB8859_15: tab = DVB8859_15_UCS; break; case CODING_ISO8859_15: tab = ISO8859_15_UCS; break; case CODING_ISO8859_16: tab = ISO8859_16_UCS; break; default: tab = ISO8859_15_UCS; break; /* improvement of Latin1 */ } while(cnt > 0) { lt = *platin++; ucs4 = (lt < 0xA0) ? lt : *(tab + lt - 0xA0); if(0x00000000 == ucs4) { break; } ucs4_to_utf8(ucs4, &putf8); cnt--; wc++; } *putf8 = 0x00; return wc; }
/* {{{1
 * This function is the same as utf_normalize() except that at most ‘len’
 * bytes are normalized from ‘str’.
 *
 * The intermediate wide-character buffer is released here; the value
 * returned is whatever ucs4_to_utf8() produced from it.
 */
char *
utf_normalize_n(const char *str, NormalizeMode mode, size_t len)
{
	unichar *normalized;
	char *result;

	normalized = _utf_normalize_wc(str, len, true, mode);
	result = ucs4_to_utf8(normalized, NULL, NULL);
	free(normalized);

	return result;
}
/* {{{1
 * Normalize (compose/decompose) characters in ‘str’ so that strings that
 * actually contain the same characters will be recognized as equal for
 * comparison for example.
 *
 * The intermediate wide-character buffer is released here; the value
 * returned is whatever ucs4_to_utf8() produced from it.
 */
char *
utf_normalize(const char *str, NormalizeMode mode)
{
	unichar *normalized;
	char *result;

	normalized = _utf_normalize_wc(str, 0, false, mode);
	result = ucs4_to_utf8(normalized, NULL, NULL);
	free(normalized);

	return result;
}
/*
 * Convert a UTF-16 string to UTF-8.
 *
 *   utf16   input code units
 *   utf8    output buffer, always NUL-terminated on return; its size is not
 *           checked here
 *   cnt     input budget; it is reduced by 2 for every decoded character,
 *           so it appears to be a byte count (confirm against callers)
 *   endian  passed through to utf16_to_ucs4()
 *
 * Conversion stops when the budget is used up or a 0 code point is decoded.
 *
 * Returns the number of characters converted.
 */
int utf16_utf8(const uint16_t *utf16, char *utf8, size_t cnt, int endian)
{
        int wc = 0; /* word count */
        const uint16_t *putf16 = utf16;
        char *putf8 = utf8;
        uint32_t ucs4; /* UCS-4 data */

        /*
         * Require a full 2-byte unit.  With "cnt > 0" an odd count reaches
         * "cnt -= 2" at cnt == 1, wraps the unsigned counter and turns the
         * loop into an effectively unbounded read.
         */
        while(cnt >= 2) {
                utf16_to_ucs4(&putf16, &ucs4, endian);
                if(0x00000000 == ucs4) {
                        break;
                }

                ucs4_to_utf8(ucs4, &putf8);
                cnt -= 2;
                wc++;
        }
        *putf8 = 0x00;

        return wc;
}
/*
 * Dump the lexicon and the double-array trie to "datrie.txt" as text.
 *
 * Layout of the file:
 *   - first line: lexicon_count
 *   - one line per lexicon entry: its value converted to UTF-8
 *   - one line per used trie cell: "index base parent word"
 *
 * If the file cannot be opened the error is reported on stderr and nothing
 * is written.
 */
void write_text_file()
{
	FILE * fp;
	int i;

	fp = fopen("datrie.txt","w");
	if (fp == NULL)
	{
		/* fprintf()/fclose() on a NULL stream would crash */
		perror("datrie.txt");
		return;
	}

	fprintf(fp, "%d\n", lexicon_count);

	for (i = 0; i < lexicon_count; i ++)
	{
		/* NOTE(review): the result of ucs4_to_utf8() is used unchecked;
		 * confirm what it returns on a conversion failure. */
		char * buff = ucs4_to_utf8(lexicon[i].value, (size_t) -1);
		fprintf(fp, "%s\n", buff);
		free(buff);
	}

	for (i = 0; i < DATRIE_SIZE; i ++)
	{
		/* only cells that are in use are listed */
		if (dat[i].parent != DATRIE_UNUSED)
		{
			fprintf(fp,"%d %d %d %d\n", i, dat[i].base, dat[i].parent, dat[i].word);
		}
	}

	fclose(fp);
}
static ssize_t console_input_read(struct console_t * console, unsigned char * buf, size_t count) { struct console_input_data_t * dat = (struct console_input_data_t *)console->priv; struct event_t event; u32_t key; u8_t sym[16]; size_t len; if(pump_event(runtime_get()->__event_base, &event) && (event.type == EVENT_TYPE_KEY_DOWN) && (event.device == dat->input)) { key = event.e.key_down.key; switch(key) { case KEY_BACKSPACE: sym[0] = 0x7f; len = 1; break; case KEY_TAB: sym[0] = 0x9; len = 1; break; case KEY_ENTER: sym[0] = 0xd; len = 1; break; case KEY_UP: sym[0] = '\e'; sym[1] = '['; sym[2] = 'A'; len = 3; break; case KEY_DOWN: sym[0] = '\e'; sym[1] = '['; sym[2] = 'B'; len = 3; break; case KEY_LEFT: sym[0] = '\e'; sym[1] = '['; sym[2] = 'D'; len = 3; break; case KEY_RIGHT: sym[0] = '\e'; sym[1] = '['; sym[2] = 'C'; len = 3; break; case KEY_PAGE_UP: sym[0] = '\e'; sym[1] = '['; sym[2] = '5'; sym[3] = '~'; len = 4; break; case KEY_PAGE_DOWN: sym[0] = '\e'; sym[1] = '['; sym[2] = '6'; sym[3] = '~'; len = 4; break; case KEY_HOME: sym[0] = '\e'; sym[1] = '['; sym[2] = '1'; sym[3] = '~'; len = 4; break; case KEY_END: sym[0] = '\e'; sym[1] = '['; sym[2] = '4'; sym[3] = '~'; len = 4; break; case KEY_VOLUME_UP: case KEY_VOLUME_DOWN: case KEY_MENU: case KEY_BACK: case KEY_POWER: len = 0; break; default: ucs4_to_utf8(&key, 1, (char *)sym, sizeof(sym)); len = strlen((const char *)sym); break; } fifo_put(dat->fifo, sym, len); } return fifo_get(dat->fifo, buf, count); }
/*
 * Convert a UTF-8 string with the given converter and return the result as
 * a newly allocated, NUL-terminated UTF-8 string.
 *
 *   t_opencc  converter handle, passed through to opencc_convert()
 *   inbuf     NUL-terminated UTF-8 input
 *   length    number of input bytes to convert; (size_t)-1 or any value
 *             larger than strlen(inbuf) means "the whole string"
 *
 * Returns the converted string (release it with free()), or (char*)-1 on
 * failure.  errnum is set to OPENCC_ERROR_ENCODING when the input cannot be
 * decoded or the output cannot be encoded.  When opencc_convert() fails or
 * memory runs out, errnum is left as it is (opencc_convert() presumably
 * reports its own error).
 */
char* opencc_convert_utf8(opencc_t t_opencc, const char* inbuf, size_t length) {
  if (!lib_initialized) {
    lib_initialize();
  }
  size_t actual_length = strlen(inbuf);
  if ((length == (size_t)-1) || (length > actual_length)) {
    length = actual_length;
  }
  ucs4_t* winbuf = utf8_to_ucs4(inbuf, length);
  if (winbuf == (ucs4_t*)-1) {
    /* Can not convert input UTF8 to UCS4 */
    errnum = OPENCC_ERROR_ENCODING;
    return (char*)-1;
  }
  /* Set up UTF8 buffer: out_cap payload bytes plus one for the NUL.
   * out always points at the start of the allocation; the write position
   * is out + out_len, so the pointer handed to free() is never an
   * interior one. */
  size_t out_cap = length;
  size_t out_len = 0;
  char* out = (char*)malloc(sizeof(char) * (out_cap + 1));
  /* Set conversion buffer */
  size_t wbufsize = length + 64;
  ucs4_t* woutbuf = (ucs4_t*)malloc(sizeof(ucs4_t) * (wbufsize + 1));
  ucs4_t* pinbuf = winbuf;
  ucs4_t* poutbuf = woutbuf;
  size_t inbuf_left = 0;
  size_t outbuf_left = wbufsize;

  if (out == NULL || woutbuf == NULL) {
    goto fail;
  }
  out[0] = '\0';
  inbuf_left = ucs4len(winbuf);

  while (inbuf_left > 0) {
    size_t retval = opencc_convert(t_opencc, &pinbuf, &inbuf_left, &poutbuf,
                                   &outbuf_left);
    if (retval == (size_t)-1) {
      goto fail;
    }
    *poutbuf = L'\0';
    char* ubuff = ucs4_to_utf8(woutbuf, (size_t)-1);
    if (ubuff == (char*)-1) {
      errnum = OPENCC_ERROR_ENCODING;
      goto fail;
    }
    size_t ubuff_len = strlen(ubuff);
    /* Grow against the space that is really left (capacity minus what has
     * already been appended), keeping room for the terminator. */
    if (ubuff_len > out_cap - out_len) {
      size_t new_cap = (out_cap > 0) ? out_cap : 1;
      while (new_cap - out_len < ubuff_len) {
        new_cap += new_cap;
      }
      char* grown = (char*)realloc(out, sizeof(char) * (new_cap + 1));
      if (grown == NULL) {
        free(ubuff);
        goto fail;
      }
      out = grown;
      out_cap = new_cap;
    }
    memcpy(out + out_len, ubuff, ubuff_len);
    free(ubuff);
    out_len += ubuff_len;
    out[out_len] = '\0';
    /* Reuse the whole conversion buffer for the next chunk. */
    outbuf_left = wbufsize;
    poutbuf = woutbuf;
  }
  free(winbuf);
  free(woutbuf);
  {
    /* Trim to the exact size; if that fails the larger buffer is still
     * valid and is returned instead. */
    char* trimmed = (char*)realloc(out, sizeof(char) * (out_len + 1));
    if (trimmed != NULL) {
      out = trimmed;
    }
  }
  return out;

fail:
  free(out);
  free(woutbuf);
  free(winbuf);
  return (char*)-1;
}
/*
 * Convert a UTF-8 string with the given converter and return the result as
 * a newly allocated, NUL-terminated UTF-8 string.
 *
 *   t_opencc  converter handle, passed through to opencc_convert()
 *   inbuf     NUL-terminated UTF-8 input
 *   length    number of input bytes to convert; (size_t) -1 or any value
 *             larger than strlen(inbuf) means "the whole string"
 *
 * Returns the converted string (release it with free()), or (char *) -1 on
 * failure.  errnum is set to OPENCC_ERROR_ENCODIND when the input cannot be
 * decoded or the output cannot be encoded.  When opencc_convert() fails or
 * memory runs out, errnum is left as it is (opencc_convert() presumably
 * reports its own error).
 */
char * opencc_convert_utf8(opencc_t t_opencc, const char * inbuf, size_t length)
{
	if (!lib_initialized)
		lib_initialize();

	size_t actual_length = strlen(inbuf);
	if (length == (size_t) -1 || length > actual_length)
		length = actual_length;

	/* Convert the input data into a ucs4_t string */
	ucs4_t * winbuf = utf8_to_ucs4(inbuf, length);
	if (winbuf == (ucs4_t *) -1)
	{
		/* The input data could not be converted */
		errnum = OPENCC_ERROR_ENCODIND;
		return (char *) -1;
	}

	/* Set up the UTF-8 output buffer: out_cap payload bytes plus one for
	 * the NUL.  out always points at the start of the allocation; the
	 * write position is out + out_len, so the pointer handed to free() is
	 * never an interior one. */
	size_t out_cap = length;
	size_t out_len = 0;
	char * out = (char *) malloc(sizeof(char) * (out_cap + 1));

	/* Set up the conversion buffer */
	size_t wbufsize = length + 64;
	ucs4_t * woutbuf = (ucs4_t *) malloc(sizeof(ucs4_t) * (wbufsize + 1));
	ucs4_t * pinbuf = winbuf;
	ucs4_t * poutbuf = woutbuf;
	size_t inbuf_left = 0;
	size_t outbuf_left = wbufsize;

	if (out == NULL || woutbuf == NULL)
		goto fail;

	out[0] = '\0';
	inbuf_left = ucs4len(winbuf);

	while (inbuf_left > 0)
	{
		size_t retval = opencc_convert(t_opencc, &pinbuf, &inbuf_left, &poutbuf, &outbuf_left);
		if (retval == (size_t) -1)
			goto fail;

		*poutbuf = L'\0';

		char * ubuff = ucs4_to_utf8(woutbuf, (size_t) -1);
		if (ubuff == (char *) -1)
		{
			errnum = OPENCC_ERROR_ENCODIND;
			goto fail;
		}

		size_t ubuff_len = strlen(ubuff);

		/* Grow against the space that is really left (capacity minus
		 * what has already been appended), keeping room for the
		 * terminator. */
		if (ubuff_len > out_cap - out_len)
		{
			size_t new_cap = (out_cap > 0) ? out_cap : 1;
			while (new_cap - out_len < ubuff_len)
				new_cap += new_cap;

			char * grown = (char *) realloc(out, sizeof(char) * (new_cap + 1));
			if (grown == NULL)
			{
				free(ubuff);
				goto fail;
			}
			out = grown;
			out_cap = new_cap;
		}

		memcpy(out + out_len, ubuff, ubuff_len);
		free(ubuff);
		out_len += ubuff_len;
		out[out_len] = '\0';

		/* Reuse the whole conversion buffer for the next chunk */
		outbuf_left = wbufsize;
		poutbuf = woutbuf;
	}

	free(winbuf);
	free(woutbuf);

	{
		/* Trim to the exact size; if that fails the larger buffer is
		 * still valid and is returned instead. */
		char * trimmed = (char *) realloc(out, sizeof(char) * (out_len + 1));
		if (trimmed != NULL)
			out = trimmed;
	}

	return out;

fail:
	free(out);
	free(woutbuf);
	free(winbuf);
	return (char *) -1;
}
void wxPLDevBase::PSDrawText( PLUNICODE* ucs4, int ucs4Len, bool drawText ) { int i = 0; char utf8_string[max_string_length]; char utf8[5]; memset( utf8_string, '\0', max_string_length ); /* Get PLplot escape character */ char plplotEsc; plgesc( &plplotEsc ); /* Get the curent font */ fontScale = 1.0; yOffset = 0.0; PLUNICODE fci; plgfci( &fci ); PSSetFont( fci ); textWidth=0; textHeight=0; while( i < ucs4Len ) { if( ucs4[i] < PL_FCI_MARK ) { /* not a font change */ if( ucs4[i] != (PLUNICODE)plplotEsc ) { /* a character to display */ ucs4_to_utf8( ucs4[i], utf8 ); strncat( utf8_string, utf8, max_string_length ); i++; continue; } i++; if( ucs4[i] == (PLUNICODE)plplotEsc ) { /* a escape character to display */ ucs4_to_utf8( ucs4[i], utf8 ); strncat( utf8_string, utf8, max_string_length ); i++; continue; } else { if( ucs4[i] == (PLUNICODE)'u' ) { /* Superscript */ // draw string so far PSDrawTextToDC( utf8_string, drawText ); // change font scale if( yOffset<0.0 ) fontScale *= 1.25; /* Subscript scaling parameter */ else fontScale *= 0.8; /* Subscript scaling parameter */ PSSetFont( fci ); yOffset += scaley * fontSize * fontScale / 2.; } if( ucs4[i] == (PLUNICODE)'d' ) { /* Subscript */ // draw string so far PSDrawTextToDC( utf8_string, drawText ); // change font scale double old_fontScale=fontScale; if( yOffset>0.0 ) fontScale *= 1.25; /* Subscript scaling parameter */ else fontScale *= 0.8; /* Subscript scaling parameter */ PSSetFont( fci ); yOffset -= scaley * fontSize * old_fontScale / 2.; } if( ucs4[i] == (PLUNICODE)'-' ) { /* underline */ // draw string so far PSDrawTextToDC( utf8_string, drawText ); underlined = !underlined; PSSetFont( fci ); } if( ucs4[i] == (PLUNICODE)'+' ) { /* overline */ /* not implemented yet */ } i++; } } else { /* a font change */ // draw string so far PSDrawTextToDC( utf8_string, drawText ); // get new font fci = ucs4[i]; PSSetFont( fci ); i++; } } PSDrawTextToDC( utf8_string, drawText ); }