/**
 * Recode a byte string from one charset to another.
 *
 * When either charset is NULL, both names compare equal, the converter cannot
 * be opened, or the conversion itself fails, a plain copy of the input is
 * returned instead.
 *
 * @param str            input bytes
 * @param str_len        number of bytes of @str to use
 * @param charset_from   source charset name (may be NULL)
 * @param charset_to     target charset name (may be NULL)
 * @param bytes_written  out: byte length of the returned string; it is
 *                       dereferenced unconditionally, so it must not be NULL
 * @return string allocated by g_convert_with_iconv() or g_strndup()
 */
gchar *
mc_search__recode_str (const char *str, gsize str_len, const char *charset_from,
                       const char *charset_to, gsize * bytes_written)
{
    gchar *recoded = NULL;

    if (charset_from != NULL && charset_to != NULL && strcmp (charset_to, charset_from) != 0)
    {
        GIConv cd;

        cd = g_iconv_open (charset_to, charset_from);
        if (cd != INVALID_CONV)
        {
            gsize consumed;

            recoded = g_convert_with_iconv (str, str_len, cd, &consumed, bytes_written, NULL);
            g_iconv_close (cd);
        }
    }

    if (recoded == NULL)
    {
        /* Nothing to convert, or conversion not possible: hand back a copy. */
        *bytes_written = str_len;
        recoded = g_strndup (str, str_len);
    }

    return recoded;
}
/*
 * Run the NUL-terminated @data through the reader's converter and wrap the
 * result in a string value.  The converted buffer is handed to
 * value_new_string_nocopy() and is not freed here.
 */
static GnmValue *
qpro_new_string (QProReadState *state, gchar const *data)
{
	gchar *converted;

	converted = g_convert_with_iconv (data, -1, state->converter,
					  NULL, NULL, NULL);

	return value_new_string_nocopy (converted);
}
/** * Send a line over an IO Channel * * @param c IO Channel * @param iconv iconv to use, -1 for none * @param l Line * @param error Error */ GIOStatus irc_send_line(GIOChannel *c, GIConv iconv, const struct irc_line *l, GError **error) { char *raw, *cvrt = NULL; GIOStatus ret; gsize bytes_written = 0; g_assert(c); raw = irc_line_string_nl(l); if (iconv != (GIConv)-1) { cvrt = g_convert_with_iconv(raw, -1, iconv, NULL, NULL, error); if (cvrt == NULL) return G_IO_STATUS_ERROR; g_free(raw); } else { cvrt = raw; } ret = g_io_channel_write_chars(c, cvrt, -1, &bytes_written, error); g_free(cvrt); if (ret == G_IO_STATUS_AGAIN) { g_assert(bytes_written == 0); } return ret; }
static tvbuff_t * dissect_cbs_data(guint8 sms_encoding, tvbuff_t *tvb, proto_tree *tree, packet_info *pinfo, guint16 offset ) { tvbuff_t * tvb_out = NULL; guint8 out_len; int length = tvb_length(tvb) - offset; gchar *utf8_text = NULL; static unsigned char msgbuf[1024]; guint8 * input_string = tvb_get_ephemeral_string(tvb, offset, length); GIConv cd; GError *l_conv_error = NULL; switch(sms_encoding){ case SMS_ENCODING_7BIT: case SMS_ENCODING_7BIT_LANG: out_len = gsm_sms_char_7bit_unpack(0, length, sizeof(msgbuf), input_string, msgbuf); msgbuf[out_len] = '\0'; utf8_text = gsm_sms_chars_to_utf8(msgbuf, out_len); tvb_out = tvb_new_child_real_data(tvb, utf8_text, out_len, out_len); add_new_data_source(pinfo, tvb_out, "unpacked 7 bit data"); break; case SMS_ENCODING_8BIT: tvb_out = tvb_new_subset(tvb, offset, length, length); break; case SMS_ENCODING_UCS2: case SMS_ENCODING_UCS2_LANG: if ((cd = g_iconv_open("UTF-8","UCS-2BE")) != (GIConv) -1) { utf8_text = g_convert_with_iconv(input_string, length, cd, NULL, NULL, &l_conv_error); if(!l_conv_error) { tvb_out = tvb_new_subset(tvb, offset, length, length); } else proto_tree_add_text(tree, tvb, offset, length, "CBS String: g_convert_with_iconv FAILED"); g_free(utf8_text); g_iconv_close(cd); } else { proto_tree_add_text(tree, tvb, offset, length, "CBS String: g_iconv_open FAILED contact wireshark"); } break; default: proto_tree_add_text(tree, tvb, offset, length, "Unhandled encoding %d of CBS String", sms_encoding); break; } return tvb_out; }
tvbuff_t * dissect_cbs_data(guint8 sms_encoding, tvbuff_t *tvb, proto_tree *tree, packet_info *pinfo, guint16 offset ) { tvbuff_t * tvb_out = NULL; int length = tvb_length(tvb) - offset; gchar *utf8_text = NULL, *utf8_out; guint8 * input_string; GIConv cd; GError *l_conv_error = NULL; switch(sms_encoding){ case SMS_ENCODING_7BIT: case SMS_ENCODING_7BIT_LANG: utf8_text = tvb_get_ts_23_038_7bits_string(wmem_packet_scope(), tvb, offset<<3, (length*8)/7); utf8_out = g_strdup(utf8_text); tvb_out = tvb_new_child_real_data(tvb, utf8_out, (guint)strlen(utf8_out), (guint)strlen(utf8_out)); tvb_set_free_cb(tvb_out, g_free); add_new_data_source(pinfo, tvb_out, "unpacked 7 bit data"); break; case SMS_ENCODING_8BIT: tvb_out = tvb_new_subset(tvb, offset, length, length); break; case SMS_ENCODING_UCS2: case SMS_ENCODING_UCS2_LANG: input_string = tvb_get_string(wmem_packet_scope(), tvb, offset, length); if ((cd = g_iconv_open("UTF-8","UCS-2BE")) != (GIConv) -1) { utf8_text = g_convert_with_iconv(input_string, length, cd, NULL, NULL, &l_conv_error); if(!l_conv_error) { tvb_out = tvb_new_subset(tvb, offset, length, length); } else proto_tree_add_text(tree, tvb, offset, length, "CBS String: g_convert_with_iconv FAILED"); g_free(utf8_text); g_iconv_close(cd); } else { proto_tree_add_text(tree, tvb, offset, length, "CBS String: g_iconv_open FAILED contact wireshark"); } break; default: proto_tree_add_text(tree, tvb, offset, length, "Unhandled encoding %d of CBS String", sms_encoding); break; } return tvb_out; }
/*
 * Fetch the next input line into ctxt->line, converted with ctxt->converter.
 *
 * The previous ctxt->line is freed first and ctxt->line_no is incremented for
 * every line read, even when the conversion fails.
 *
 * Returns FALSE when no further line is available or the conversion failed.
 */
static gboolean
dif_get_line (DifInputContext *ctxt)
{
	char *input_line = gsf_input_textline_ascii_gets (ctxt->input);

	if (input_line == NULL)
		return FALSE;

	g_free (ctxt->line);
	ctxt->line = g_convert_with_iconv (input_line, -1, ctxt->converter,
					   NULL, &ctxt->line_len, NULL);
	ctxt->line_no++;

	return ctxt->line != NULL;
}
/*
 * Convert the NUL-terminated string *inbuf with @converter and store the newly
 * allocated result in *outbuf.
 *
 * Return value:
 *    0  conversion succeeded (always, when built with DISABLE_UTF8, in which
 *       case the input is simply duplicated)
 *    1  @converter is (GIConv)-1 or (GIConv)0, so the input was duplicated
 *   -1  the conversion failed and *outbuf is NULL
 *
 * @inbytes_left and @outbytes_left are passed to g_convert_with_iconv() as its
 * bytes_read and bytes_written outputs.
 */
gint rlib_charencoder_convert(GIConv converter, gchar **inbuf, gsize *inbytes_left, gchar **outbuf, gsize *outbytes_left) {
#ifdef DISABLE_UTF8
	*outbuf = g_strdup(*inbuf);
	return 0;
#else
	if (converter != (GIConv) -1 && converter != (GIConv) 0) {
		*outbuf = g_convert_with_iconv(*inbuf, strlen(*inbuf), converter, inbytes_left, outbytes_left, NULL);
		if (*outbuf == NULL)
			return -1;
		return 0;
	}

	/* No usable converter: pass the text through untouched. */
	*outbuf = g_strdup(*inbuf);
	return 1;
#endif
}
/*
 * Convert the single byte @c with converter @cd.
 *
 * Returns the first byte of the converted output, or UNKNCHAR when the
 * conversion fails.
 */
static char
translate_character (GIConv cd, char c)
{
    gsize consumed;
    gsize produced = 0;
    char result = UNKNCHAR;
    gchar *converted;

    converted = g_convert_with_iconv (&c, 1, cd, &consumed, &produced, NULL);
    if (converted != NULL)
    {
        result = converted[0];
        g_free (converted);
    }

    return result;
}
/*
 * Read the input line by line until the reader is marked finished or the
 * input is exhausted.  Each line is stripped of trailing whitespace, converted
 * with state->converter and passed to sylk_parse_line().
 *
 * A warning is emitted if the input ends before the reader was marked
 * finished.
 */
static void
sylk_parse_sheet (SylkReader *state)
{
	while (!state->finished) {
		char *raw_line;
		char *converted;
		gsize converted_len;

		raw_line = gsf_input_textline_ascii_gets (state->input);
		if (raw_line == NULL)
			break;

		g_strchomp (raw_line);
		converted = g_convert_with_iconv (raw_line, -1, state->converter,
						  NULL, &converted_len, NULL);

		state->line_no++;
		sylk_parse_line (state, converted, converted_len);
		g_free (converted);
	}

	if (!state->finished)
		sylk_read_warning (state, _("Missing closing 'E'"));
}
gchar* g_convert (const gchar *str, gssize len, const gchar *to_codeset, const gchar *from_codeset, gsize *bytes_read, gsize *bytes_written, GError **error) { gchar *res; #ifdef HAVE_ICONV_H GIConv cd; g_return_val_if_fail (str != NULL, NULL); g_return_val_if_fail (to_codeset != NULL, NULL); g_return_val_if_fail (from_codeset != NULL, NULL); cd = open_converter (to_codeset, from_codeset, error); if (cd == (GIConv) -1) { if (bytes_read) *bytes_read = 0; if (bytes_written) *bytes_written = 0; return NULL; } res = g_convert_with_iconv (str, len, cd, bytes_read, bytes_written, error); g_iconv_close (cd); #else res = g_strdup(str); #endif return res; }
// Encodes |length| UChars starting at |characters| with the codec's iconv
// encoder, creating the encoder on first use.
//
// Returns an empty-string CString for zero-length input, and a null CString
// when the encoder cannot be created or the conversion reports an error.
// |handling| is not consulted by this implementation.
CString TextCodecGtk::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    if (!length)
        return "";

    const GIConv invalidConverter = reinterpret_cast<GIConv>(-1);
    if (m_iconvEncoder == invalidConverter) {
        createIConvEncoder();
        if (m_iconvEncoder == invalidConverter)
            return CString();
    }

    const char* inputBytes = reinterpret_cast<const char*>(characters);
    const size_t inputLength = length * sizeof(UChar);

    size_t encodedLength;
    GOwnPtr<GError> conversionError;
    GOwnPtr<char> encoded;

    encoded.outPtr() = g_convert_with_iconv(inputBytes, inputLength, m_iconvEncoder, 0, &encodedLength, &conversionError.outPtr());

    if (conversionError) {
        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", conversionError->code, conversionError->message);
        return CString();
    }

    return CString(encoded.get(), encodedLength);
}
GIOStatus irc_recv_line(GIOChannel *c, GIConv iconv, GError **error, struct irc_line **l) { gchar *raw = NULL, *cvrt = NULL; GIOStatus status; gsize in_len; g_assert(l != NULL); *l = NULL; g_assert(c); status = g_io_channel_read_line(c, &raw, &in_len, NULL, error); if (status != G_IO_STATUS_NORMAL) { g_free(raw); return status; } if (iconv == (GIConv)-1) { cvrt = raw; } else { cvrt = g_convert_with_iconv(raw, -1, iconv, NULL, NULL, error); if (cvrt == NULL) { cvrt = raw; status = G_IO_STATUS_ERROR; } else { g_free(raw); } } *l = irc_parse_line(cvrt); g_free(cvrt); return status; }
// Decodes |length| bytes starting at |bytes| with the codec's iconv decoder,
// creating the decoder on first use.
//
// Bytes left unconsumed by a previous call (m_bufferedBytes) are prepended to
// the new input. Bytes left unconsumed by this call are stored for the next
// one, unless |flush| is set, in which case they count as an error.
//
// On a conversion error, or on leftover bytes while flushing, the buffered
// state is reset, |sawError| is set to true and a null String is returned.
// |stopOnError| is not consulted by this implementation.
String TextCodecGtk::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
{
    const GIConv invalidConverter = reinterpret_cast<GIConv>(-1);

    // Get a converter for the passed-in encoding.
    if (m_iconvDecoder == invalidConverter) {
        createIConvDecoder();
        ASSERT(m_iconvDecoder != invalidConverter);
        if (m_iconvDecoder == invalidConverter) {
            LOG_ERROR("Error creating IConv encoder even though encoding was in table.");
            return String();
        }
    }

    // By default convert exactly what was passed in; when earlier bytes are
    // pending, convert a temporary buffer holding them followed by the new data.
    const char* input = bytes;
    size_t inputLength = length;
    char* joinedInput = 0;
    if (m_numBufferedBytes) {
        inputLength = length + m_numBufferedBytes;
        joinedInput = static_cast<char*>(fastMalloc(inputLength));
        memcpy(joinedInput, m_bufferedBytes, m_numBufferedBytes);
        memcpy(joinedInput + m_numBufferedBytes, bytes, length);
        input = joinedInput;
        // All buffered bytes are consumed now.
        m_numBufferedBytes = 0;
    }

    size_t bytesConsumed, bytesProduced;
    GOwnPtr<GError> err;
    GOwnPtr<UChar> decoded;
    decoded.outPtr() = reinterpret_cast<UChar*>(g_convert_with_iconv(input, inputLength, m_iconvDecoder, &bytesConsumed, &bytesProduced, &err.outPtr()));

    bool failed = false;
    if (err) {
        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", err->code, err->message);
        failed = true;
    } else {
        // Partial input at the end of the string may not raise an error, so
        // compare the amount consumed against the amount supplied.
        m_numBufferedBytes = inputLength - bytesConsumed;
        if (m_numBufferedBytes > 0) {
            if (flush) {
                LOG_ERROR("Partial bytes at end of input while flush requested.");
                failed = true;
            } else
                memcpy(m_bufferedBytes, input + bytesConsumed, m_numBufferedBytes);
        }
    }

    fastFree(joinedInput);

    if (failed) {
        // Reset state for subsequent calls to decode.
        m_numBufferedBytes = 0;
        sawError = true;
        return String();
    }

    Vector<UChar> result;
    result.append(decoded.get(), bytesProduced / sizeof(UChar));
    return String::adopt(result);
}
void build_zip(const gchar *dirname, const gchar *zipfilename) { gchar sutra_name[256]; strcpy(sutra_name, zipfilename); gchar *p; p = strrchr(sutra_name, '.'); if (p) { *p = '\0'; } GIConv locale_converter; locale_converter = g_iconv_open("gb18030","UTF-8"); gchar *locale_sutra_name; locale_sutra_name = g_convert_with_iconv(sutra_name,-1,locale_converter,NULL,NULL,NULL); g_iconv_close(locale_converter); gchar *quotefilename; std::string cmd; cmd = "mv "; quotefilename = g_shell_quote(dirname); cmd += quotefilename; g_free(quotefilename); cmd += '/'; quotefilename = g_shell_quote(zipfilename); cmd += quotefilename; g_free(quotefilename); cmd += ' '; quotefilename = g_shell_quote(dirname); cmd += quotefilename; g_free(quotefilename); cmd += "/txt"; system(cmd.c_str()); cmd = "unzip "; quotefilename = g_shell_quote(dirname); cmd += quotefilename; g_free(quotefilename); cmd += "/txt/"; quotefilename = g_shell_quote(zipfilename); cmd += quotefilename; g_free(quotefilename); cmd += " -d "; quotefilename = g_shell_quote(dirname); cmd += quotefilename; g_free(quotefilename); cmd += "/txt"; system(cmd.c_str()); cmd = "mv "; quotefilename = g_shell_quote(dirname); cmd += quotefilename; g_free(quotefilename); cmd += "/txt/"; quotefilename = g_shell_quote(zipfilename); cmd += quotefilename; g_free(quotefilename); cmd += ' '; quotefilename = g_shell_quote(dirname); cmd += quotefilename; g_free(quotefilename); system(cmd.c_str()); std::string fullfilename; fullfilename = dirname; fullfilename += "/txt"; GDir *dir = g_dir_open(fullfilename.c_str(), 0, NULL); const gchar *txtfilename; while ((txtfilename = g_dir_read_name(dir))!=NULL) { fullfilename = dirname; fullfilename += "/txt/"; fullfilename += txtfilename; if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_REGULAR)) { convert_txt(dirname, txtfilename, locale_sutra_name); } } g_dir_close(dir); g_free(locale_sutra_name); }
/**
 * Look at the first byte a new client sends to decide what kind of client it is.
 *
 * If the byte equals SOCKS_VERSION the pending client is marked as a SOCKS
 * client in state SOCKS_STATE_NEW and TRUE is returned. Otherwise the client is
 * marked as a regular client, the rest of the first line is read, the byte
 * already consumed is put back in front of it, and the (optionally
 * charset-converted) line is handed to the listener's handle_client_line
 * callback, whose result is returned.
 *
 * @param ioc IO Channel the client is connected on
 * @param pc  Pending client being classified
 * @return FALSE if the first read fails; see above otherwise
 */
static gboolean handle_client_detect(GIOChannel *ioc, struct pending_client *pc)
{
	GIOStatus status;
	gsize nread;
	/* Zero-initialised so the comparison below never reads an indeterminate
	 * byte when the read delivered nothing (e.g. G_IO_STATUS_AGAIN). */
	gchar header[2] = { 0, 0 };
	GError *error = NULL;

	status = g_io_channel_read_chars(ioc, header, 1, &nread, &error);

	if (status != G_IO_STATUS_NORMAL && status != G_IO_STATUS_AGAIN) {
		g_clear_error(&error);
		return FALSE;
	}

	if (header[0] == SOCKS_VERSION) {
		listener_log(LOG_TRACE, pc->listener, "Detected SOCKS.");
		pc->type = CLIENT_TYPE_SOCKS;
		pc->socks.state = SOCKS_STATE_NEW;
		return TRUE;
	} else {
		struct irc_line *l = NULL;
		gchar *raw = NULL, *cvrt = NULL;
		gchar *complete;
		gboolean ret;
		gsize in_len;

		pc->type = CLIENT_TYPE_REGULAR;

		g_assert(ioc != NULL);

		status = g_io_channel_read_line(ioc, &raw, &in_len, NULL, &error);
		if (status != G_IO_STATUS_NORMAL) {
			g_free(raw);
			/* error is only set for some failures (not for EOF, for
			 * instance), so it must not be passed to g_error_free()
			 * unconditionally. */
			g_clear_error(&error);
			/* NOTE(review): this returns a GIOStatus from a gboolean
			 * function, so EOF/AGAIN read as TRUE and ERROR as FALSE;
			 * kept as-is - confirm this is intended. */
			return status;
		}

		/* Rebuild the full line: the byte consumed above plus the rest. */
		complete = g_malloc(in_len+2);
		complete[0] = header[0];
		memcpy(complete+1, raw, in_len);
		complete[in_len+1] = '\0';
		g_free(raw);

		if (pc->listener->iconv == (GIConv)-1) {
			cvrt = complete;
		} else {
			cvrt = g_convert_with_iconv(complete, -1, pc->listener->iconv, NULL, NULL, &error);
			if (cvrt == NULL) {
				/* Conversion failed: fall back to the unconverted line. */
				cvrt = complete;
				g_clear_error(&error);
			} else {
				g_free(complete);
			}
		}

		l = irc_parse_line(cvrt);

		ret = pc->listener->ops->handle_client_line(pc, l);

		free_line(l);

		g_free(cvrt);

		return ret;
	}
}
/*
 * convert: build "<base>.idx" and "<base>.dict" output files from an old
 * dictionary data file.
 *
 *   filename        - input data file; its last two ints are read as
 *                     iCapacity (item count) and iStyle (four flag bytes),
 *                     and everything before them is mmap()ed read-only.
 *   idxheadfilename - header file whose content is copied verbatim to the
 *                     start of the .idx file.  <base> is this path with its
 *                     last 8 characters removed.
 *
 * Processing, as visible in the code:
 *   1. Copy the header into the .idx file and reserve one glong after it for
 *      the word count.
 *   2. Walk the mapping as a sequence of NUL-terminated strings: word,
 *      meaning, and - only when the low byte of iStyle (cMark) is non-zero -
 *      a mark string.
 *   3. Convert each string to UTF-8.  Unless DISABLE_CONVERT_LOCALE is
 *      defined this is done in two steps, GB2312 -> BIG5 -> UTF-8; otherwise
 *      g_locale_to_utf8() is used.  Items whose word fails to convert or is
 *      empty after stripping are skipped.
 *   4. Store word plus data (mark passed through to_utf8_phonetic() and
 *      NUL-terminated, followed by the meaning) in a GArray, then sort it
 *      with comparefunc.
 *   5. Write every entry whose word differs from the previous one: data to
 *      the .dict file; word, offset and size to the .idx file.  Duplicated
 *      words are dropped.
 *   6. Seek back and store the final word count in the reserved slot.
 *
 * NOTE(review): points a maintainer should confirm or address:
 *   - basename/str are fixed 256-byte buffers filled with strcpy()/sprintf().
 *   - The fopen() results are not checked, and the early returns after
 *     "open fail" / "stat fail" / "mmap fail" leave idxfile, dicfile and
 *     (for the last two) fd open.
 *   - The mapping is never munmap()ed.
 *   - Dropped duplicates are skipped with `continue` without freeing their
 *     word/data.
 *   - Offsets, sizes and the count are written as sizeof(glong) bytes holding
 *     a g_htonl() value; confirm the intended on-disk width where glong is
 *     wider than 32 bits.
 */
void convert(char *filename,char *idxheadfilename) { struct stat stats; if (stat (idxheadfilename, &stats) == -1) { printf("idxhead file not exist!\n"); return; } FILE *idxheadfile; idxheadfile = fopen(idxheadfilename,"r"); gchar *buffer; buffer = (gchar *)g_malloc (stats.st_size + 1); size_t fread_size; fread_size = fread (buffer, 1, stats.st_size, idxheadfile); if (fread_size != (size_t)stats.st_size) { g_print("fread error!\n"); } fclose (idxheadfile); buffer[stats.st_size] = '\0'; //gboolean sametypesequence = FALSE; //if (strstr(buffer,"sametypesequence=")) //sametypesequence = TRUE; //in the next code we will always treat sametypesequence to be TRUE. //as now all old stardict dictionaries use these two feature. FILE *idxfile,*dicfile; gchar str[256],basename[256]; strcpy(basename,idxheadfilename); basename[strlen(idxheadfilename)-8]='\0'; sprintf(str,"%s.idx",basename); idxfile = fopen(str,"w"); sprintf(str,"%s.dict",basename); dicfile = fopen(str,"w"); fwrite(buffer, 1, stats.st_size, idxfile); g_free(buffer); long wordcount_offset = ftell(idxfile); glong tmpglong=0; fwrite(&(tmpglong),sizeof(glong),1,idxfile); int fd=open(filename,O_RDONLY); if(fd==-1) { g_print("open fail\n"); return; } // get length of dicfile. struct stat stStat; if(fstat(fd,&stStat)!=0) { g_print("stat fail\n"); return; } int iFileSize=stStat.st_size; // get item count lseek(fd,0-sizeof(int)*2,SEEK_END); unsigned int iCapacity,iStyle; ssize_t read_size; read_size = read(fd,&iCapacity,sizeof(int)); if (read_size == -1) { g_print("read() error!\n"); } read_size = read(fd,&iStyle,sizeof(int)); if (read_size == -1) { g_print("read() error!\n"); } //disable the next two line when the convert file is from the same arch machine. 
#ifndef DISABLE_CONVERT_ENDIAN vConvertEndian(&iCapacity); vConvertEndian(&iStyle); #endif unsigned char cIndex=(unsigned char)(iStyle>>24); unsigned char cWord=(unsigned char)(iStyle>>16); unsigned char cMeaning=(unsigned char)(iStyle>>8); unsigned char cMark=(unsigned char)iStyle; g_print("flag: %c %c %c\n",cIndex,cWord,cMeaning); // mmap the file to memory caddr_t pFileMem=(caddr_t)mmap( (caddr_t)0,iFileSize-sizeof(int)*2, PROT_READ,MAP_SHARED|MAP_NORESERVE,fd,0 ); if(pFileMem==MAP_FAILED) { g_print("mmap fail\n"); return; } // begin to read items. caddr_t p=pFileMem; caddr_t pMeaning, pMark; gchar *utf8_str; #ifndef DISABLE_CONVERT_LOCALE gchar *locale_str; gsize locale_write_size; GIConv locale_converter; GIConv utf8_converter; // locale_converter = g_iconv_open("GB2312","BIG5"); // utf8_converter = g_iconv_open("UTF-8","GB2312"); locale_converter = g_iconv_open("BIG5","GB2312"); utf8_converter = g_iconv_open("UTF-8","BIG5"); //the locale_converter have no problem!but why it fail later? 
/* locale_str = g_convert_with_iconv("�~�^���",8,locale_converter,NULL,&locale_write_size,NULL); if (!locale_str) { g_print("convert fail\n"); } else printf("%s",locale_str); return;*/ #endif gsize write_size; long tmp_long,wordcount=0; int word_len, meaning_len,mark_len=0; gulong iLength=0; GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),iCapacity); struct _worditem worditem; glong old_size; while(p<pFileMem+iFileSize-sizeof(int)*2 && iLength<iCapacity) { iLength++; word_len = strlen(p); #ifndef DISABLE_CONVERT_LOCALE locale_str = g_convert_with_iconv(p,word_len,locale_converter,NULL,&locale_write_size,NULL); if (locale_str) { utf8_str = g_convert_with_iconv(locale_str,locale_write_size,utf8_converter,NULL,NULL,NULL); g_free(locale_str); } else { printf("%s convert to other locale error!!!\n",p); utf8_str = NULL; } #else utf8_str = g_locale_to_utf8(p,word_len,NULL,NULL,NULL); #endif if (utf8_str) g_strstrip(utf8_str); if (!utf8_str || (*utf8_str=='\0')) { printf("%s convert to utf8 error!!!\n",p); pMeaning=p+word_len+1; meaning_len = strlen(pMeaning); if ( !cMark ) p = pMeaning+meaning_len+1; else { pMark = pMeaning+meaning_len+1; mark_len = strlen(pMark); p = pMark+mark_len+1; } if (utf8_str) g_free(utf8_str); continue; } worditem.word = utf8_str; //utf8_str will be free at last. 
worditem.data = NULL; worditem.datasize = 0; old_size=0; pMeaning=p+word_len+1; meaning_len = strlen(pMeaning); if ( !cMark ) // no Mark field, eg py2gb pMark = NULL; else { pMark = pMeaning+meaning_len+1; mark_len = strlen(pMark); #ifndef DISABLE_CONVERT_LOCALE locale_str = g_convert_with_iconv(pMark,mark_len,locale_converter,NULL,&locale_write_size,NULL); if (locale_str) { utf8_str = g_convert_with_iconv(locale_str,locale_write_size,utf8_converter,NULL,&write_size,NULL); g_free(locale_str); } else { printf("%s convert to other locale error!!!\n",pMark); utf8_str = NULL; } #else utf8_str = g_locale_to_utf8(pMark,mark_len,NULL,&write_size,NULL); //mark may contains Chinese too,ie. "not" #endif if (utf8_str) { gchar *p_str = to_utf8_phonetic(utf8_str, write_size); g_free(utf8_str); write_size = strlen(p_str); worditem.datasize += (write_size + 1); worditem.data = (gchar *)g_realloc(worditem.data,worditem.datasize); /*if ((cWord == LIB_WORD_GB) || (cWord == LIB_WORD_BIG5)) memcpy(worditem.data+old_size,"Y",sizeof(gchar)); // Chinese Yin Biao else memcpy(worditem.data+old_size,"T",sizeof(gchar)); // English Phonetic old_size+=sizeof(gchar); tmp_long = write_size; memcpy(worditem.data+old_size,&tmp_long,sizeof(glong)); old_size+=sizeof(glong);*/ memcpy(worditem.data+old_size,p_str,write_size+1); old_size+= (write_size+1); g_free(p_str); } else { worditem.datasize += 1; worditem.data = (gchar *)g_realloc(worditem.data,worditem.datasize); memcpy(worditem.data+old_size,"", 1); printf("%s 's mark convert to utf8 error!\n",p); } } #ifndef DISABLE_CONVERT_LOCALE locale_str = g_convert_with_iconv(pMeaning, meaning_len,locale_converter,NULL,&locale_write_size,NULL); if (locale_str) { utf8_str = g_convert_with_iconv(locale_str,locale_write_size,utf8_converter,NULL,&write_size,NULL); g_free(locale_str); } else { printf("%s convert to other locale error!!!\n",pMeaning); utf8_str = NULL; } #else utf8_str = g_locale_to_utf8(pMeaning,meaning_len,NULL,&write_size,NULL); #endif if 
(utf8_str) { tmp_long = write_size; } else { printf("%s 's meaning convert to utf8 error!\n",p); tmp_long = 1; utf8_str = g_strdup(""); } worditem.datasize += tmp_long; worditem.data = (gchar *)g_realloc(worditem.data,worditem.datasize); /*memcpy(worditem.data+old_size,"M",sizeof(gchar)); old_size+=sizeof(gchar); memcpy(worditem.data+old_size,&tmp_long,sizeof(glong)); old_size+=sizeof(glong);*/ memcpy(worditem.data+old_size,utf8_str,tmp_long); old_size+= tmp_long; g_free(utf8_str); g_array_append_val(array, worditem); if ( !cMark ) p = pMeaning+meaning_len+1; else p = pMark+mark_len+1; wordcount++; } #ifndef DISABLE_CONVERT_LOCALE g_iconv_close(locale_converter); g_iconv_close(utf8_converter); #endif //g_qsort_with_data(parray->pdata,parray->len,sizeof(gpointer),comparefunc,NULL); g_array_sort(array,comparefunc); long offset_old=0; gulong i; gchar *previous_word = g_strdup(""); //there should have no word equal this. glong previous_datasize = 0; gchar *previous_data = g_strdup(""); struct _worditem *pworditem; for (i=0;i<array->len;i++) { pworditem = &g_array_index(array, struct _worditem, i); // should use g_ascii_strcasecmp() ?? if (strcmp(previous_word,pworditem->word)==0) { if ((previous_datasize == pworditem->datasize)&& (memcmp(previous_data,pworditem->data,previous_datasize)==0)) { g_print("word %s is complete duplicated! droped!\n" ,previous_word); wordcount--; continue; } else { g_print("word %s is duplicated! droped!\n" ,previous_word); wordcount--; continue; //i don't want to emerge here,it is a bad work! /* g_print("word %s is duplicated! 
merged!\n" ,previous_word); fseek(dicfile,-sizeof(glong),SEEK_CUR); pworditem->datasize += previous_datasize; fwrite(&(pworditem->datasize),sizeof(glong),1,idxfile); fwrite(pworditem->data,sizeof(gchar),pworditem->datasize,dicfile); */ } } g_free(previous_word); g_free(previous_data); previous_word = pworditem->word; previous_datasize = pworditem->datasize; previous_data = pworditem->data; offset_old = ftell(dicfile); fwrite(pworditem->data,sizeof(gchar),pworditem->datasize,dicfile); fwrite(pworditem->word,sizeof(gchar),strlen(pworditem->word)+1,idxfile); tmpglong = g_htonl(offset_old); fwrite(&(tmpglong),sizeof(glong),1,idxfile); tmpglong = g_htonl(pworditem->datasize); fwrite(&(tmpglong),sizeof(glong),1,idxfile); } g_free(previous_word); g_free(previous_data); g_array_free(array,TRUE); fseek(idxfile,wordcount_offset,SEEK_SET); tmpglong = g_htonl(wordcount); fwrite(&(tmpglong),sizeof(glong),1,idxfile); g_print("old wordcount: %d\n",iCapacity); g_print("wordcount: %ld\n",wordcount); close(fd); fclose(idxfile); fclose(dicfile); }
/** * Converts a given string using the given iconv converter. This is similar to g_convert_with_fallback, except that it is tolerant of sequences in * the original input that are invalid even in from_encoding. g_convert_with_fallback fails for such text, whereas this function replaces such a * sequence with the fallback string. * * If len is -1, strlen(text) is used to calculate the length. Do not pass -1 if text is supposed to contain \0 bytes, such as if from_encoding is a * multi-byte encoding like UTF-16. */ gchar * text_convert_invalid (const gchar* text, gssize len, GIConv converter, const gchar *fallback, gsize *len_out) { gchar *result_part; gsize result_part_len; const gchar *end; gsize invalid_start_pos; GString *result; const gchar *current_start; if (len == -1) { len = strlen (text); } end = text + len; /* Find the first position of an invalid sequence. */ result_part = g_convert_with_iconv (text, len, converter, &invalid_start_pos, &result_part_len, NULL); g_iconv (converter, NULL, NULL, NULL, NULL); if (result_part != NULL) { /* All text converted successfully on the first try. Return it. */ if (len_out != NULL) { *len_out = result_part_len; } return result_part; } /* One or more invalid sequences exist that need to be replaced with the fallback. */ result = g_string_sized_new (len); current_start = text; for (;;) { g_assert (current_start + invalid_start_pos < end); /* Convert everything before the position of the invalid sequence. It should be successful. * But iconv may not convert everything till invalid_start_pos since the last few bytes may be part of a shift sequence. * So get the new bytes_read and use it as the actual invalid_start_pos to handle this. 
* * See https://github.com/hexchat/hexchat/issues/1758 */ result_part = g_convert_with_iconv (current_start, invalid_start_pos, converter, &invalid_start_pos, &result_part_len, NULL); g_iconv (converter, NULL, NULL, NULL, NULL); g_assert (result_part != NULL); g_string_append_len (result, result_part, result_part_len); g_free (result_part); /* Append the fallback */ g_string_append (result, fallback); /* Now try converting everything after the invalid sequence. */ current_start += invalid_start_pos + 1; result_part = g_convert_with_iconv (current_start, end - current_start, converter, &invalid_start_pos, &result_part_len, NULL); g_iconv (converter, NULL, NULL, NULL, NULL); if (result_part != NULL) { /* The rest of the text converted successfully. Append it and return the whole converted text. */ g_string_append_len (result, result_part, result_part_len); g_free (result_part); if (len_out != NULL) { *len_out = result->len; } return g_string_free (result, FALSE); } /* The rest of the text didn't convert successfully. invalid_start_pos has the position of the next invalid sequence. */ } }
/*
 * "changed" handler for the selector's entry.
 *
 * Looks through column 0 of the selector's model for the first row whose text
 * starts with the entry text and selects it. When priv->smart_match is set and
 * no exact prefix match exists, the first row whose text matches after both
 * strings have been run through priv->converter (compared with
 * g_ascii_strncasecmp) is selected instead.
 *
 * The handler identified by priv->signal_id is blocked while the selection is
 * updated and the selector's "changed" signal is emitted.
 *
 * Does nothing when the entry is empty or the model has no rows.
 */
static void
entry_on_text_changed (GtkEditable * editable,
                       gpointer userdata)
{
  HildonTouchSelector *selector;
  HildonTouchSelectorEntryPrivate *priv;
  GtkTreeModel *model;
  GtkTreeIter iter;
  GtkTreeIter iter_suggested;
  GtkEntry *entry;
  const gchar *prefix;
  gchar *text;
  gboolean found = FALSE;
  gint text_column = -1;
  gchar *ascii_prefix = NULL;
  gint prefix_len = 0;
  gboolean found_suggestion = FALSE;
  gboolean smart_match;

  entry = GTK_ENTRY (editable);
  selector = HILDON_TOUCH_SELECTOR (userdata);
  priv = HILDON_TOUCH_SELECTOR_ENTRY_GET_PRIVATE (selector);

  text_column =
    hildon_touch_selector_entry_get_text_column (HILDON_TOUCH_SELECTOR_ENTRY (selector));

  prefix = gtk_entry_get_text (entry);
  if (prefix[0] == '\0') {
    return;
  }

  model = hildon_touch_selector_get_model (selector, 0);

  if (!gtk_tree_model_get_iter_first (model, &iter)) {
    return;
  }

  smart_match = priv->smart_match;
  if (smart_match) {
    ascii_prefix = g_convert_with_iconv (prefix, -1, priv->converter, NULL, NULL, NULL);
    if (ascii_prefix != NULL) {
      prefix_len = strlen (ascii_prefix);
    } else {
      /* The prefix could not be converted, so there is nothing to compare
       * suggestions against; only exact prefix matches are considered. */
      smart_match = FALSE;
    }
  }

  do {
    gtk_tree_model_get (model, &iter, text_column, &text, -1);
    found = (text != NULL) && g_str_has_prefix (text, prefix);

    if (!found && !found_suggestion && smart_match && text != NULL) {
      gchar *ascii_text = g_convert_with_iconv (text, -1, priv->converter, NULL, NULL, NULL);

      /* A row whose text cannot be converted is never a suggestion; NULL must
       * not reach g_ascii_strncasecmp(). */
      if (ascii_text != NULL) {
        found_suggestion = !g_ascii_strncasecmp (ascii_text, ascii_prefix, prefix_len);
        if (found_suggestion) {
          iter_suggested = iter;
        }
        g_free (ascii_text);
      }
    }

    g_free (text);
  } while (found != TRUE && gtk_tree_model_iter_next (model, &iter));

  g_signal_handler_block (selector, priv->signal_id);
  {
    /* We emit the HildonTouchSelector::changed signal because a change in the
       GtkEntry represents a change in current selection, and therefore, users
       should be notified. */
    if (found) {
      hildon_touch_selector_select_iter (selector, 0, &iter, TRUE);
    } else if (found_suggestion) {
      hildon_touch_selector_select_iter (selector, 0, &iter_suggested, TRUE);
    }
    g_signal_emit_by_name (selector, "changed", 0);
  }
  g_signal_handler_unblock (selector, priv->signal_id);

  g_free (ascii_prefix);
}
/*
 * Process the complete lines currently held in irc->readbuffer.
 *
 * The buffer is split with irc_splitlines(). Every complete line is converted
 * with irc->iconv (unless that is (GIConv)-1), parsed with irc_parse_line()
 * and executed with irc_exec(). A trailing incomplete line is kept in
 * irc->readbuffer for the next call; otherwise the buffer is released.
 *
 * If a line does not convert cleanly:
 *   - for a logged-in user a charset warning is sent and the line is ignored;
 *   - before login a notice is sent and the line is processed with every byte
 *     that has the high bit set replaced by '?'.
 *
 * Returns early if irc is no longer in irc_connection_list after a command
 * was executed.
 */
void irc_process(irc_t *irc)
{
	char **lines, **cmd;
	int i;

	if (irc->readbuffer == NULL) {
		return;
	}

	lines = irc_splitlines(irc->readbuffer);

	for (i = 0; *lines[i] != '\0'; i++) {
		char *conv = NULL;

		/* [WvG] If the last line isn't empty, it's an incomplete line and we
		   should wait for the rest to come in before processing it. */
		if (lines[i + 1] == NULL) {
			char *pending = g_strdup(lines[i]);

			g_free(irc->readbuffer);
			irc->readbuffer = pending;
			i++;
			break;
		}

		if (irc->iconv != (GIConv) - 1) {
			gsize bytes_read, bytes_written;

			conv = g_convert_with_iconv(lines[i], -1, irc->iconv,
			                            &bytes_read, &bytes_written, NULL);

			if (conv == NULL || bytes_read != strlen(lines[i])) {
				/* GLib can do strange things if things are not in the expected charset,
				   so let's be a little bit paranoid here: */
				if (irc->status & USTATUS_LOGGED_IN) {
					irc_rootmsg(irc, "Error: Charset mismatch detected. The charset "
					            "setting is currently set to %s, so please make "
					            "sure your IRC client will send and accept text in "
					            "that charset, or tell BitlBee which charset to "
					            "expect by changing the charset setting. See "
					            "`help set charset' for more information. Your "
					            "message was ignored.",
					            set_getstr(&irc->b->set, "charset"));

					g_free(conv);
					conv = NULL;
				} else {
					char *ch;

					irc_write(irc, ":%s NOTICE * :%s", irc->root->host,
					          "Warning: invalid characters received at login time.");

					/* Drop whatever the conversion may have returned before
					   replacing it, so that buffer cannot leak. */
					g_free(conv);
					conv = g_strdup(lines[i]);
					for (ch = conv; *ch; ch++) {
						if (*ch & 0x80) {
							*ch = '?';
						}
					}
				}
			}
			lines[i] = conv;
		}

		if (lines[i] && (cmd = irc_parse_line(lines[i]))) {
			irc_exec(irc, cmd);
			g_free(cmd);
		}

		g_free(conv);

		/* Shouldn't really happen, but just in case... */
		if (!g_slist_find(irc_connection_list, irc)) {
			g_free(lines);
			return;
		}
	}

	/* lines[i] is NULL only when the loop stopped on an incomplete line, in
	   which case readbuffer already holds the remainder and must be kept. */
	if (lines[i] != NULL) {
		g_free(irc->readbuffer);
		irc->readbuffer = NULL;
	}

	g_free(lines);
}
/* * @text: The text to convert. It may include pango markup (<b> and </b>) * @length: The length of the string -1 if it's nul-terminated * @start: Where to start converting in the text * @encoding: The encoding of text * @is_multibyte: Whether the encoding is a multibyte encoding * @error: The location to store the error, or NULL to ignore errors * @returns: UTF-8 encoded string * * Convert text to UTF-8. */ static gchar * convert_to_utf8 (const gchar * text, gint length, guint start, GIConv giconv, gboolean is_multibyte, GError ** error) { gchar *new_text; gchar *tmp, *pos; gint i; text += start; pos = tmp = g_malloc (length * 2); if (is_multibyte) { if (length == -1) { while (*text != '\0') { guint16 code = GST_READ_UINT16_BE (text); switch (code) { case 0xE086: /* emphasis on */ case 0xE087: /* emphasis off */ /* skip it */ break; case 0xE08A:{ pos[0] = 0x00; /* 0x00 0x0A is a new line */ pos[1] = 0x0A; pos += 2; break; } default: pos[0] = text[0]; pos[1] = text[1]; pos += 2; break; } text += 2; } } else { for (i = 0; i < length; i += 2) { guint16 code = GST_READ_UINT16_BE (text); switch (code) { case 0xE086: /* emphasis on */ case 0xE087: /* emphasis off */ /* skip it */ break; case 0xE08A:{ pos[0] = 0x00; /* 0x00 0x0A is a new line */ pos[1] = 0x0A; pos += 2; break; } default: pos[0] = text[0]; pos[1] = text[1]; pos += 2; break; } text += 2; } } } else { if (length == -1) { while (*text != '\0') { guint8 code = (guint8) (*text); switch (code) { case 0x86: /* emphasis on */ case 0x87: /* emphasis off */ /* skip it */ break; case 0x8A: *pos = '\n'; pos += 1; break; default: *pos = *text; pos += 1; break; } text++; } } else { for (i = 0; i < length; i++) { guint8 code = (guint8) (*text); switch (code) { case 0x86: /* emphasis on */ case 0x87: /* emphasis off */ /* skip it */ break; case 0x8A: *pos = '\n'; pos += 1; break; default: *pos = *text; pos += 1; break; } text++; } } } if (pos > tmp) { gsize bread = 0; new_text = g_convert_with_iconv (tmp, pos - tmp, 
giconv, &bread, NULL, error); GST_DEBUG ("Converted to : %s", new_text); } else { new_text = g_strdup (""); } g_free (tmp); return new_text; }
/**
 * Convert @string with @coder and append the result to @buffer.
 *
 * Input that cannot be converted is handled on a best-effort basis: an
 * illegal byte is copied through unchanged and conversion resumes after it;
 * a truncated sequence at the end of the input is replaced by one '?' per
 * remaining byte.
 *
 * @param coder  conversion descriptor; INVALID_CONV is rejected
 * @param string NUL-terminated input; NULL is rejected
 * @param size   maximum number of bytes of @string to convert, or a negative
 *               value to convert up to the terminating NUL
 * @param buffer string the converted text is appended to; NULL is rejected
 *
 * @return ESTR_SUCCESS if everything was converted, ESTR_PROBLEM if some
 *         input had to be passed through or replaced, ESTR_FAILURE if the
 *         arguments are invalid or the conversion failed outright
 */
static estr_t
_str_convert (GIConv coder, const char *string, int size, GString * buffer)
{
    estr_t state = ESTR_SUCCESS;
    gssize left;

    errno = 0;

    if (coder == INVALID_CONV)
        return ESTR_FAILURE;

    if (string == NULL || buffer == NULL)
        return ESTR_FAILURE;

    /* never look past the terminating NUL, whatever the caller asked for */
    left = strlen (string);
    if (size >= 0 && size < left)
        left = size;

    /* bring the converter back to its initial state */
    g_iconv (coder, NULL, NULL, NULL, NULL);

    while (left != 0)
    {
        gchar *tmp_buff;
        gsize bytes_read = 0;
        GError *error = NULL;

        tmp_buff = g_convert_with_iconv ((const gchar *) string, left, coder,
                                         &bytes_read, NULL, &error);

        if (error != NULL)
        {
            const int code = error->code;

            g_error_free (error);

            switch (code)
            {
            case G_CONVERT_ERROR_NO_CONVERSION:
                /* Conversion between the requested character sets is not supported. */
                g_free (tmp_buff);
                /* one '?' for every byte that was still to be converted */
                tmp_buff = g_strnfill (left, '?');
                g_string_append (buffer, tmp_buff);
                g_free (tmp_buff);
                return ESTR_FAILURE;

            case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
                /* Invalid byte sequence in conversion input. */
                if (tmp_buff == NULL && bytes_read != 0)
                    /* recode valid byte sequence */
                    tmp_buff = g_convert_with_iconv ((const gchar *) string, bytes_read,
                                                     coder, NULL, NULL, NULL);

                if (tmp_buff != NULL)
                {
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                }

                if ((gssize) bytes_read >= left)
                    return ESTR_PROBLEM;

                /* pass the offending byte through as is and resume right after it */
                g_string_append_c (buffer, string[bytes_read]);
                string += bytes_read + 1;
                left -= bytes_read + 1;
                state = ESTR_PROBLEM;
                break;

            case G_CONVERT_ERROR_PARTIAL_INPUT:
                /* Partial character sequence at end of input.
                 * g_convert_with_iconv() hands back NULL on error, so the
                 * valid prefix has to be recoded explicitly before it can be
                 * appended. */
                if (tmp_buff == NULL && bytes_read != 0)
                    tmp_buff = g_convert_with_iconv ((const gchar *) string, bytes_read,
                                                     coder, NULL, NULL, NULL);

                if (tmp_buff != NULL)
                {
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                }

                if ((gssize) bytes_read < left)
                {
                    /* stand in '?' for every byte of the incomplete tail */
                    left -= bytes_read;
                    tmp_buff = g_strnfill (left, '?');
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                }
                return ESTR_PROBLEM;

            case G_CONVERT_ERROR_BAD_URI:      /* Don't know how handle this error :( */
            case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH:    /* Don't know how handle this error :( */
            case G_CONVERT_ERROR_FAILED:       /* Conversion failed for some reason. */
            default:
                g_free (tmp_buff);
                return ESTR_FAILURE;
            }
        }
        else if (tmp_buff == NULL)
        {
            /* no result and no error reported: fall back to the raw input */
            g_string_append (buffer, string);
            return ESTR_PROBLEM;
        }
        else if (*tmp_buff == '\0')
        {
            /* conversion produced nothing: fall back to the raw input */
            g_free (tmp_buff);
            g_string_append (buffer, string);
            return state;
        }
        else
        {
            g_string_append (buffer, tmp_buff);
            g_free (tmp_buff);
            string += bytes_read;
            left -= bytes_read;
        }
    }

    return state;
}
/**
 * dvb_text_from_utf8:
 * @text: The text to convert. This should be in UTF-8 format
 * @out_size: (out): the byte length of the new text
 *
 * Converts UTF-8 strings to text characters compliant with EN 300 468.
 * The converted text can be used directly in DVB #GstMpegtsDescriptor
 *
 * The function will try different character maps until the string is
 * completely converted.
 *
 * The function tries the default ISO 6937 character map first.
 *
 * If no character map that contains all characters could be found, the
 * string is converted to ISO 6937 with unknown characters set to `?`.
 *
 * Except for the default ISO 6937 map, the returned data starts with the
 * selection byte(s) identifying the character map, and @out_size includes
 * them.
 *
 * Returns: (transfer full): byte array of size @out_size, or %NULL if
 *   @text or @out_size is %NULL or the text could not be converted
 */
guint8 *
dvb_text_from_utf8 (const gchar * text, gsize * out_size)
{
  GError *error = NULL;
  gchar *out_text = NULL;
  guint8 *out_buffer;
  guint8 selection[3];
  gsize selection_len;
  gboolean is_multibyte;
  guint encoding;
  GIConv giconv;

  if (text == NULL || out_size == NULL)
    return NULL;

  /* We test character maps one-by-one. Start with the default */
  encoding = _ICONV_ISO6937;
  giconv = _get_iconv (_ICONV_UTF8, encoding);
  if (giconv != (GIConv) - 1) {
    out_text = g_convert_with_iconv (text, -1, giconv, NULL, out_size, &error);
    if (out_text) {
      GST_DEBUG ("Using default ISO6937 encoding");
      goto out;
    }
    g_clear_error (&error);
  }

  for (encoding = _ICONV_ISO8859_1; encoding <= _ICONV_ISO10646_UTF8;
      encoding++) {
    giconv = _get_iconv (_ICONV_UTF8, encoding);
    if (giconv == (GIConv) - 1)
      continue;

    out_text = g_convert_with_iconv (text, -1, giconv, NULL, out_size, &error);
    if (out_text) {
      GST_DEBUG ("Found suitable character map - %s",
          iconvtablename[encoding]);
      goto out;
    }
    g_clear_error (&error);
  }

  /* Lossy fallback. The result is ISO 6937, so say so explicitly instead of
   * relying on whatever value the loop counter ended up with. */
  encoding = _ICONV_ISO6937;
  out_text = g_convert_with_fallback (text, -1,
      iconvtablename[_ICONV_ISO6937], iconvtablename[_ICONV_UTF8], "?",
      NULL, out_size, &error);

out:
  if (error != NULL || out_text == NULL) {
    GST_WARNING ("Could not convert from utf-8: %s",
        error != NULL ? error->message : "unknown error");
    g_clear_error (&error);
    g_free (out_text);
    return NULL;
  }

  switch (encoding) {
    case _ICONV_ISO6937:
      /* Default encoding contains no selection bytes. */
      _encode_control_codes (out_text, *out_size, FALSE);
      return (guint8 *) out_text;
    case _ICONV_ISO8859_1:
    case _ICONV_ISO8859_2:
    case _ICONV_ISO8859_3:
    case _ICONV_ISO8859_4:
      /* These character sets requires 3 selection bytes */
      selection[0] = 0x10;
      selection[1] = 0x00;
      selection[2] = encoding - _ICONV_ISO8859_1 + 1;
      selection_len = 3;
      is_multibyte = FALSE;
      break;
    case _ICONV_ISO8859_5:
    case _ICONV_ISO8859_6:
    case _ICONV_ISO8859_7:
    case _ICONV_ISO8859_8:
    case _ICONV_ISO8859_9:
    case _ICONV_ISO8859_10:
    case _ICONV_ISO8859_11:
    case _ICONV_ISO8859_12:
    case _ICONV_ISO8859_13:
    case _ICONV_ISO8859_14:
    case _ICONV_ISO8859_15:
      /* These character sets requires 1 selection byte */
      selection[0] = encoding - _ICONV_ISO8859_5 + 1;
      selection_len = 1;
      is_multibyte = FALSE;
      break;
    case _ICONV_UCS_2BE:
    case _ICONV_EUC_KR:
    case _ICONV_UTF_16BE:
      /* These character sets requires 1 selection byte */
      selection[0] = encoding - _ICONV_UCS_2BE + 0x11;
      selection_len = 1;
      is_multibyte = TRUE;
      break;
    case _ICONV_GB2312:
    case _ICONV_ISO10646_UTF8:
      /* These character sets requires 1 selection byte */
      selection[0] = encoding - _ICONV_UCS_2BE + 0x11;
      selection_len = 1;
      is_multibyte = FALSE;
      break;
    default:
      g_free (out_text);
      return NULL;
  }

  /* Control codes are encoded in place, then the selection bytes are put in
   * front of the text in a new buffer. */
  _encode_control_codes (out_text, *out_size, is_multibyte);

  out_buffer = g_malloc (*out_size + selection_len);
  memcpy (out_buffer, selection, selection_len);
  memcpy (out_buffer + selection_len, out_text, *out_size);
  *out_size += selection_len;

  g_free (out_text);

  return out_buffer;
}