Beispiel #1
0
Datei: lib.c Projekt: BrEacK/mc
/**
 * Recode a byte string from one charset to another.
 *
 * @param str           input bytes
 * @param str_len       number of bytes of str to use
 * @param charset_from  source charset name, may be NULL
 * @param charset_to    target charset name, may be NULL
 * @param bytes_written receives the length of the returned string
 *
 * @return a newly allocated string. When either charset is NULL, both
 *         charsets are equal, the converter cannot be opened or the
 *         conversion fails, the result is a g_strndup() copy of the input
 *         and *bytes_written is set to str_len.
 */
gchar *
mc_search__recode_str (const char *str, gsize str_len,
                       const char *charset_from, const char *charset_to, gsize * bytes_written)
{
    gchar *converted = NULL;

    if (charset_from != NULL && charset_to != NULL && strcmp (charset_to, charset_from) != 0)
    {
        GIConv cd;

        cd = g_iconv_open (charset_to, charset_from);
        if (cd != INVALID_CONV)
        {
            gsize consumed;

            converted = g_convert_with_iconv (str, str_len, cd, &consumed, bytes_written, NULL);
            g_iconv_close (cd);
        }
    }

    /* Single fallback path: hand back an unconverted copy. */
    if (converted == NULL)
    {
        *bytes_written = str_len;
        converted = g_strndup (str, str_len);
    }

    return converted;
}
Beispiel #2
0
/*
 * Build a string value from a NUL-terminated string in the file's encoding.
 * The text is run through state->converter and the converted buffer is
 * handed to value_new_string_nocopy() without being copied here.
 */
static GnmValue *
qpro_new_string (QProReadState *state, gchar const *data)
{
	gchar *converted;

	converted = g_convert_with_iconv (data, -1, state->converter,
					  NULL, NULL, NULL);

	return value_new_string_nocopy (converted);
}
Beispiel #3
0
/**
 * Send a line over an IO Channel
 *
 * The line is serialized with irc_line_string_nl(), optionally converted
 * with the given iconv descriptor, and written to the channel.
 *
 * @param c IO Channel
 * @param iconv iconv to use, -1 for none
 * @param l Line
 * @param error Error
 * @return G_IO_STATUS_ERROR if the charset conversion fails, otherwise the
 *         status returned by g_io_channel_write_chars()
 */
GIOStatus irc_send_line(GIOChannel *c, GIConv iconv,
						const struct irc_line *l, GError **error)
{
	char *raw, *cvrt = NULL;
	GIOStatus ret;
	gsize bytes_written = 0;

	g_assert(c);

	raw = irc_line_string_nl(l);
	if (iconv != (GIConv)-1) {
		cvrt = g_convert_with_iconv(raw, -1, iconv, NULL, NULL, error);
		/* The unconverted text is no longer needed whether or not the
		 * conversion succeeded; free it before the early return so it
		 * is not leaked on failure. */
		g_free(raw);
		if (cvrt == NULL)
			return G_IO_STATUS_ERROR;
	} else {
		cvrt = raw;
	}
	ret = g_io_channel_write_chars(c, cvrt, -1, &bytes_written, error);
	g_free(cvrt);

	if (ret == G_IO_STATUS_AGAIN) {
		g_assert(bytes_written == 0);
	}

	return ret;
}
/*
 * Produce a tvbuff holding the text payload of a CBS message.
 *
 * sms_encoding selects the handling:
 *  - 7 bit: the septets are unpacked into a static buffer, converted with
 *    gsm_sms_chars_to_utf8() and exposed as a new data source named
 *    "unpacked 7 bit data".
 *  - 8 bit: a subset of tvb starting at offset is returned.
 *  - UCS2: the bytes are test-converted from UCS-2BE to UTF-8; the converted
 *    text is discarded and, on success, a subset of tvb is returned.
 *  - anything else: a text item is added to the tree.
 *
 * Returns the resulting tvbuff, or NULL when the encoding is unhandled, the
 * converter cannot be opened, or the UCS2 conversion fails.
 */
static tvbuff_t * dissect_cbs_data(guint8 sms_encoding, tvbuff_t *tvb, proto_tree *tree, packet_info *pinfo, guint16 offset )
{
   tvbuff_t * tvb_out = NULL;
   guint8		out_len;
   int			length = tvb_length(tvb) - offset;
   gchar *utf8_text = NULL;
   static unsigned char msgbuf[1024];
   guint8 * input_string = tvb_get_ephemeral_string(tvb, offset, length);
   GIConv cd;

   switch(sms_encoding){
     case SMS_ENCODING_7BIT:
     case SMS_ENCODING_7BIT_LANG:
     out_len = gsm_sms_char_7bit_unpack(0, length, sizeof(msgbuf),
                                        input_string,
                                        msgbuf);
     msgbuf[out_len] = '\0';
     utf8_text = gsm_sms_chars_to_utf8(msgbuf, out_len);
     tvb_out = tvb_new_child_real_data(tvb, utf8_text, out_len, out_len);
     add_new_data_source(pinfo, tvb_out, "unpacked 7 bit data");
     break;

     case SMS_ENCODING_8BIT:
     tvb_out = tvb_new_subset(tvb, offset, length, length);
     break;

     case SMS_ENCODING_UCS2:
     case SMS_ENCODING_UCS2_LANG:
     if ((cd = g_iconv_open("UTF-8","UCS-2BE")) != (GIConv) -1)
     {
         /* Success is judged by the returned pointer. No GError is
          * requested, so a failed conversion no longer leaves an
          * allocated GError behind that nobody frees. */
         utf8_text = g_convert_with_iconv(input_string, length, cd, NULL, NULL, NULL);
         if (utf8_text != NULL)
         {
            tvb_out = tvb_new_subset(tvb, offset, length, length);
         }
         else
         {
            proto_tree_add_text(tree, tvb, offset, length, "CBS String: g_convert_with_iconv FAILED");
         }

         g_free(utf8_text);
         g_iconv_close(cd);
     }
     else
     {
            proto_tree_add_text(tree, tvb, offset, length, "CBS String: g_iconv_open FAILED contact wireshark");
     }
     break;

      default:
         proto_tree_add_text(tree, tvb, offset, length, "Unhandled encoding %d of CBS String", sms_encoding);
     break;
   }
   return tvb_out;
}
/*
 * Produce a tvbuff holding the text payload of a CBS message.
 *
 * sms_encoding selects the handling:
 *  - 7 bit: the text is extracted with tvb_get_ts_23_038_7bits_string(),
 *    duplicated with g_strdup() and exposed as a new data source named
 *    "unpacked 7 bit data"; g_free is registered as the tvbuff free callback.
 *  - 8 bit: a subset of tvb starting at offset is returned.
 *  - UCS2: the bytes are test-converted from UCS-2BE to UTF-8; the converted
 *    text is discarded and, on success, a subset of tvb is returned.
 *  - anything else: a text item is added to the tree.
 *
 * Returns the resulting tvbuff, or NULL when the encoding is unhandled, the
 * converter cannot be opened, or the UCS2 conversion fails.
 */
tvbuff_t * dissect_cbs_data(guint8 sms_encoding, tvbuff_t *tvb, proto_tree *tree, packet_info *pinfo, guint16 offset )
{
   tvbuff_t * tvb_out = NULL;
   int			length = tvb_length(tvb) - offset;
   gchar *utf8_text = NULL, *utf8_out;
   guint8 * input_string;
   GIConv cd;

   switch(sms_encoding){
     case SMS_ENCODING_7BIT:
     case SMS_ENCODING_7BIT_LANG:
     utf8_text = tvb_get_ts_23_038_7bits_string(wmem_packet_scope(), tvb, offset<<3, (length*8)/7);
     utf8_out = g_strdup(utf8_text);
     tvb_out = tvb_new_child_real_data(tvb, utf8_out, (guint)strlen(utf8_out), (guint)strlen(utf8_out));
     tvb_set_free_cb(tvb_out, g_free);
     add_new_data_source(pinfo, tvb_out, "unpacked 7 bit data");
     break;

     case SMS_ENCODING_8BIT:
     tvb_out = tvb_new_subset(tvb, offset, length, length);
     break;

     case SMS_ENCODING_UCS2:
     case SMS_ENCODING_UCS2_LANG:
     input_string = tvb_get_string(wmem_packet_scope(), tvb, offset, length);
     if ((cd = g_iconv_open("UTF-8","UCS-2BE")) != (GIConv) -1)
     {
         /* Success is judged by the returned pointer. No GError is
          * requested, so a failed conversion no longer leaves an
          * allocated GError behind that nobody frees. */
         utf8_text = g_convert_with_iconv(input_string, length, cd, NULL, NULL, NULL);
         if (utf8_text != NULL)
         {
            tvb_out = tvb_new_subset(tvb, offset, length, length);
         }
         else
         {
            proto_tree_add_text(tree, tvb, offset, length, "CBS String: g_convert_with_iconv FAILED");
         }

         g_free(utf8_text);
         g_iconv_close(cd);
     }
     else
     {
            proto_tree_add_text(tree, tvb, offset, length, "CBS String: g_iconv_open FAILED contact wireshark");
     }
     break;

      default:
         proto_tree_add_text(tree, tvb, offset, length, "Unhandled encoding %d of CBS String", sms_encoding);
     break;
   }
   return tvb_out;
}
Beispiel #6
0
/*
 * Read the next line of input into ctxt->line.
 *
 * The previous ctxt->line is freed, the new raw line is converted with
 * ctxt->converter, and ctxt->line_len / ctxt->line_no are updated.
 *
 * Returns FALSE when no further line is available or when the conversion
 * fails, TRUE otherwise.
 */
static gboolean
dif_get_line (DifInputContext *ctxt)
{
	char *input_line = gsf_input_textline_ascii_gets (ctxt->input);

	if (input_line == NULL)
		return FALSE;

	g_free (ctxt->line);
	ctxt->line = g_convert_with_iconv (input_line, -1, ctxt->converter,
					   NULL, &ctxt->line_len, NULL);
	ctxt->line_no += 1;

	if (ctxt->line == NULL)
		return FALSE;
	return TRUE;
}
Beispiel #7
0
/*
 * Convert the NUL-terminated string *inbuf and store a newly allocated
 * result in *outbuf.
 *
 * With DISABLE_UTF8 defined the input is simply duplicated and 0 is returned.
 * Otherwise:
 *   returns 1  when converter is (GIConv)-1 or (GIConv)0; *outbuf is a copy,
 *   returns 0  when the conversion succeeded,
 *   returns -1 when the conversion failed (*outbuf is NULL).
 * inbytes_left / outbytes_left are passed to g_convert_with_iconv() as its
 * bytes_read / bytes_written outputs.
 */
gint rlib_charencoder_convert(GIConv converter, gchar **inbuf, gsize *inbytes_left, gchar **outbuf, gsize *outbytes_left) {
#ifdef DISABLE_UTF8
	*outbuf = g_strdup(*inbuf);
	return 0;
#else
	gboolean usable = (converter != (GIConv) -1) && (converter != (GIConv) 0);

	if(!usable) {
		/* No converter available: pass the text through unchanged. */
		*outbuf = g_strdup(*inbuf);
		return 1;
	}

	*outbuf = g_convert_with_iconv(*inbuf, strlen(*inbuf), converter, inbytes_left, outbytes_left, NULL);
	if(*outbuf == NULL)
		return -1;
	return 0;
#endif
}
Beispiel #8
0
/*
 * Convert the single byte c with the converter cd.
 *
 * Returns the first byte of the converted output, or UNKNCHAR when the
 * conversion fails.
 */
static char
translate_character (GIConv cd, char c)
{
    gsize consumed;
    gsize produced = 0;
    gchar *converted;
    char first;

    converted = g_convert_with_iconv (&c, 1, cd, &consumed, &produced, NULL);
    if (converted == NULL)
        return UNKNCHAR;

    first = converted[0];
    g_free (converted);

    return first;
}
Beispiel #9
0
/*
 * Read the input line by line until state->finished is set or the input is
 * exhausted. Each line has trailing whitespace removed, is converted with
 * state->converter and passed to sylk_parse_line(). A warning is issued if
 * the input ends before the reader was marked finished.
 */
static void
sylk_parse_sheet (SylkReader *state)
{
	while (!state->finished) {
		char *raw_line;
		char *converted;
		gsize converted_len;

		raw_line = gsf_input_textline_ascii_gets (state->input);
		if (raw_line == NULL)
			break;

		g_strchomp (raw_line);

		/* NOTE(review): the conversion result is not checked before it
		 * is handed to sylk_parse_line() - confirm that a NULL line is
		 * tolerated there. */
		converted = g_convert_with_iconv (raw_line, -1, state->converter,
						  NULL, &converted_len, NULL);

		state->line_no++;
		sylk_parse_line (state, converted, converted_len);
		g_free (converted);
	}

	if (!state->finished)
		sylk_read_warning (state, _("Missing closing 'E'"));
}
Beispiel #10
0
/*
 * Convert a string from one character set to another.
 *
 * str:           the string to convert
 * len:           length of str, passed through to g_convert_with_iconv()
 * to_codeset:    name of the target character set
 * from_codeset:  name of the source character set
 * bytes_read:    optional output, set to 0 if the converter cannot be opened
 * bytes_written: optional output, set to 0 if the converter cannot be opened
 * error:         optional location for error details
 *
 * Returns the newly allocated converted string, or NULL if an argument check
 * fails, the converter cannot be opened, or the conversion fails. When built
 * without HAVE_ICONV_H the function returns a plain g_strdup() of str and
 * does not touch the other arguments.
 */
gchar*
g_convert (const gchar *str,
           gssize       len,  
           const gchar *to_codeset,
           const gchar *from_codeset,
           gsize       *bytes_read, 
	   gsize       *bytes_written, 
	   GError     **error)
{
#ifdef HAVE_ICONV_H
  gchar *converted;
  GIConv converter;

  g_return_val_if_fail (str != NULL, NULL);
  g_return_val_if_fail (to_codeset != NULL, NULL);
  g_return_val_if_fail (from_codeset != NULL, NULL);

  converter = open_converter (to_codeset, from_codeset, error);
  if (converter != (GIConv) -1)
    {
      converted = g_convert_with_iconv (str, len, converter,
					bytes_read, bytes_written,
					error);
      g_iconv_close (converter);

      return converted;
    }

  /* No converter: report that nothing was consumed or produced. */
  if (bytes_read != NULL)
    *bytes_read = 0;
  if (bytes_written != NULL)
    *bytes_written = 0;

  return NULL;
#else
  return g_strdup (str);
#endif
}
// Encodes `length` UChars with the codec's iconv encoder, creating the
// encoder on first use. Returns an empty string literal for zero-length
// input and a null CString when the encoder cannot be created or the
// conversion reports an error. The `handling` argument is not consulted here.
CString TextCodecGtk::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    if (!length)
        return "";

    if (m_iconvEncoder == reinterpret_cast<GIConv>(-1)) {
        createIConvEncoder();
        if (m_iconvEncoder == reinterpret_cast<GIConv>(-1))
            return CString();
    }

    const char* input = reinterpret_cast<const char*>(characters);
    const size_t inputLength = length * sizeof(UChar);

    GOwnPtr<GError> error;
    GOwnPtr<char> encoded;
    size_t encodedLength;

    encoded.outPtr() = g_convert_with_iconv(input, inputLength, m_iconvEncoder, 0, &encodedLength, &error.outPtr());
    if (error) {
        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message);
        return CString();
    }

    return CString(encoded.get(), encodedLength);
}
Beispiel #12
0
/**
 * Read one line from an IO Channel and parse it.
 *
 * @param c IO Channel
 * @param iconv iconv to use, -1 for none
 * @param error Error
 * @param l Receives the parsed line; set to NULL when no line was read
 * @return the read status if it is not G_IO_STATUS_NORMAL;
 *         G_IO_STATUS_ERROR if the charset conversion failed (the
 *         unconverted text is parsed in that case); otherwise
 *         G_IO_STATUS_NORMAL
 */
GIOStatus irc_recv_line(GIOChannel *c, GIConv iconv,
						GError **error, struct irc_line **l)
{
	gchar *line = NULL;
	gsize line_len;
	GIOStatus result;

	g_assert(l != NULL);

	*l = NULL;

	g_assert(c);

	result = g_io_channel_read_line(c, &line, &line_len, NULL, error);
	if (result != G_IO_STATUS_NORMAL) {
		g_free(line);
		return result;
	}

	if (iconv != (GIConv)-1) {
		gchar *converted = g_convert_with_iconv(line, -1, iconv, NULL, NULL, error);

		if (converted != NULL) {
			/* Continue with the converted text. */
			g_free(line);
			line = converted;
		} else {
			/* Keep the raw text but report the failure. */
			result = G_IO_STATUS_ERROR;
		}
	}

	*l = irc_parse_line(line);

	g_free(line);

	return result;
}
// Decodes `length` bytes with the codec's iconv decoder, creating the decoder
// on first use. Bytes left unconverted by a previous call (m_bufferedBytes)
// are prepended to the new input; bytes left unconverted by this call are
// stored for the next one. When `flush` is set, leftover bytes are treated as
// an error. On any conversion error `sawError` is set, the buffered state is
// reset and a null String is returned. `stopOnError` is not consulted here.
String TextCodecGtk::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
{
    // Get a converter for the passed-in encoding.
    if (m_iconvDecoder == reinterpret_cast<GIConv>(-1)) {
        createIConvDecoder();
        ASSERT(m_iconvDecoder != reinterpret_cast<GIConv>(-1));
        if (m_iconvDecoder == reinterpret_cast<GIConv>(-1)) {
            LOG_ERROR("Error creating IConv encoder even though encoding was in table.");
            return String();
        }
    }

    // By default convert exactly what was passed in.
    const char* input = bytes;
    size_t inputLength = length;
    char* joined = 0;

    if (m_numBufferedBytes) {
        // Glue the previously buffered partial sequence in front of the new data.
        inputLength = length + m_numBufferedBytes;
        joined = static_cast<char*>(fastMalloc(inputLength));
        memcpy(joined, m_bufferedBytes, m_numBufferedBytes);
        memcpy(joined + m_numBufferedBytes, bytes, length);
        input = joined;

        // All buffered bytes are consumed now.
        m_numBufferedBytes = 0;
    }

    GOwnPtr<GError> error;
    GOwnPtr<UChar> decoded;
    size_t bytesRead, bytesWritten;

    decoded.outPtr() = reinterpret_cast<UChar*>(g_convert_with_iconv(input, inputLength, m_iconvDecoder, &bytesRead, &bytesWritten, &error.outPtr()));

    bool failed = false;

    if (error) {
        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message);
        failed = true;
    } else {
        // A successful conversion may still stop short of the end of the input
        // when it ends in a partial character, so compare the input length
        // with the number of bytes actually read.
        m_numBufferedBytes = inputLength - bytesRead;
        if (m_numBufferedBytes > 0) {
            if (flush) {
                LOG_ERROR("Partial bytes at end of input while flush requested.");
                failed = true;
            } else
                memcpy(m_bufferedBytes, input + bytesRead, m_numBufferedBytes);
        }
    }

    fastFree(joined);

    if (failed) {
        m_numBufferedBytes = 0; // reset state for subsequent calls to decode
        sawError = true;
        return String();
    }

    Vector<UChar> result;
    result.append(decoded.get(), bytesWritten / sizeof(UChar));

    return String::adopt(result);
}
Beispiel #14
0
void build_zip(const gchar *dirname, const gchar *zipfilename)
{

	gchar sutra_name[256];
	strcpy(sutra_name, zipfilename);
	gchar *p;
	p = strrchr(sutra_name, '.');
	if (p) {
		*p = '\0';
	}

	GIConv locale_converter;
	locale_converter = g_iconv_open("gb18030","UTF-8");

	gchar *locale_sutra_name;
	locale_sutra_name = g_convert_with_iconv(sutra_name,-1,locale_converter,NULL,NULL,NULL);
	g_iconv_close(locale_converter);

	gchar *quotefilename;

	std::string cmd;
	cmd = "mv ";
	quotefilename = g_shell_quote(dirname);
	cmd += quotefilename;
	g_free(quotefilename);
	cmd += '/';
	quotefilename = g_shell_quote(zipfilename);
	cmd += quotefilename;
	g_free(quotefilename);
	cmd += ' ';
	quotefilename = g_shell_quote(dirname);
	cmd += quotefilename;
	g_free(quotefilename);
	cmd += "/txt";
	system(cmd.c_str());

	cmd = "unzip ";
	quotefilename = g_shell_quote(dirname);
	cmd += quotefilename;
	g_free(quotefilename);
	cmd += "/txt/";
	quotefilename = g_shell_quote(zipfilename);
	cmd += quotefilename;
	g_free(quotefilename);
	cmd += " -d ";
	quotefilename = g_shell_quote(dirname);
	cmd += quotefilename;
	g_free(quotefilename);
	cmd += "/txt";
	system(cmd.c_str());

	cmd = "mv ";
	quotefilename = g_shell_quote(dirname);
	cmd += quotefilename;
	g_free(quotefilename);
	cmd += "/txt/";
	quotefilename = g_shell_quote(zipfilename);
	cmd += quotefilename;
	g_free(quotefilename);
	cmd += ' ';
	quotefilename = g_shell_quote(dirname);
	cmd += quotefilename;
	g_free(quotefilename);
	system(cmd.c_str());

	std::string fullfilename;
	fullfilename = dirname;
	fullfilename += "/txt";

	GDir *dir = g_dir_open(fullfilename.c_str(), 0, NULL);
	const gchar *txtfilename;
	while ((txtfilename = g_dir_read_name(dir))!=NULL) {
		fullfilename = dirname;
		fullfilename += "/txt/";
		fullfilename += txtfilename;
		if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_REGULAR)) {
			convert_txt(dirname, txtfilename, locale_sutra_name);
		}
	}
	g_dir_close(dir);

	g_free(locale_sutra_name);
}
Beispiel #15
0
/*
 * Decide from the first byte a pending client sends whether it speaks SOCKS
 * or plain IRC.
 *
 * If the byte equals SOCKS_VERSION the client is marked as a SOCKS client
 * and TRUE is returned. Otherwise the rest of the line is read, the first
 * byte is put back in front of it, the text is converted with the
 * listener's iconv descriptor (if any), parsed, and passed to the listener's
 * handle_client_line() callback, whose result is returned.
 *
 * Returns FALSE if the first read fails with a status other than NORMAL or
 * AGAIN.
 */
static gboolean handle_client_detect(GIOChannel *ioc, struct pending_client *pc)
{
	GIOStatus status;
	gsize read;
	gchar header[2];
	GError *error = NULL;

	status = g_io_channel_read_chars(ioc, header, 1, &read, &error);

	if (status != G_IO_STATUS_NORMAL && status != G_IO_STATUS_AGAIN) {
		if (error != NULL)
			g_error_free(error);
		return FALSE;
	}

	/* NOTE(review): G_IO_STATUS_AGAIN is accepted above, yet `read` is not
	 * checked before header[0] is used - confirm whether a byte is
	 * guaranteed to have been stored in that case. */
	if (header[0] == SOCKS_VERSION) {
		listener_log(LOG_TRACE, pc->listener, "Detected SOCKS.");
		pc->type = CLIENT_TYPE_SOCKS;
		pc->socks.state = SOCKS_STATE_NEW;
		return TRUE;
	} else {
		struct irc_line *l = NULL;
		gchar *raw = NULL, *cvrt = NULL;
		gchar *complete;
		GIOStatus line_status;
		gboolean ret;
		gsize in_len;

		pc->type = CLIENT_TYPE_REGULAR;

		g_assert(ioc != NULL);

		line_status = g_io_channel_read_line(ioc, &raw, &in_len, NULL, &error);
		if (line_status != G_IO_STATUS_NORMAL) {
			g_free(raw);
			/* No GError is set for EOF/AGAIN, so only free one that
			 * actually exists. */
			if (error != NULL)
				g_error_free(error);
			/* NOTE(review): a GIOStatus is returned from a gboolean
			 * function here (kept as in the original) - confirm the
			 * intended result for EOF and AGAIN. */
			return line_status;
		}

		/* Rebuild the full line: first byte + remainder + terminator. */
		complete = g_malloc(in_len+2);
		complete[0] = header[0];
		memcpy(complete+1, raw, in_len);
		complete[in_len+1] = '\0';
		g_free(raw);

		if (pc->listener->iconv == (GIConv)-1) {
			cvrt = complete;
		} else {
			cvrt = g_convert_with_iconv(complete, -1, pc->listener->iconv, NULL, NULL, &error);
			if (cvrt == NULL) {
				/* Conversion failed: fall back to the raw text. */
				cvrt = complete;
				if (error != NULL)
					g_error_free(error);
			} else {
				g_free(complete);
			}
		}

		l = irc_parse_line(cvrt);

		ret = pc->listener->ops->handle_client_line(pc, l);

		free_line(l);

		g_free(cvrt);

		return ret;
	}
}
Beispiel #16
0
/*
 * Convert an old dictionary file into a pair of "<basename>.idx" and
 * "<basename>.dict" files.
 *
 * filename        - the old dictionary data file; it is mmap()ed and read as
 *                   a sequence of NUL-terminated records followed by two
 *                   trailing ints (item count and style flags).
 * idxheadfilename - a header file whose whole content is copied to the start
 *                   of the .idx output; basename is this name with its last
 *                   8 characters removed.
 *
 * Each record is a word, a meaning and, when the low byte of the style flags
 * (cMark) is non-zero, a mark string. Unless DISABLE_CONVERT_LOCALE is
 * defined every string goes through two converters (GB2312 -> BIG5, then
 * BIG5 -> UTF-8); otherwise g_locale_to_utf8() is used. The items are sorted
 * with comparefunc, items whose word equals the previously written word are
 * dropped, and the remaining ones are written out. The word count
 * placeholder written after the header is patched at the end.
 *
 * NOTE(review): the fopen() results are not checked, the early returns leave
 * the already opened files/descriptor open, and basename/str are fixed
 * 256-byte buffers filled with strcpy/sprintf - confirm inputs are bounded.
 */
void convert(char *filename,char *idxheadfilename)
{
	struct stat stats;
	if (stat (idxheadfilename, &stats) == -1)
	{
		printf("idxhead file not exist!\n");
		return;
	}

	// Load the whole idxhead file into memory.
	FILE *idxheadfile;
	idxheadfile = fopen(idxheadfilename,"r");
	gchar *buffer;
	buffer = (gchar *)g_malloc (stats.st_size + 1);
	size_t fread_size;
	fread_size = fread (buffer, 1, stats.st_size, idxheadfile);
	if (fread_size != (size_t)stats.st_size) {
		g_print("fread error!\n");
	}
	fclose (idxheadfile);
	buffer[stats.st_size] = '\0';
	//gboolean sametypesequence = FALSE;
	//if (strstr(buffer,"sametypesequence="))
		//sametypesequence = TRUE;
	
	// In the code below sametypesequence is always treated as TRUE,
	// as all old stardict dictionaries use this feature.
	
	FILE *idxfile,*dicfile;
	gchar str[256],basename[256];
	
	// basename = idxheadfilename without its last 8 characters.
	strcpy(basename,idxheadfilename);
	basename[strlen(idxheadfilename)-8]='\0';
	
	sprintf(str,"%s.idx",basename);
	idxfile = fopen(str,"w");
	sprintf(str,"%s.dict",basename);
	dicfile = fopen(str,"w");
	
	// The .idx file starts with a verbatim copy of the header.
	fwrite(buffer, 1, stats.st_size, idxfile);
	g_free(buffer);
	
	// Reserve room for the word count; it is filled in at the end.
	long wordcount_offset = ftell(idxfile);
	glong tmpglong=0;
	fwrite(&(tmpglong),sizeof(glong),1,idxfile);

	int fd=open(filename,O_RDONLY);
    if(fd==-1)
    {
		g_print("open fail\n");
        return;
    }

    // get length of dicfile.
    struct stat stStat;
    if(fstat(fd,&stStat)!=0)
    {
        g_print("stat fail\n");
        return;
    }
    int iFileSize=stStat.st_size;
    
	// get item count: the last two ints of the file are capacity and style
	lseek(fd,0-sizeof(int)*2,SEEK_END);
	unsigned int iCapacity,iStyle;
	ssize_t read_size;
	read_size = read(fd,&iCapacity,sizeof(int));
	if (read_size == -1) {
		g_print("read() error!\n");
	}
	read_size = read(fd,&iStyle,sizeof(int));
	if (read_size == -1) {
		g_print("read() error!\n");
	}
	// define DISABLE_CONVERT_ENDIAN when the file comes from a machine of the same architecture.
#ifndef DISABLE_CONVERT_ENDIAN
	vConvertEndian(&iCapacity);
    vConvertEndian(&iStyle);
#endif

    // The four bytes of iStyle, most significant first.
    unsigned char cIndex=(unsigned char)(iStyle>>24);
    unsigned char cWord=(unsigned char)(iStyle>>16);
    unsigned char cMeaning=(unsigned char)(iStyle>>8);
    unsigned char cMark=(unsigned char)iStyle;
	g_print("flag: %c %c %c\n",cIndex,cWord,cMeaning);

    // mmap the file to memory (everything except the two trailing ints)
    caddr_t pFileMem=(caddr_t)mmap( (caddr_t)0,iFileSize-sizeof(int)*2,
                            PROT_READ,MAP_SHARED|MAP_NORESERVE,fd,0 );
    if(pFileMem==MAP_FAILED)
    {
        g_print("mmap fail\n");
        return;
    }

    // begin to read items.
    caddr_t p=pFileMem;
    caddr_t pMeaning, pMark;
	gchar *utf8_str;
#ifndef DISABLE_CONVERT_LOCALE
	gchar *locale_str;
	gsize locale_write_size;
	GIConv locale_converter;
	GIConv utf8_converter;
//	locale_converter = g_iconv_open("GB2312","BIG5");
//	utf8_converter = g_iconv_open("UTF-8","GB2312");

	// Two-step conversion: GB2312 -> BIG5, then BIG5 -> UTF-8.
	locale_converter = g_iconv_open("BIG5","GB2312");
	utf8_converter = g_iconv_open("UTF-8","BIG5");
	
	// the locale_converter has no problem, but why does it fail later?
/*	locale_str = g_convert_with_iconv("<sample bytes>",8,locale_converter,NULL,&locale_write_size,NULL);
	if (!locale_str) {
		g_print("convert fail\n");		
	}
	else
		printf("%s",locale_str);
	return;*/

#endif
	gsize write_size;
	long tmp_long,wordcount=0;
	int word_len, meaning_len,mark_len=0;
	gulong iLength=0;
		
	GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),iCapacity);
    struct _worditem worditem;
	glong old_size;
	
	// Walk the records until the mapped area or the announced item count is exhausted.
	while(p<pFileMem+iFileSize-sizeof(int)*2 && iLength<iCapacity)
    {
		iLength++;
		word_len = strlen(p);
#ifndef DISABLE_CONVERT_LOCALE
		locale_str = g_convert_with_iconv(p,word_len,locale_converter,NULL,&locale_write_size,NULL);
		if (locale_str) {
			utf8_str = g_convert_with_iconv(locale_str,locale_write_size,utf8_converter,NULL,NULL,NULL);
			g_free(locale_str);
		}
		else {
			printf("%s convert to other locale error!!!\n",p);
			utf8_str = NULL;
		}
#else
		utf8_str = g_locale_to_utf8(p,word_len,NULL,NULL,NULL);
#endif
		if (utf8_str)
			g_strstrip(utf8_str);
		if (!utf8_str || (*utf8_str=='\0'))
		{
			// Unusable word: skip over the rest of this record.
			printf("%s convert to utf8 error!!!\n",p);
			pMeaning=p+word_len+1;
			meaning_len = strlen(pMeaning);
	        if ( !cMark )
	            p = pMeaning+meaning_len+1;
    	    else
			{
	            pMark = pMeaning+meaning_len+1;
				mark_len = strlen(pMark);
        	    p = pMark+mark_len+1;
			}
			if (utf8_str)
				g_free(utf8_str);
			continue;
		}
		worditem.word = utf8_str;
		// utf8_str (now worditem.word) is freed later, in the output loop.
		worditem.data = NULL;
		worditem.datasize = 0;		
		old_size=0;
		
        pMeaning=p+word_len+1;
		meaning_len = strlen(pMeaning);
        if ( !cMark )   // no Mark field, eg py2gb
            pMark = NULL;
        else
		{			
            pMark = pMeaning+meaning_len+1;
			mark_len = strlen(pMark);
#ifndef DISABLE_CONVERT_LOCALE
			locale_str = g_convert_with_iconv(pMark,mark_len,locale_converter,NULL,&locale_write_size,NULL);
			if (locale_str) {
				utf8_str = g_convert_with_iconv(locale_str,locale_write_size,utf8_converter,NULL,&write_size,NULL);
				g_free(locale_str);
			}
			else {
				printf("%s convert to other locale error!!!\n",pMark);
				utf8_str = NULL;
			}
#else
			utf8_str = g_locale_to_utf8(pMark,mark_len,NULL,&write_size,NULL); //mark may contains Chinese too,ie. "not"
#endif
			if (utf8_str)
			{		
				// The mark is stored first, including its terminating NUL.
				gchar *p_str = to_utf8_phonetic(utf8_str, write_size);
				g_free(utf8_str);
				write_size = strlen(p_str);
				worditem.datasize += (write_size + 1);
					
				worditem.data = (gchar *)g_realloc(worditem.data,worditem.datasize);
				/*if ((cWord == LIB_WORD_GB) || (cWord == LIB_WORD_BIG5))
					memcpy(worditem.data+old_size,"Y",sizeof(gchar)); // Chinese Yin Biao
				else
					memcpy(worditem.data+old_size,"T",sizeof(gchar));  // English Phonetic
				old_size+=sizeof(gchar);
				tmp_long = write_size;				
				memcpy(worditem.data+old_size,&tmp_long,sizeof(glong));
				old_size+=sizeof(glong);*/
				memcpy(worditem.data+old_size,p_str,write_size+1);				
				old_size+= (write_size+1);
				g_free(p_str);
			}
			else {
				// Conversion failed: store an empty mark (a single NUL byte).
				// NOTE(review): old_size is not advanced here, so the meaning
				// copied below starts at the same offset - confirm intended.
				worditem.datasize += 1;
				worditem.data = (gchar *)g_realloc(worditem.data,worditem.datasize);
				memcpy(worditem.data+old_size,"", 1);				
				printf("%s 's mark convert to utf8 error!\n",p);
			}
		}

#ifndef DISABLE_CONVERT_LOCALE
			locale_str = g_convert_with_iconv(pMeaning, meaning_len,locale_converter,NULL,&locale_write_size,NULL);
			if (locale_str) {
				utf8_str = g_convert_with_iconv(locale_str,locale_write_size,utf8_converter,NULL,&write_size,NULL);
				g_free(locale_str);
			}
			else {
				printf("%s convert to other locale error!!!\n",pMeaning);
				utf8_str = NULL;
			}
#else
		utf8_str = g_locale_to_utf8(pMeaning,meaning_len,NULL,&write_size,NULL);
#endif		
		if (utf8_str)
		{
			tmp_long = write_size;
		}
		else
		{
			// Conversion failed: store a single NUL byte as the meaning.
			printf("%s 's meaning convert to utf8 error!\n",p);
			tmp_long = 1;
			utf8_str = g_strdup("");
		}
		// Append the meaning (tmp_long bytes) after the mark.
		worditem.datasize += tmp_long;
		worditem.data = (gchar *)g_realloc(worditem.data,worditem.datasize);		
		/*memcpy(worditem.data+old_size,"M",sizeof(gchar));
		old_size+=sizeof(gchar);
		memcpy(worditem.data+old_size,&tmp_long,sizeof(glong));
		old_size+=sizeof(glong);*/
		memcpy(worditem.data+old_size,utf8_str,tmp_long);				
		old_size+= tmp_long;
		g_free(utf8_str);
		
		g_array_append_val(array, worditem);
		
		// Advance to the next record.
        if ( !cMark )
            p = pMeaning+meaning_len+1;
        else
            p = pMark+mark_len+1;
		wordcount++;
    }
#ifndef DISABLE_CONVERT_LOCALE
	g_iconv_close(locale_converter);
	g_iconv_close(utf8_converter);
#endif

	//g_qsort_with_data(parray->pdata,parray->len,sizeof(gpointer),comparefunc,NULL);
	g_array_sort(array,comparefunc);
	
	long offset_old=0;
	
	gulong i;
	
	// previous_* track the last item written so duplicates can be detected.
	gchar *previous_word = g_strdup(""); // no real word should equal this.
	glong previous_datasize = 0;
	gchar *previous_data = g_strdup("");
	struct _worditem *pworditem;
	for (i=0;i<array->len;i++)
	{
		pworditem = &g_array_index(array, struct _worditem, i);
		
		// should use g_ascii_strcasecmp() ??
		if (strcmp(previous_word,pworditem->word)==0) {
			// NOTE(review): a dropped item's word/data are not freed here - confirm.
			if ((previous_datasize == pworditem->datasize)&&
				(memcmp(previous_data,pworditem->data,previous_datasize)==0)) {
				
				g_print("word %s is complete duplicated! droped!\n" ,previous_word);				
				wordcount--;
				continue;
			}
			else {
				g_print("word %s is duplicated! droped!\n" ,previous_word);				
				wordcount--;
				continue;
				
				// merging is deliberately not done here.
				/*
				g_print("word %s is duplicated! merged!\n" ,previous_word);
				fseek(dicfile,-sizeof(glong),SEEK_CUR);
				pworditem->datasize += previous_datasize;
				fwrite(&(pworditem->datasize),sizeof(glong),1,idxfile);
				fwrite(pworditem->data,sizeof(gchar),pworditem->datasize,dicfile);
				*/
			}									
		}
		// The previous item's buffers are released once a new item replaces it.
		g_free(previous_word);
		g_free(previous_data);
		previous_word = pworditem->word;
		previous_datasize = pworditem->datasize;
		previous_data = pworditem->data;
		
		// Data goes to the .dict file; word, offset and size go to the .idx file.
		offset_old = ftell(dicfile);
		fwrite(pworditem->data,sizeof(gchar),pworditem->datasize,dicfile);
		
		fwrite(pworditem->word,sizeof(gchar),strlen(pworditem->word)+1,idxfile);
		tmpglong = g_htonl(offset_old);
		fwrite(&(tmpglong),sizeof(glong),1,idxfile);
		tmpglong = g_htonl(pworditem->datasize);
		fwrite(&(tmpglong),sizeof(glong),1,idxfile);			
	}
	g_free(previous_word);
	g_free(previous_data);
	g_array_free(array,TRUE);
	
	// Patch the reserved slot with the final word count.
	fseek(idxfile,wordcount_offset,SEEK_SET);
	tmpglong = g_htonl(wordcount);
	fwrite(&(tmpglong),sizeof(glong),1,idxfile);
	
	g_print("old wordcount: %d\n",iCapacity);
	g_print("wordcount: %ld\n",wordcount);

    close(fd);
	fclose(idxfile);
	fclose(dicfile);
}
Beispiel #17
0
/**
 * Convert text with the given iconv converter, tolerating byte sequences
 * that are invalid in the source encoding. Unlike g_convert_with_fallback,
 * which fails on such input, each invalid sequence is replaced with the
 * fallback string and conversion continues after it.
 *
 * If len is -1, strlen(text) is used as the length. Do not pass -1 when the
 * text may contain \0 bytes, e.g. when the source encoding is a multi-byte
 * encoding like UTF-16.
 *
 * The converter is reset after every conversion attempt. If len_out is not
 * NULL it receives the length of the returned, newly allocated string.
 */
gchar *
text_convert_invalid (const gchar* text, gssize len, GIConv converter, const gchar *fallback, gsize *len_out)
{
	GString *output;
	const gchar *current_start;
	const gchar *end;
	gchar *result_part;
	gsize part_len;
	gsize invalid_start_pos;

	if (len == -1)
		len = strlen (text);

	end = text + len;

	/* First attempt: convert everything; on failure invalid_start_pos is
	 * where the first invalid sequence begins. */
	result_part = g_convert_with_iconv (text, len, converter, &invalid_start_pos, &part_len, NULL);
	g_iconv (converter, NULL, NULL, NULL, NULL);

	if (result_part != NULL)
	{
		/* Nothing invalid in the input; hand back the conversion as is. */
		if (len_out != NULL)
			*len_out = part_len;

		return result_part;
	}

	/* At least one invalid sequence: build the result piece by piece. */
	output = g_string_sized_new (len);
	current_start = text;

	do
	{
		g_assert (current_start + invalid_start_pos < end);

		/* Convert the valid bytes in front of the invalid sequence. This
		 * should succeed, but iconv may stop early when the last bytes
		 * belong to a shift sequence, so the updated bytes_read becomes
		 * the real position of the invalid sequence.
		 *
		 * See https://github.com/hexchat/hexchat/issues/1758
		 */
		result_part = g_convert_with_iconv (current_start, invalid_start_pos, converter, &invalid_start_pos, &part_len, NULL);
		g_iconv (converter, NULL, NULL, NULL, NULL);

		g_assert (result_part != NULL);
		g_string_append_len (output, result_part, part_len);
		g_free (result_part);

		/* Stand-in for the invalid byte. */
		g_string_append (output, fallback);

		/* Step over the invalid byte and try the remainder; on failure
		 * invalid_start_pos holds the next invalid position. */
		current_start += invalid_start_pos + 1;

		result_part = g_convert_with_iconv (current_start, end - current_start, converter, &invalid_start_pos, &part_len, NULL);
		g_iconv (converter, NULL, NULL, NULL, NULL);
	}
	while (result_part == NULL);

	/* The remainder converted cleanly; append it and finish. */
	g_string_append_len (output, result_part, part_len);
	g_free (result_part);

	if (len_out != NULL)
		*len_out = output->len;

	return g_string_free (output, FALSE);
}
/*
 * Handler for text changes in the selector's entry.
 *
 * Looks through column 0's model for the first row whose text starts with
 * the entry text and selects it. When priv->smart_match is set, the entry
 * text and the row texts are additionally run through priv->converter and
 * compared case-insensitively; the first row matching that way is kept as a
 * suggestion and selected if no exact prefix match exists. The "changed"
 * signal is then emitted with the handler priv->signal_id blocked.
 *
 * Nothing happens when the entry is empty or the model has no rows.
 */
static void
entry_on_text_changed (GtkEditable * editable,
                       gpointer userdata)
{
  HildonTouchSelector *selector;
  HildonTouchSelectorEntryPrivate *priv;
  GtkTreeModel *model;
  GtkTreeIter iter;
  GtkTreeIter iter_suggested;
  GtkEntry *entry;
  const gchar *prefix;
  gchar *text;
  gboolean found = FALSE;
  gint text_column = -1;
  gchar *ascii_prefix = NULL;
  gint prefix_len = 0;
  gboolean found_suggestion = FALSE;

  entry = GTK_ENTRY (editable);
  selector = HILDON_TOUCH_SELECTOR (userdata);
  priv = HILDON_TOUCH_SELECTOR_ENTRY_GET_PRIVATE (selector);

  text_column =
    hildon_touch_selector_entry_get_text_column (HILDON_TOUCH_SELECTOR_ENTRY (selector));

  prefix = gtk_entry_get_text (entry);

  if (prefix[0] == '\0') {
	  return;
  }

  model = hildon_touch_selector_get_model (selector, 0);

  if (!gtk_tree_model_get_iter_first (model, &iter)) {
    return;
  }

  if (priv->smart_match) {
    /* If the prefix cannot be converted, ascii_prefix stays NULL and smart
       matching is skipped for this change instead of calling strlen(NULL). */
    ascii_prefix = g_convert_with_iconv (prefix, -1, priv->converter, NULL, NULL, NULL);
    if (ascii_prefix != NULL) {
      prefix_len = strlen (ascii_prefix);
    }
  }

  do {
    /* Reset first: gtk_tree_model_get() stores nothing when the column list
       is empty (text_column == -1). */
    text = NULL;
    gtk_tree_model_get (model, &iter, text_column, &text, -1);
    found = (text != NULL) && g_str_has_prefix (text, prefix);

    if (!found && !found_suggestion && ascii_prefix != NULL && text != NULL) {
      gchar *ascii_text = g_convert_with_iconv (text, -1, priv->converter, NULL, NULL, NULL);
      /* A row whose text cannot be converted must not count as a match. */
      if (ascii_text != NULL) {
        found_suggestion = !g_ascii_strncasecmp (ascii_text, ascii_prefix, prefix_len);
      }
      if (found_suggestion) {
        iter_suggested = iter;
      }
      g_free (ascii_text);
    }

    g_free (text);
  } while (found != TRUE && gtk_tree_model_iter_next (model, &iter));

  g_signal_handler_block (selector, priv->signal_id);
  {
    /* We emit the HildonTouchSelector::changed signal because a change in the
       GtkEntry represents a change in current selection, and therefore, users
       should be notified. */
    if (found) {
      hildon_touch_selector_select_iter (selector, 0, &iter, TRUE);
    } else if (found_suggestion) {
      hildon_touch_selector_select_iter (selector, 0, &iter_suggested, TRUE);
    }
    g_signal_emit_by_name (selector, "changed", 0);
  }
  g_signal_handler_unblock (selector, priv->signal_id);

  /* g_free() accepts NULL, so no smart_match check is needed here. */
  g_free (ascii_prefix);
}
Beispiel #19
0
/**
 * Process the lines currently held in irc->readbuffer.
 *
 * The buffer is split with irc_splitlines(); every complete line is
 * (optionally) charset-converted with irc->iconv, parsed and executed.
 * A trailing incomplete line is kept in irc->readbuffer for the next call;
 * if everything was consumed the buffer is released and set to NULL.
 *
 * @param irc  Connection whose read buffer should be processed.
 */
void irc_process(irc_t *irc)
{
	char **lines, *temp, **cmd;
	int i;

	if (irc->readbuffer != NULL) {
		lines = irc_splitlines(irc->readbuffer);

		for (i = 0; *lines[i] != '\0'; i++) {
			char *conv = NULL;

			/* [WvG] If the last line isn't empty, it's an incomplete line and we
			   should wait for the rest to come in before processing it. */
			if (lines[i + 1] == NULL) {
				temp = g_strdup(lines[i]);
				g_free(irc->readbuffer);
				irc->readbuffer = temp;
				/* Step onto the NULL sentinel so the check after the loop
				   does not throw away the buffer we just stored. */
				i++;
				break;
			}

			if (irc->iconv != (GIConv) - 1) {
				gsize bytes_read, bytes_written;

				conv = g_convert_with_iconv(lines[i], -1, irc->iconv,
				                            &bytes_read, &bytes_written, NULL);

				if (conv == NULL || bytes_read != strlen(lines[i])) {
					/* GLib can do strange things if things are not in the expected charset,
					   so let's be a little bit paranoid here: */

					/* The conversion may have produced a buffer even though it
					   did not consume the whole line. Release it here so that
					   neither branch below can leak it. */
					g_free(conv);
					conv = NULL;

					if (irc->status & USTATUS_LOGGED_IN) {
						irc_rootmsg(irc, "Error: Charset mismatch detected. The charset "
						            "setting is currently set to %s, so please make "
						            "sure your IRC client will send and accept text in "
						            "that charset, or tell BitlBee which charset to "
						            "expect by changing the charset setting. See "
						            "`help set charset' for more information. Your "
						            "message was ignored.",
						            set_getstr(&irc->b->set, "charset"));
					} else {
						irc_write(irc, ":%s NOTICE * :%s", irc->root->host,
						          "Warning: invalid characters received at login time.");

						/* Before login, keep the line but replace every byte
						   with the high bit set by '?'. */
						conv = g_strdup(lines[i]);
						for (temp = conv; *temp; temp++) {
							if (*temp & 0x80) {
								*temp = '?';
							}
						}
					}
				}
				/* NULL here means "skip this line". */
				lines[i] = conv;
			}

			if (lines[i] && (cmd = irc_parse_line(lines[i]))) {
				irc_exec(irc, cmd);
				g_free(cmd);
			}

			g_free(conv);

			/* Shouldn't really happen, but just in case... */
			if (!g_slist_find(irc_connection_list, irc)) {
				g_free(lines);
				return;
			}
		}

		/* The loop ended on an empty-string entry rather than on the NULL
		   sentinel: every line was complete, so nothing needs to be kept. */
		if (lines[i] != NULL) {
			g_free(irc->readbuffer);
			irc->readbuffer = NULL;
		}

		g_free(lines);
	}
}
/*
 * @text: The text to convert. Single-byte codes 0x86/0x87 (emphasis on/off)
 *   are dropped and 0x8A is replaced by a newline before conversion; in
 *   multibyte mode the same is done for the 2-byte big-endian codes
 *   0xE086/0xE087/0xE08A
 * @length: The number of bytes to convert, counted from @start, or a
 *   negative value if the text is nul-terminated
 * @start: Where to start converting in the text
 * @giconv: The converter used for the filtered text
 * @is_multibyte: Whether the text is made of 2-byte units
 * @error: The location to store the error, or NULL to ignore errors
 * @returns: A newly allocated converted string (an empty string if nothing
 *   is left after filtering), or NULL if the conversion failed
 *
 * Strip the control codes from text and convert it with @giconv.
 */
static gchar *
convert_to_utf8 (const gchar * text, gint length, guint start,
    GIConv giconv, gboolean is_multibyte, GError ** error)
{
  const gsize unit = is_multibyte ? 2 : 1;
  gchar *new_text;
  gchar *tmp, *pos;
  gsize in_len, i;

  text += start;

  if (length < 0) {
    /* Nul-terminated input: measure it one unit at a time. In multibyte mode
     * the text ends at the first unit whose leading byte is 0. */
    in_len = 0;
    while (text[in_len] != '\0')
      in_len += unit;
  } else {
    in_len = (gsize) length;
  }

  /* Filtering only drops or replaces units, so the scratch buffer never
   * needs more room than the input itself. */
  pos = tmp = g_malloc (in_len);

  if (is_multibyte) {
    /* Only whole 2-byte units are consumed; a dangling trailing byte is
     * ignored instead of being read together with the byte past the end. */
    for (i = 0; i + 1 < in_len; i += 2) {
      guint16 code = GST_READ_UINT16_BE (text + i);

      switch (code) {
        case 0xE086:           /* emphasis on */
        case 0xE087:           /* emphasis off */
          /* skip it */
          break;
        case 0xE08A:
          pos[0] = 0x00;        /* 0x00 0x0A is a new line */
          pos[1] = 0x0A;
          pos += 2;
          break;
        default:
          pos[0] = text[i];
          pos[1] = text[i + 1];
          pos += 2;
          break;
      }
    }
  } else {
    for (i = 0; i < in_len; i++) {
      guint8 code = (guint8) text[i];

      switch (code) {
        case 0x86:             /* emphasis on */
        case 0x87:             /* emphasis off */
          /* skip it */
          break;
        case 0x8A:
          *pos++ = '\n';
          break;
        default:
          *pos++ = text[i];
          break;
      }
    }
  }

  if (pos != tmp) {
    /* bread is not used afterwards, but a bytes_read location is passed on
     * purpose: per the GLib documentation the conversion may then succeed
     * even when the input ends in a partial character. */
    gsize bread = 0;

    new_text =
        g_convert_with_iconv (tmp, pos - tmp, giconv, &bread, NULL, error);
    GST_DEBUG ("Converted to : %s", new_text);
  } else {
    new_text = g_strdup ("");
  }

  g_free (tmp);

  return new_text;
}
Beispiel #21
0
/*
 * Convert at most `size` bytes of `string` with `coder` and append the
 * result to `buffer`.
 *
 * coder:  conversion descriptor; INVALID_CONV is rejected.
 * string: nul-terminated input; NULL is rejected.
 * size:   number of bytes to convert; a negative value means "up to the
 *         terminating nul". Values larger than strlen (string) are clamped.
 * buffer: output string the converted text is appended to; NULL is rejected.
 *
 * Returns ESTR_SUCCESS if no conversion error occurred, ESTR_PROBLEM if some
 * input had to be copied or replaced instead of converted, and ESTR_FAILURE
 * for invalid arguments or an unsupported/failed conversion.
 */
static estr_t
_str_convert (GIConv coder, const char *string, int size, GString * buffer)
{
    estr_t state = ESTR_SUCCESS;
    gchar *tmp_buff = NULL;
    gssize left;
    gsize bytes_read = 0;
    gsize bytes_written = 0;
    GError *error = NULL;
    errno = 0;

    if (coder == INVALID_CONV)
        return ESTR_FAILURE;

    if (string == NULL || buffer == NULL)
        return ESTR_FAILURE;

    /*
       if (! used_class.is_valid_string (string))
       {
       return ESTR_FAILURE;
       }
     */
    if (size < 0)
    {
        size = strlen (string);
    }
    else
    {
        /* never look past the terminating nul */
        left = strlen (string);
        if (left < size)
            size = left;
    }

    left = size;
    /* all-NULL call: presumably resets the converter to its initial state
       (standard iconv semantics) before a new string is processed */
    g_iconv (coder, NULL, NULL, NULL, NULL);

    /* each pass converts as much of the remaining input as possible; on an
       illegal sequence the offending byte is stepped over and the loop
       continues with what is left */
    while (left)
    {
        tmp_buff = g_convert_with_iconv ((const gchar *) string,
                                         left, coder, &bytes_read, &bytes_written, &error);
        if (error)
        {
            /* remember the code: the GError is released before dispatching */
            int code = error->code;

            g_error_free (error);
            error = NULL;

            switch (code)
            {
            case G_CONVERT_ERROR_NO_CONVERSION:
                /* Conversion between the requested character sets is not supported. */
                /* emit one '?' per remaining input byte */
                tmp_buff = g_strnfill (strlen (string), '?');
                g_string_append (buffer, tmp_buff);
                g_free (tmp_buff);
                return ESTR_FAILURE;

            case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
                /* Invalid byte sequence in conversion input. */
                if ((tmp_buff == NULL) && (bytes_read != 0))
                    /* recode valid byte sequence */
                    tmp_buff = g_convert_with_iconv ((const gchar *) string,
                                                     bytes_read, coder, NULL, NULL, NULL);

                if (tmp_buff != NULL)
                {
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                }

                if ((int) bytes_read < left)
                {
                    /* skip the valid prefix plus the one offending byte, then
                       copy that byte (now at string[-1]) through unconverted */
                    string += bytes_read + 1;
                    size -= (bytes_read + 1);
                    left -= (bytes_read + 1);
                    g_string_append_c (buffer, *(string - 1));
                }
                else
                {
                    return ESTR_PROBLEM;
                }
                state = ESTR_PROBLEM;
                break;

            case G_CONVERT_ERROR_PARTIAL_INPUT:
                /* Partial character sequence at end of input. */
                /* NOTE(review): tmp_buff is presumably NULL whenever an error
                   is reported, in which case this append has nothing valid to
                   add - confirm against the GLib documentation */
                g_string_append (buffer, tmp_buff);
                g_free (tmp_buff);
                if ((int) bytes_read < left)
                {
                    /* replace every unconverted trailing byte with '?' */
                    left = left - bytes_read;
                    tmp_buff = g_strnfill (left, '?');
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                }
                return ESTR_PROBLEM;

            case G_CONVERT_ERROR_BAD_URI:      /* Don't know how handle this error :( */
            case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH:    /* Don't know how handle this error :( */
            case G_CONVERT_ERROR_FAILED:       /* Conversion failed for some reason. */
            default:
                g_free (tmp_buff);
                return ESTR_FAILURE;
            }
        }
        else
        {
            if (tmp_buff != NULL)
            {
                if (*tmp_buff)
                {
                    /* normal case: keep the converted text and advance past
                       the consumed input */
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                    string += bytes_read;
                    left -= bytes_read;
                }
                else
                {
                    /* no error but empty output: pass the rest through as is.
                       NOTE(review): this appends up to the terminating nul of
                       string, not just `left` bytes */
                    g_free (tmp_buff);
                    g_string_append (buffer, string);
                    return state;
                }
            }
            else
            {
                /* no result and no error reported: copy the raw input */
                g_string_append (buffer, string);
                return ESTR_PROBLEM;
            }
        }
    }
    return state;
}
/**
 * dvb_text_from_utf8:
 * @text: The text to convert. This should be in UTF-8 format
 * @out_size: (out): the byte length of the new text
 *
 * Converts UTF-8 strings to text characters compliant with EN 300 468.
 * The converted text can be used directly in DVB #GstMpegtsDescriptor
 *
 * The function will try different character maps until the string is
 * completely converted.
 *
 * The function tries the default ISO 6937 character map first.
 *
 * If no character map that contains all characters could be found, the
 * string is converted to ISO 6937 with unknown characters set to `?`.
 *
 * Returns: (transfer full): byte array of size @out_size
 */
guint8 *
dvb_text_from_utf8 (const gchar * text, gsize * out_size)
{
  GError *error = NULL;
  gchar *out_text;
  guint8 *out_buffer;
  guint encoding;
  GIConv giconv = (GIConv) - 1;

  /* We test character maps one-by-one. Start with the default */
  encoding = _ICONV_ISO6937;
  giconv = _get_iconv (_ICONV_UTF8, encoding);
  out_text = g_convert_with_iconv (text, -1, giconv, NULL, out_size, &error);

  if (out_text) {
    GST_DEBUG ("Using default ISO6937 encoding");
    goto out;
  }

  g_clear_error (&error);

  for (encoding = _ICONV_ISO8859_1; encoding <= _ICONV_ISO10646_UTF8;
      encoding++) {
    giconv = _get_iconv (_ICONV_UTF8, encoding);
    if (giconv == (GIConv) - 1)
      continue;
    out_text = g_convert_with_iconv (text, -1, giconv, NULL, out_size, &error);

    if (out_text) {
      GST_DEBUG ("Found suitable character map - %s", iconvtablename[encoding]);
      goto out;
    }

    g_clear_error (&error);
  }

  out_text = g_convert_with_fallback (text, -1, iconvtablename[_ICONV_ISO6937],
      iconvtablename[_ICONV_UTF8], "?", NULL, out_size, &error);

out:

  if (error) {
    GST_WARNING ("Could not convert from utf-8: %s", error->message);
    g_error_free (error);
    g_free (out_text);
    return NULL;
  }

  switch (encoding) {
    case _ICONV_ISO6937:
      /* Default encoding contains no selection bytes. */
      _encode_control_codes (out_text, *out_size, FALSE);
      return (guint8 *) out_text;
    case _ICONV_ISO8859_1:
    case _ICONV_ISO8859_2:
    case _ICONV_ISO8859_3:
    case _ICONV_ISO8859_4:
      /* These character sets requires 3 selection bytes */
      _encode_control_codes (out_text, *out_size, FALSE);
      out_buffer = g_malloc (*out_size + 3);
      out_buffer[0] = 0x10;
      out_buffer[1] = 0x00;
      out_buffer[2] = encoding - _ICONV_ISO8859_1 + 1;
      memcpy (out_buffer + 3, out_text, *out_size);
      *out_size += 3;
      g_free (out_text);
      return out_buffer;
    case _ICONV_ISO8859_5:
    case _ICONV_ISO8859_6:
    case _ICONV_ISO8859_7:
    case _ICONV_ISO8859_8:
    case _ICONV_ISO8859_9:
    case _ICONV_ISO8859_10:
    case _ICONV_ISO8859_11:
    case _ICONV_ISO8859_12:
    case _ICONV_ISO8859_13:
    case _ICONV_ISO8859_14:
    case _ICONV_ISO8859_15:
      /* These character sets requires 1 selection byte */
      _encode_control_codes (out_text, *out_size, FALSE);
      out_buffer = g_malloc (*out_size + 1);
      out_buffer[0] = encoding - _ICONV_ISO8859_5 + 1;
      memcpy (out_buffer + 1, out_text, *out_size);
      *out_size += 1;
      g_free (out_text);
      return out_buffer;
    case _ICONV_UCS_2BE:
    case _ICONV_EUC_KR:
    case _ICONV_UTF_16BE:
      /* These character sets requires 1 selection byte */
      _encode_control_codes (out_text, *out_size, TRUE);
      out_buffer = g_malloc (*out_size + 1);
      out_buffer[0] = encoding - _ICONV_UCS_2BE + 0x11;
      memcpy (out_buffer + 1, out_text, *out_size);
      *out_size += 1;
      g_free (out_text);
      return out_buffer;
    case _ICONV_GB2312:
    case _ICONV_ISO10646_UTF8:
      /* These character sets requires 1 selection byte */
      _encode_control_codes (out_text, *out_size, FALSE);
      out_buffer = g_malloc (*out_size + 1);
      out_buffer[0] = encoding - _ICONV_UCS_2BE + 0x11;
      memcpy (out_buffer + 1, out_text, *out_size);
      *out_size += 1;
      g_free (out_text);
      return out_buffer;
    default:
      g_free (out_text);
      return NULL;
  }
}