/*
 * Add a length-prefixed UCS-2 string field to the protocol tree.
 *
 * The field starts at offset `start` in `tvb` with a 16-bit character count
 * (read with tvb_get_ntohs) followed by that many 2-byte code units.
 * When the file-level converter `ucs2_iconv` is usable and the length does not
 * exceed MC_MAX_UCS2LEN, the converted text is shown as the field value;
 * otherwise (or when conversion fails) the raw bytes are added instead.
 *
 * tree    - tree to add the item to
 * hfindex - header field index of the item
 * tvb     - buffer holding the packet data
 * start   - offset of the length prefix inside tvb
 */
static void proto_tree_add_item_ucs2string (proto_tree *tree, int hfindex, tvbuff_t *tvb, gint start ) 
{
    guint16 ucs2_len;
    /* Assume the raw dump is needed until a conversion actually succeeds. */
    gboolean dump_bytes = TRUE;

    ucs2_len = tvb_get_ntohs( tvb, start );

    if ( ucs2_iconv != (GIConv)-1 && ucs2_len <= MC_MAX_UCS2LEN )
    {
        gchar *in;
        gchar *out = NULL;        /* initialised so the check below never reads garbage */
        gchar *out_const = NULL;
        gsize in_len, out_len, rv_iconv;

        in_len = ucs2_len * 2;
        in = (gchar*)tvb_get_ptr(tvb, start + MC_TYPELEN_UCS2LEN, in_len);

        if ( in != NULL )
        {
            /* A single UCS-2 code unit can need up to 3 bytes once converted
             * (presumably to UTF-8 -- the target of ucs2_iconv is set up
             * elsewhere), so 2 bytes per unit is not enough. */
            out_len = (gsize)ucs2_len * 3;
            /* +1 keeps a terminating NUL after the converted text. */
            out_const = ep_alloc0( (size_t)out_len + 1 );
            out = out_const;
        }

        if ( in != NULL && out != NULL )
        {
            rv_iconv = g_iconv( ucs2_iconv, &in, &in_len, &out, &out_len );
            if ( rv_iconv != (gsize)-1 )
            {
                proto_tree_add_bytes_format_value( tree, hfindex, tvb, start, ucs2_len*2 + MC_TYPELEN_UCS2LEN,
                                                   tvb_get_ptr( tvb, start, ucs2_len*2 + MC_TYPELEN_UCS2LEN),
                                                   "%s", out_const );
                dump_bytes = FALSE;
            }
            else
            {
                /* Reset the converter state so the failure does not leak
                 * into the next conversion. */
                g_iconv( ucs2_iconv, NULL, NULL, NULL, NULL );
            }
        }
    }

    if ( dump_bytes )
    {
        proto_tree_add_item(tree, hfindex, tvb, start, MC_TYPELEN_UCS2LEN + ucs2_len * 2, FALSE);
    }
}
Ejemplo n.º 2
0
Archivo: strutil.c Proyecto: CTU-OSP/mc
/**
 * Convert one key sequence with the given converter and NUL-terminate it.
 *
 * @param conv     iconv descriptor; its state is reset before converting
 * @param keys     input bytes
 * @param ch_size  number of input bytes, or (size_t) -1 to use strlen (keys)
 * @param output   destination buffer
 * @param out_size total size of the destination buffer in bytes, including
 *                 the room needed for the terminating NUL
 *
 * @return ESTR_SUCCESS on success, ESTR_PROBLEM when g_iconv reports EINVAL,
 *         ESTR_FAILURE on any other conversion error or when the buffer
 *         cannot even hold the terminator.
 */
estr_t
str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
{
    size_t left;
    size_t cnv;

    /* Without at least one byte there is nowhere to put the terminator. */
    if (out_size == 0)
        return ESTR_FAILURE;

    g_iconv (conv, NULL, NULL, NULL, NULL);

    left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;

    /* Hide the last byte from g_iconv so that the terminator written below
       always lands inside the buffer, even when the output fills it. */
    out_size--;

    cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
    if (cnv == (size_t) (-1))
        return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;

    /* g_iconv advanced `output` past the converted bytes. */
    output[0] = '\0';
    return ESTR_SUCCESS;
}
Ejemplo n.º 3
0
/*
 * returns the number of bytes that represent the UTF8 encoding buffer
 * in the original encoding that the user specified.
 *
 * Returns 0 when the size cannot be determined (the reverse converter
 * could not be opened or the reverse conversion failed).
 *
 * NOTE: this is slow, but we only call this for the remainder of our
 * buffer (e.g. the partial line at the end of our last chunk of read
 * data). Also, this is only invoked if the file uses an encoding.
 */
static gsize
log_proto_text_server_get_raw_size_of_buffer(LogProtoTextServer *self, const guchar *buffer, gsize buffer_len)
{
  gchar *out;
  const guchar *in;
  gsize avail_out, avail_in;
  /* g_iconv() returns gsize; keeping it in a narrower signed type would
   * truncate the value before it is compared against (gsize) -1 */
  gsize ret;

  if (self->reverse_convert == ((GIConv) -1) && !self->convert_scale)
    {
      /* try to speed up raw size calculation by recognizing the most
       * prominent character encodings and in the case the encoding
       * uses fixed size characters set that in self->convert_scale,
       * which in turn will speed up the reversal of the UTF8 buffer
       * size to raw buffer sizes.
       */
      self->convert_scale = log_proto_get_char_size_for_fixed_encoding(self->super.super.options->encoding);
      if (self->convert_scale == 0)
        {
          /* this encoding is not known, do the conversion for real :( */
          self->reverse_convert = g_iconv_open(self->super.super.options->encoding, "utf-8");
        }
    }

  if (self->convert_scale)
    return g_utf8_strlen((gchar *) buffer, buffer_len) * self->convert_scale;

  if (self->reverse_convert == ((GIConv) -1))
    {
      /* g_iconv_open() failed above; an invalid descriptor must not be
       * handed to g_iconv() */
      msg_error("Internal error, couldn't open the reverse converter from UTF8 to the original encoding",
                evt_tag_printf("encoding", "%s", self->super.super.options->encoding),
                NULL);
      return 0;
    }

  if (self->reverse_buffer_len < buffer_len * 6)
    {
      /* we free and malloc, since we never need the data still in reverse buffer */
      g_free(self->reverse_buffer);
      self->reverse_buffer_len = buffer_len * 6;
      self->reverse_buffer = g_malloc(buffer_len * 6);
    }

  avail_out = self->reverse_buffer_len;
  out = self->reverse_buffer;

  avail_in = buffer_len;
  in = buffer;

  ret = g_iconv(self->reverse_convert, (gchar **) &in, &avail_in, &out, &avail_out);
  if (ret == (gsize) -1)
    {
      /* oops, we cannot reverse that we ourselves converted to UTF-8,
       * this is simply impossible, but never say never */
      msg_error("Internal error, couldn't reverse the internal UTF8 string to the original encoding",
                evt_tag_printf("buffer", "%.*s", (gint) buffer_len, buffer),
                NULL);
      return 0;
    }

  /* bytes produced == capacity minus what is still free */
  return self->reverse_buffer_len - avail_out;
}
Ejemplo n.º 4
0
/* Runs `input` through the iconv descriptor `handler` and returns the
   result as a newly allocated string.  The destination starts out as a
   copy of the input (made with g_strndup, with room for two bytes more
   than the input length); g_iconv then overwrites it in place.  The
   return value of g_iconv is not inspected. */
gchar *gnc_call_iconv(GIConv handler, const gchar* input)
{
    gchar *src = (gchar*)input;
    gsize src_left = strlen(src);
    gsize dst_left = src_left + 2;
    gchar *result = g_strndup(src, dst_left);
    gchar *dst = result;

    g_iconv(handler, &src, &src_left, &dst, &dst_left);

    /* Terminate right after the converted bytes when space remains. */
    if (dst_left != 0)
        *dst = '\0';

    return result;
}
Ejemplo n.º 5
0
/**
 * Check whether a UTF-8 word is accepted by the loaded dictionary.
 *
 * The word is NFC-normalized, converted to the dictionary's 8-bit encoding
 * through m_translate_in, turned into an ichar_t string and looked up with
 * good() / compoundgood().
 *
 * @param utf8Word word to check (UTF-8)
 * @param length   length of utf8Word in bytes; must be non-zero and smaller
 *                 than INPUTWORDLEN + MAXAFFIXLEN
 * @return true if the word is found; false if it is not, or if the checker is
 *         not initialized, the input is rejected, or any conversion step fails.
 */
bool
ISpellChecker::checkWord(const char * const utf8Word, size_t length)
{
	ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
	char szWord[INPUTWORDLEN + MAXAFFIXLEN];

	if (!m_bSuccessfulInit)
		return false;

	if (!utf8Word || length >= (INPUTWORDLEN + MAXAFFIXLEN) || length == 0)
		return false;

	if (!g_iconv_is_valid(m_translate_in))
		return false;

	/* convert to 8bit string and null terminate */
	// the 8bit encodings use precomposed forms
	char *normalizedWord = g_utf8_normalize (utf8Word, length, G_NORMALIZE_NFC);
	// g_utf8_normalize() returns NULL for invalid UTF-8; strlen() on that
	// would crash, so treat such input as "not a word".
	if (!normalizedWord)
		return false;

	char *In = normalizedWord;
	char *Out = szWord;
	size_t len_in = strlen(In);
	size_t len_out = sizeof( szWord ) - 1;	// keep room for the terminator
	const size_t result = g_iconv(m_translate_in, &In, &len_in, &Out, &len_out);
	g_free(normalizedWord);
	if ((size_t)-1 == result)
		return false;
	*Out = '\0';

	// strtoichar() returning non-zero is treated as a failed conversion.
	if (strtoichar(iWord, szWord, sizeof(iWord), 0))
		return false;

	return good(iWord, 0, 0, 1, 0) == 1 ||
	       compoundgood(iWord, 1) == 1;
}
Ejemplo n.º 6
0
/**
 * Check whether a UTF-8 word is accepted by the myspell dictionary.
 *
 * The word is NFC-normalized, converted to the dictionary's 8-bit encoding
 * through m_translate_in and passed to myspell->spell().
 *
 * @param utf8Word word to check (UTF-8)
 * @param len      length of utf8Word in bytes; words longer than MAXWORDLEN
 *                 are rejected
 * @return true if myspell accepts the word; false otherwise, including when
 *         the converter is invalid or normalization/conversion fails.
 */
bool
MySpellChecker::checkWord(const char *utf8Word, size_t len)
{
	if (len > MAXWORDLEN || !g_iconv_is_valid(m_translate_in))
		return false;

	// the 8bit encodings use precomposed forms
	char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
	// g_utf8_normalize() returns NULL for invalid UTF-8; bail out instead
	// of handing NULL to strlen().
	if (!normalizedWord)
		return false;

	char *in = normalizedWord;
	char word8[MAXWORDLEN + 1];
	char *out = word8;
	size_t len_in = strlen(in);
	size_t len_out = sizeof( word8 ) - 1;	// keep room for the terminator
	size_t result = g_iconv(m_translate_in, &in, &len_in, &out, &len_out);
	g_free(normalizedWord);
	if ((size_t)-1 == result)
		return false;
	*out = '\0';

	return myspell->spell(word8) != 0;
}
Ejemplo n.º 7
0
/**
 * Convert text to UTF-8.
 *
 * First tries g_locale_to_utf8(). If that fails, falls back to a raw
 * ISO-8859-1 -> UTF-8 conversion with g_iconv().
 *
 * @param text NUL-terminated input text
 * @return newly allocated, NUL-terminated UTF-8 string, or NULL if both
 *         conversions fail.
 */
char* iofunctions_decode_text (gchar* text) {
    GError* err = NULL;
    gchar* result = 0;
    gsize read = 0, written = 0;

    if (! (result = g_locale_to_utf8 (text, -1, &read, &written, &err))) {
        if (err != NULL)
            g_error_free (err);
        slog (L_ERROR, "failed to convert text from default locale, trying "
                "ISO-8859-1\n");
        /* Each ISO-8859-1 byte becomes at most two UTF-8 bytes; one extra
         * byte is reserved for the terminating NUL. */
        gsize in_size = strlen (text), out_size = in_size * 2;
        gchar* out = (gchar*)g_malloc (out_size + 1);
        gchar* process = out;
        /* TODO: replace these calls to the non-raw glib functions */
        /* The source encoding must be spelled with ASCII hyphens; a
         * typographic minus sign makes g_iconv_open() fail. */
        GIConv cd = g_iconv_open ("UTF-8//IGNORE", "ISO-8859-1");

        if (cd == (GIConv)-1 ||
            (gsize)-1 == g_iconv (cd, &text, &in_size, &process, &out_size)) {
            slog (L_G_ERROR, _("Can not convert text to UTF-8!\n"));
            g_free (out);
            out = NULL;
        }
        else {
            /* g_iconv() does not terminate its output. */
            *process = '\0';
        }

        if (cd != (GIConv)-1)
            g_iconv_close (cd);

        result = out;
    }
    return result;
}
/*
 * Runs inbuf through the stream's iconv descriptor into a freshly
 * allocated buffer that is doubled whenever g_iconv reports E2BIG.
 *
 * On success *outbuf receives the buffer and *outbuf_len the number of
 * bytes written, and TRUE is returned.  An incomplete trailing sequence
 * (EINVAL) is not an error: the unconverted bytes are stashed in
 * stream->priv->iconv_buffer / iconv_buflen and the call still succeeds.
 * On EILSEQ or any other errno, *error is set, the buffer is freed,
 * *outbuf / *outbuf_len are cleared and FALSE is returned.
 */
static gboolean
convert_text (GeditDocumentOutputStream *stream,
              const gchar               *inbuf,
              gsize                      inbuf_len,
              gchar                    **outbuf,
              gsize                     *outbuf_len,
              GError                   **error)
{
	gchar *buffer, *cursor;
	gsize remaining_in, remaining_out, capacity, rc;
	gint saved_errno;
	gboolean finished = FALSE;
	gboolean failed = FALSE;

	remaining_in = inbuf_len;

	/* An empty input still needs some room so that iconv can flush
	   its pending state. */
	capacity = (inbuf_len > 0) ? inbuf_len : 100;

	remaining_out = capacity;
	buffer = g_malloc (capacity);
	cursor = buffer;

	do
	{
		/* A NULL inbuf makes iconv flush its data. */
		rc = g_iconv (stream->priv->iconv,
		              (gchar **)&inbuf, &remaining_in,
		              &cursor, &remaining_out);

		if (rc != (gsize)-1)
		{
			finished = TRUE;
		}
		else
		{
			saved_errno = errno;

			if (saved_errno == EINVAL)
			{
				/* Incomplete text: keep the tail, no error */
				stream->priv->iconv_buffer = g_strndup (inbuf, remaining_in);
				stream->priv->iconv_buflen = remaining_in;
				finished = TRUE;
			}
			else if (saved_errno == E2BIG)
			{
				/* Out of room: double the buffer and retry */
				gsize produced = cursor - buffer;

				capacity *= 2;
				buffer = g_realloc (buffer, capacity);

				cursor = buffer + produced;
				remaining_out = capacity - produced;
			}
			else if (saved_errno == EILSEQ)
			{
				/* TODO: we should escape this text.*/
				g_set_error_literal (error, G_CONVERT_ERROR,
				                     G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
				                     _("Invalid byte sequence in conversion input"));
				failed = TRUE;
			}
			else
			{
				g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
				             _("Error during conversion: %s"),
				             g_strerror (saved_errno));
				failed = TRUE;
			}
		}
	}
	while (!finished && !failed);

	if (!failed)
	{
		*outbuf = buffer;
		*outbuf_len = cursor - buffer;

		return TRUE;
	}

	g_free (buffer);
	*outbuf = NULL;
	*outbuf_len = 0;

	return FALSE;
}
Ejemplo n.º 9
0
// Converts the next piece of m_raw_content (starting at m_raw_pos) with
// m_iconv into a local buffer of at most CONVERT_BUFFER_SIZE bytes, rewrites
// "\r\n" and "\r" line endings in that buffer to "\n" while recording the
// style seen in m_eol_style, and appends the result to m_content.
//
// Returns true while there is more raw input to process in a later call.
// Returns false when waiting for more data from the stream, when everything
// has been read and converted (read_finish() is called in that case), or
// after encoding_error() has been reported.
bool Gobby::OperationOpen::on_idle()
{
	static const unsigned int CONVERT_BUFFER_SIZE = 1024;

	const char* inbuffer = &m_raw_content[m_raw_pos];
	char* inbuf = const_cast<char*>(inbuffer);
	gsize inbytes = m_raw_content.size() - m_raw_pos;
	char outbuffer[CONVERT_BUFFER_SIZE];
	gchar* outbuf = outbuffer;
	gsize outbytes = CONVERT_BUFFER_SIZE;

	/* iconv is defined as libiconv on Windows, or at least when using the
	 * binary packages from ftp.gnome.org. Therefore we can't properly
	 * call Glib::IConv::iconv. Therefore, we use the C API here. */
	const std::size_t result = g_iconv(m_iconv->gobj(),
		&inbuf, &inbytes, &outbuf, &outbytes);
	// Any input left unconverted (e.g. because the output buffer filled up)
	// means another call is needed, unless overridden below.
	bool more_to_process = (inbytes != 0);

	if(result == static_cast<std::size_t>(-1))
	{
		if(errno == EILSEQ)
		{
			// Invalid text for the current encoding
			encoding_error();
			return false;
		}

		if(errno == EINVAL)
		{
			// If EINVAL is set, this means that an incomplete
			// multibyte sequence was at the end of the input.
			// We might have some more bytes, but those do not
			// make up a whole character, so we need to wait for
			// more input.
			if(!m_stream)
			{
				// However, if we already read all input, then
				// there is no more input to come. We
				// consider this an error since the file
				// should not end with an incomplete multibyte
				// sequence.
				encoding_error();
				return false;
			}
			else
			{
				// Otherwise, we need to wait for more data
				// to process.
				more_to_process = false;
			}
		}
	}

	// Advance by the number of input bytes g_iconv actually consumed.
	m_raw_pos += (inbuf - inbuffer);

	// We now have outbuf - outbuffer bytes of valid UTF-8 in outbuffer.
	char* prev = outbuffer;
	char* pos;
	const char to_find[] = { '\r', '\n', '\0' };

	/* TODO: Write directly into the buffer here,
	 * instead of memmoving stuff. */
	// Scan [prev, outbuf) for the next CR, LF or NUL until none is left.
	while( (pos = std::find_first_of<char*>(prev, outbuf,
		to_find, to_find + sizeof(to_find))) != outbuf)
	{
		if(*pos == '\0')
		{
			// There is a nullbyte in the conversion. As normal
			// text files don't contain nullbytes, this only
			// occurs when converting for example a UTF-16 from
			// ISO-8859-1 to UTF-8 (note that the UTF-16 file is
			// valid ISO-8859-1, it just contains lots of
			// nullbytes). We therefore produce an error here.
			encoding_error();
			return false;
		}
		else
		{
			// We convert everything to '\n' as line separator,
			// but remember the current eol-style to correctly
			// save the document back to disk.
			prev = pos + 1;
			// NOTE(review): a '\r' that is the last byte of this
			// buffer takes the plain-CR branch even if the next
			// call's output starts with '\n' -- confirm whether
			// that split case matters.
			if(*pos == '\r' && prev != outbuf && *prev == '\n')
			{
				// CRLF style line break
				// Shift the tail left by one over the byte at
				// prev and shrink the valid range accordingly.
				// NOTE(review): this keeps the '\r' at pos and
				// removes the '\n' -- confirm this matches the
				// "everything to '\n'" intent stated above.
				std::memmove(prev, prev + 1,
				             outbuf - prev - 1);
				m_eol_style = DocumentInfoStorage::EOL_CRLF;
				--outbuf;
			}
			else if(*pos == '\r')
			{
				*pos = '\n';
				m_eol_style = DocumentInfoStorage::EOL_CR;
			}
			else
			{
				m_eol_style = DocumentInfoStorage::EOL_LF;
			}
		}
	}

	// Append the normalized text at the end of the text buffer.
	GtkTextIter insert_iter;
	gtk_text_buffer_get_end_iter(m_content, &insert_iter);
	gtk_text_buffer_insert(m_content, &insert_iter, outbuffer,
	                       outbuf - outbuffer);

	// Done reading and converting the whole file
	if(!more_to_process && !m_stream)
		read_finish();

	return more_to_process;
}
Ejemplo n.º 10
0
/**
 * Converts a given string using the given iconv converter. This is similar to g_convert_with_fallback, except that it is tolerant of sequences in
 * the original input that are invalid even in from_encoding. g_convert_with_fallback fails for such text, whereas this function replaces such a
 * sequence with the fallback string.
 *
 * If len is -1, strlen(text) is used to calculate the length. Do not pass -1 if text is supposed to contain \0 bytes, such as if from_encoding is a
 * multi-byte encoding like UTF-16.
 *
 * @param text       input bytes to convert
 * @param len        number of bytes in text, or -1 to use strlen(text)
 * @param converter  iconv descriptor to use; its state is reset after every conversion attempt
 * @param fallback   string appended in place of each invalid input byte
 * @param len_out    if not NULL, receives the byte length of the returned string
 * @return           a newly allocated string holding the converted text
 */
gchar *
text_convert_invalid (const gchar* text, gssize len, GIConv converter, const gchar *fallback, gsize *len_out)
{
	gchar *result_part;
	gsize result_part_len;
	const gchar *end;
	/* Offset, relative to the start of the most recent conversion attempt, at which that attempt stopped. */
	gsize invalid_start_pos;
	GString *result;
	const gchar *current_start;

	if (len == -1)
	{
		len = strlen (text);
	}

	end = text + len;

	/* Find the first position of an invalid sequence. */
	result_part = g_convert_with_iconv (text, len, converter, &invalid_start_pos, &result_part_len, NULL);
	/* Reset the converter so state from this attempt does not carry over. */
	g_iconv (converter, NULL, NULL, NULL, NULL);

	if (result_part != NULL)
	{
		/* All text converted successfully on the first try. Return it. */

		if (len_out != NULL)
		{
			*len_out = result_part_len;
		}

		return result_part;
	}

	/* One or more invalid sequences exist that need to be replaced with the fallback. */

	result = g_string_sized_new (len);
	current_start = text;

	for (;;)
	{
		g_assert (current_start + invalid_start_pos < end);

		/* Convert everything before the position of the invalid sequence. It should be successful.
		 * But iconv may not convert everything till invalid_start_pos since the last few bytes may be part of a shift sequence.
		 * So get the new bytes_read and use it as the actual invalid_start_pos to handle this.
		 *
		 * See https://github.com/hexchat/hexchat/issues/1758
		 */
		result_part = g_convert_with_iconv (current_start, invalid_start_pos, converter, &invalid_start_pos, &result_part_len, NULL);
		g_iconv (converter, NULL, NULL, NULL, NULL);

		g_assert (result_part != NULL);
		g_string_append_len (result, result_part, result_part_len);
		g_free (result_part);

		/* Append the fallback */
		g_string_append (result, fallback);

		/* Now try converting everything after the invalid sequence. */
		/* The + 1 skips exactly one input byte at the invalid position. */
		current_start += invalid_start_pos + 1;

		result_part = g_convert_with_iconv (current_start, end - current_start, converter, &invalid_start_pos, &result_part_len, NULL);
		g_iconv (converter, NULL, NULL, NULL, NULL);

		if (result_part != NULL)
		{
			/* The rest of the text converted successfully. Append it and return the whole converted text. */

			g_string_append_len (result, result_part, result_part_len);
			g_free (result_part);

			if (len_out != NULL)
			{
				*len_out = result->len;
			}

			/* FALSE keeps the character data, which is what gets returned. */
			return g_string_free (result, FALSE);
		}

		/* The rest of the text didn't convert successfully. invalid_start_pos has the position of the next invalid sequence. */
	}
}
Ejemplo n.º 11
0
/*
 * Converts raw_buffer_len bytes of raw_buffer with self->convert and
 * appends the result to self->buffer at state->pending_buffer_end,
 * updating pending_buffer_end as output is produced.
 *
 * - E2BIG: the output buffer is doubled (capped at max_buffer_size) and
 *   the conversion is retried; if it is already at the cap, this is an
 *   error.
 * - EINVAL (incomplete sequence at the end of the input): for
 *   stream-based sources the unconverted tail is saved in
 *   state->raw_buffer_leftover and the call succeeds; otherwise it is an
 *   error.
 * - EILSEQ or anything else: error.
 *
 * Returns TRUE on success, FALSE on error. The state obtained at the top
 * is released with log_proto_buffered_server_put_state() on every path.
 */
static gboolean
log_proto_buffered_server_convert_from_raw(LogProtoBufferedServer *self, const guchar *raw_buffer, gsize raw_buffer_len)
{
  /* some data was read */
  gsize avail_in = raw_buffer_len;
  gsize avail_out;
  gchar *out;
  /* NOTE(review): g_iconv() returns gsize but the result is kept in a gint
   * and then compared with (gsize) -1 -- confirm the narrowing is intended */
  gint  ret = -1;
  gboolean success = FALSE;
  LogProtoBufferedServerState *state = log_proto_buffered_server_get_state(self);

  do
    {
      /* (re)compute the output window on every pass: self->buffer may have
       * been reallocated by the E2BIG branch below */
      avail_out = state->buffer_size - state->pending_buffer_end;
      out = (gchar *) self->buffer + state->pending_buffer_end;

      ret = g_iconv(self->convert, (gchar **) &raw_buffer, &avail_in, (gchar **) &out, &avail_out);
      if (ret == (gsize) -1)
        {
          switch (errno)
            {
            case EINVAL:
              if (self->stream_based)
                {
                  /* Incomplete text, do not report an error, rather try to read again */
                  state->pending_buffer_end = state->buffer_size - avail_out;

                  if (avail_in > 0)
                    {
                      if (avail_in > sizeof(state->raw_buffer_leftover))
                        {
                          msg_error("Invalid byte sequence, the remaining raw buffer is larger than the supported leftover size",
                                    evt_tag_str("encoding", self->super.options->encoding),
                                    evt_tag_int("avail_in", avail_in),
                                    evt_tag_int("leftover_size", sizeof(state->raw_buffer_leftover)));
                          goto error;
                        }
                      /* keep the unconverted tail and take it out of the
                       * accounted raw size */
                      memcpy(state->raw_buffer_leftover, raw_buffer, avail_in);
                      state->raw_buffer_leftover_size = avail_in;
                      state->raw_buffer_size -= avail_in;
                      msg_trace("Leftover characters remained after conversion, delaying message until another chunk arrives",
                                evt_tag_str("encoding", self->super.options->encoding),
                                evt_tag_int("avail_in", avail_in));
                      goto success;
                    }
                }
              else
                {
                  msg_error("Byte sequence too short, cannot convert an individual frame in its entirety",
                            evt_tag_str("encoding", self->super.options->encoding),
                            evt_tag_int("avail_in", avail_in));
                  goto error;
                }
              break;
            case E2BIG:
              /* record what was converted so far before growing */
              state->pending_buffer_end = state->buffer_size - avail_out;
              /* extend the buffer */

              if (state->buffer_size < self->super.options->max_buffer_size)
                {
                  state->buffer_size *= 2;
                  if (state->buffer_size > self->super.options->max_buffer_size)
                    state->buffer_size = self->super.options->max_buffer_size;

                  self->buffer = g_realloc(self->buffer, state->buffer_size);

                  /* recalculate the out pointer, and add what we have now */
                  ret = -1;
                }
              else
                {
                  msg_error("Incoming byte stream requires a too large conversion buffer, probably invalid character sequence",
                            evt_tag_str("encoding", self->super.options->encoding),
                            evt_tag_printf("buffer", "%.*s", (gint) state->pending_buffer_end, self->buffer));
                  goto error;
                }
              break;
            case EILSEQ:
            default:
              /* NOTE(review): the message says "skipping character" but this
               * path jumps to error without skipping -- confirm the wording */
              msg_notice("Invalid byte sequence or other error while converting input, skipping character",
                         evt_tag_str("encoding", self->super.options->encoding),
                         evt_tag_printf("char", "0x%02x", *(guchar *) raw_buffer));
              goto error;
            }
        }
      else
        {
          state->pending_buffer_end = state->buffer_size - avail_out;
        }
    }
  while (avail_in > 0);

 success:
  success = TRUE;
 error:
  /* both exits release the state acquired at the top */
  log_proto_buffered_server_put_state(self);
  return success;
}
Ejemplo n.º 12
0
/*
 * Converts the string `str` from encoding `from` to encoding `to` and
 * returns the result in a malloc()ed buffer that the caller must free().
 *
 * Returns NULL if the converter cannot be opened or memory runs out.
 * Input that cannot be converted is skipped one input character at a time
 * (a message is printed to stderr) instead of aborting the conversion.
 */
char *str_convert_encoding(int from, int to, const char *str)
{
	/*
	 * And here it should just be a matter of calling glib's g_convert().
	 * Or so I thought.  Alas, they chickened out of the hard part: how to 
	 * figure out the size of a zero-terminated string in any arbitrary 
	 * encoding.
	 * Since there is no advantage in using g_convert(), might as well 
	 * keep my old implementation.  It's worth using their iconv wrappers 
	 * though, because they provide libiconv in systems that don't have it 
	 * natively.
	 */

	GIConv conv;
	char *result;
	char *grown;
	char *inbuf, *outbuf;
	size_t inbpc, outbpc;
	size_t inbytes, outbytes;
	size_t inbytesleft, outbytesleft;
	size_t res;

	inbytes = strsize(from, str);

	if (strcasecmp(encoding_name(to), encoding_name(from)) == 0) {
		/* Same encoding: a plain copy is enough */
		result = malloc(inbytes);
		if (result == NULL)
			return NULL;
		memcpy(result, str, inbytes);
		return result;
	}

	conv = g_iconv_open(encoding_name(to), encoding_name(from));
	if (conv == (GIConv)-1) {
		fprintf(stderr, "convert_encoding: cannot convert from %s to %s\n", 
			encoding_name(from), encoding_name(to));
		return NULL;
	}

	inbpc = bytes_per_char(from);
	outbpc = bytes_per_char(to);

	/* estimate the converted size */
	outbytes = ((double)outbpc / (double)inbpc) * inbytes;
	/* optimize common cases (tuned for western european languages) */
	if (to == UTF_8 && inbpc == 1)
		outbytes = ceil(1.25 * inbytes);
	else if (to == UTF_16)
		outbytes += 2;	/* for the BOM */

	/* Never start from an empty buffer: malloc(0) may return NULL and
	   the growth estimate below needs something to scale from */
	if (outbytes == 0)
		outbytes = 1;

	result = malloc(outbytes);
	if (result == NULL) {
		g_iconv_close(conv);
		return NULL;
	}

	inbuf = (char*)str;
	inbytesleft = inbytes;
	outbuf = result;
	outbytesleft = outbytes;

	while(1) {
		res = g_iconv(conv, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
		if (res != (size_t)-1)
			break;

		if (errno == E2BIG) {
			/* Ran out of space, alloc more 
			   This code tries hard to avoid the need for a second realloc,
			   while still keeping over-allocation to a minimum */
			size_t consumed = inbytes - inbytesleft;
			size_t bytes_written = outbuf - result;
			size_t newsize = 0;

			/* Extrapolate only when there is progress to extrapolate
			   from; otherwise the ratio would divide by zero */
			if (consumed > 0 && bytes_written > 0) {
				double done = (double)consumed / (double)inbytes;
				newsize = ceil((bytes_written / done) * 1.1);
			}
			/* Always grow, so the loop is guaranteed to make progress */
			if (newsize <= outbytes)
				newsize = outbytes * 2 + 16;

			grown = realloc(result, newsize);
			if (grown == NULL) {
				free(result);
				g_iconv_close(conv);
				return NULL;
			}
			result = grown;
			outbytesleft += newsize - outbytes;
			outbytes = newsize;
			outbuf = result + bytes_written;
		}
		else {
			/* Invalid or inconvertible char, skip it
			   Seems better than aborting the conversion... */
			size_t skip_in = (inbytesleft < inbpc) ? inbytesleft : inbpc;
			size_t skip_out = (outbytesleft < outbpc) ? outbytesleft : outbpc;

			fprintf(stderr, "convert_encoding: conversion error at offset %lu\n", 
				(unsigned long)(inbytes - inbytesleft));

			/* Nothing left to skip over: stop instead of spinning */
			if (skip_in == 0)
				break;

			/* The counters are unsigned, so the skip is clamped to what
			   is actually left rather than subtracted blindly.
			   NOTE(review): as before, the skipped output bytes are
			   left unwritten -- confirm whether a placeholder should
			   be stored there. */
			inbuf += skip_in;
			inbytesleft -= skip_in;
			outbuf += skip_out;
			outbytesleft -= skip_out;
		}
	}

	g_iconv_close(conv);
	return result;
}
Ejemplo n.º 13
0
Archivo: io.c Proyecto: kyoushuu/gwaei
//!
//! @brief Copies a file and creates a new one using the new encoding
//! @param SOURCE_PATH The source file to change the encoding on.
//! @param TARGET_PATH The place to save the new file with the new encoding.
//! @param SOURCE_ENCODING The encoding of the source file.
//! @param TARGET_ENCODING The wanted encoding in the new file to be created.
//! @param cb A LwIoProgressCallback to use to give progress feedback or NULL
//! @param data A gpointer to data to pass to the LwIoProgressCallback
//! @param cancellable Not consulted by this function
//! @param error pointer to a GError to write errors to, or NULL
//! @return TRUE if the copy ran to completion, FALSE if an error was already
//!         set on entry, a file or the converter could not be opened, or a
//!         read/write error occurred
//!
gboolean 
lw_io_copy_with_encoding (const gchar           *SOURCE_PATH, 
                          const gchar           *TARGET_PATH,
                          const gchar           *SOURCE_ENCODING, 
                          const gchar           *TARGET_ENCODING,
                          LwIoProgressCallback   cb, 
                          gpointer               data, 
                          GCancellable          *cancellable,
                          GError               **error)
{
    //A NULL error pointer is legal GError usage; only look through it when present
    if (error != NULL && *error != NULL) return FALSE;

    //Declarations
    FILE* readfd = NULL;
    FILE* writefd = NULL;
    const gint MAX = 1024 * 2;
    gchar source_buffer[MAX];
    gchar target_buffer[MAX];
    gchar *sptr, *tptr;
    size_t read, source_bytes_left, target_bytes_left;
    gdouble fraction;
    size_t position, filesize;
    GIConv conv;
    gboolean success;

    filesize = lw_io_get_filesize (SOURCE_PATH);
    position = 0;

    //Initializations (each resource is checked before it is used)
    readfd = fopen (SOURCE_PATH, "rb");
    if (readfd == NULL)
    {
      g_set_error (error, G_FILE_ERROR, G_FILE_ERROR_FAILED,
                   "Could not open %s for reading", SOURCE_PATH);
      return FALSE;
    }

    writefd = fopen (TARGET_PATH, "wb");
    if (writefd == NULL)
    {
      g_set_error (error, G_FILE_ERROR, G_FILE_ERROR_FAILED,
                   "Could not open %s for writing", TARGET_PATH);
      fclose(readfd);
      return FALSE;
    }

    conv = g_iconv_open (TARGET_ENCODING, SOURCE_ENCODING);
    if (conv == (GIConv) -1)
    {
      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
                   "Conversion from %s to %s is not supported",
                   SOURCE_ENCODING, TARGET_ENCODING);
      fclose(readfd);
      fclose(writefd);
      return FALSE;
    }

    //Read a chunk
    while (ferror(readfd) == 0 && feof(readfd) == 0 && ferror(writefd) == 0)
    {
      read = fread(source_buffer, sizeof(gchar), MAX, readfd);
      source_bytes_left = read;
      sptr = source_buffer;

      //Try to convert and write the chunk
      while (source_bytes_left > 0 && ferror(writefd) == 0 && feof(writefd) == 0)
      {
        target_bytes_left = MAX;
        tptr = target_buffer;

        g_iconv (conv, &sptr, &source_bytes_left, &tptr, &target_bytes_left);
        if ((size_t) MAX != target_bytes_left) //Bytes were converted!
        {
          fwrite(target_buffer, sizeof(gchar), MAX - target_bytes_left, writefd); 
        }
        else if (sptr == source_buffer)
        {
          //Nothing could be converted from the very start of the chunk, so
          //re-reading it would fail the same way: step one byte past the
          //start and read a fresh chunk from there. Clearing
          //source_bytes_left leaves this loop instead of retrying forever.
          fprintf(stderr, "The file you are converting may be corrupt! Trying to skip a character...\n");
          fseek(readfd, 1L - (long) source_bytes_left, SEEK_CUR);
          source_bytes_left = 0;
        }
        else if (source_bytes_left > 0) //Bytes failed to convert!
        {
          //Push the unconverted tail back so the next chunk starts with it
          fseek(readfd, -(long) source_bytes_left, SEEK_CUR);
          source_bytes_left = 0;
        }
      }
      position = ftell(readfd);
      //An empty source would otherwise divide by zero
      fraction = (filesize > 0) ? (gdouble) position / (gdouble) filesize : 1.0;
      if (cb != NULL) cb (fraction, data);
    }

    success = (ferror(readfd) == 0 && ferror(writefd) == 0);
    if (!success)
    {
      g_set_error (error, G_FILE_ERROR, G_FILE_ERROR_IO,
                   "I/O error while converting %s to %s", SOURCE_PATH, TARGET_PATH);
    }

    //Cleanup
    g_iconv_close (conv);
    fclose(readfd);
    fclose(writefd);

    return success;
}
Ejemplo n.º 14
0
Archivo: gconvert.c Proyecto: dimkr/rox
/*
 * Converts `len` bytes of `str` (or strlen (str) bytes when len < 0)
 * with `converter` into a newly allocated, NUL-terminated buffer that
 * is doubled whenever g_iconv() reports E2BIG.
 *
 * If bytes_read is given it receives the number of input bytes consumed;
 * when it is NULL, unconsumed input counts as an error. If bytes_written
 * is given it receives the output length without the terminating NUL.
 * Problems are reported through g_warning(); the `error` argument is not
 * written by this implementation.
 *
 * Returns the converted string, or NULL on error.
 */
gchar*
g_convert_with_iconv (const gchar *str,
		      gssize       len,
		      GIConv       converter,
		      gsize       *bytes_read, 
		      gsize       *bytes_written, 
		      GError     **error)
{
  gchar *buffer;
  gchar *write_pos;
  const gchar *read_pos;
  gsize in_left;
  gsize out_left;
  gsize capacity;
  gsize rc;
  gboolean failed = FALSE;
  gboolean retry;
  
  g_return_val_if_fail (str != NULL, NULL);
  g_return_val_if_fail (converter != (GIConv) -1, NULL);
     
  if (len < 0)
    len = strlen (str);

  read_pos = str;
  in_left = len;

  /* one byte of the buffer is always held back for the terminator */
  capacity = len + 1;
  out_left = capacity - 1;
  buffer = g_malloc (capacity);
  write_pos = buffer;

  do
    {
      retry = FALSE;

      rc = g_iconv (converter, (char **)&read_pos, &in_left, &write_pos, &out_left);
      if (rc == (size_t) -1)
	{
	  const int saved_errno = errno;

	  if (saved_errno == E2BIG)
	    {
	      /* out of room: double the buffer and go again */
	      size_t produced = write_pos - buffer;

	      capacity *= 2;
	      buffer = g_realloc (buffer, capacity);

	      write_pos = buffer + produced;
	      out_left = capacity - produced - 1;
	      retry = TRUE;
	    }
	  else if (saved_errno == EINVAL)
	    {
	      /* Incomplete text, do not report an error */
	    }
	  else if (saved_errno == EILSEQ)
	    {
	      g_warning("Invalid byte sequence in conversion input");
	      failed = TRUE;
	    }
	  else
	    {
	      g_warning("Error during conversion: %s", strerror (saved_errno));
	      failed = TRUE;
	    }
	}
    }
  while (retry);

  *write_pos = '\0';
  
  if (bytes_read)
    {
      *bytes_read = read_pos - str;
    }
  else if ((read_pos - str) != len && !failed)
    {
      /* nobody asked how far we got, so leftovers are an error */
      g_warning("Partial character sequence at end of input");
      failed = TRUE;
    }

  if (bytes_written)
    *bytes_written = write_pos - buffer;	/* Doesn't include '\0' */

  if (!failed)
    return buffer;

  g_free (buffer);
  return NULL;
}
Ejemplo n.º 15
0
/*
 * Converts `string` with `coder` and appends the result to `buffer`.
 *
 * At most `size` bytes are converted; a negative size, or one larger than
 * strlen (string), means the whole string. Conversion runs in a loop through
 * g_convert_with_iconv(); when an illegal sequence is met, the valid prefix
 * is appended, the offending byte is copied through unchanged and the loop
 * continues after it.
 *
 * Returns ESTR_SUCCESS when everything converted cleanly, ESTR_PROBLEM when
 * output was produced but some input could not be converted, and
 * ESTR_FAILURE on invalid arguments or unrecoverable conversion errors.
 */
static estr_t
_str_convert (GIConv coder, const char *string, int size, GString * buffer)
{
    estr_t state = ESTR_SUCCESS;
    gchar *tmp_buff = NULL;
    gssize left;
    gsize bytes_read = 0;
    gsize bytes_written = 0;
    GError *error = NULL;
    errno = 0;

    if (coder == INVALID_CONV)
        return ESTR_FAILURE;

    if (string == NULL || buffer == NULL)
        return ESTR_FAILURE;

    /*
       if (! used_class.is_valid_string (string))
       {
       return ESTR_FAILURE;
       }
     */
    /* Clamp size to the actual string length. */
    if (size < 0)
    {
        size = strlen (string);
    }
    else
    {
        left = strlen (string);
        if (left < size)
            size = left;
    }

    left = size;
    /* Reset the converter state before starting. */
    g_iconv (coder, NULL, NULL, NULL, NULL);

    while (left)
    {
        tmp_buff = g_convert_with_iconv ((const gchar *) string,
                                         left, coder, &bytes_read, &bytes_written, &error);
        if (error)
        {
            /* Remember the code; the GError itself is released right away. */
            int code = error->code;

            g_error_free (error);
            error = NULL;

            switch (code)
            {
            case G_CONVERT_ERROR_NO_CONVERSION:
                /* Conversion between the requested character sets is not supported. */
                /* Emit one '?' per remaining input byte as a placeholder. */
                tmp_buff = g_strnfill (strlen (string), '?');
                g_string_append (buffer, tmp_buff);
                g_free (tmp_buff);
                return ESTR_FAILURE;

            case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
                /* Invalid byte sequence in conversion input. */
                if ((tmp_buff == NULL) && (bytes_read != 0))
                    /* recode valid byte sequence */
                    tmp_buff = g_convert_with_iconv ((const gchar *) string,
                                                     bytes_read, coder, NULL, NULL, NULL);

                if (tmp_buff != NULL)
                {
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                }

                if ((int) bytes_read < left)
                {
                    /* Step over the valid prefix plus the one offending byte,
                       then copy that byte to the output unchanged. */
                    string += bytes_read + 1;
                    size -= (bytes_read + 1);
                    left -= (bytes_read + 1);
                    g_string_append_c (buffer, *(string - 1));
                }
                else
                {
                    return ESTR_PROBLEM;
                }
                state = ESTR_PROBLEM;
                break;

            case G_CONVERT_ERROR_PARTIAL_INPUT:
                /* Partial character sequence at end of input. */
                /* NOTE(review): tmp_buff is appended without a NULL check
                   here, unlike the illegal-sequence branch above -- confirm
                   it cannot be NULL on this path. */
                g_string_append (buffer, tmp_buff);
                g_free (tmp_buff);
                if ((int) bytes_read < left)
                {
                    /* Pad the unconverted tail with '?' placeholders. */
                    left = left - bytes_read;
                    tmp_buff = g_strnfill (left, '?');
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                }
                return ESTR_PROBLEM;

            case G_CONVERT_ERROR_BAD_URI:      /* Don't know how handle this error :( */
            case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH:    /* Don't know how handle this error :( */
            case G_CONVERT_ERROR_FAILED:       /* Conversion failed for some reason. */
            default:
                g_free (tmp_buff);
                return ESTR_FAILURE;
            }
        }
        else
        {
            if (tmp_buff != NULL)
            {
                if (*tmp_buff)
                {
                    /* Normal case: append the output and advance past the
                       consumed input. */
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                    string += bytes_read;
                    left -= bytes_read;
                }
                else
                {
                    /* Empty output without an error: append the remaining
                       input as is and stop. */
                    g_free (tmp_buff);
                    g_string_append (buffer, string);
                    return state;
                }
            }
            else
            {
                /* No output and no error reported: append the remaining
                   input unconverted and flag a problem. */
                g_string_append (buffer, string);
                return ESTR_PROBLEM;
            }
        }
    }
    return state;
}
Ejemplo n.º 16
0
// Fills buf with up to n bytes of text.
//
// Input comes from the internal buffer (bufptr/buflen) and is refilled with
// ReadInternal() as needed. When a converter is set (cd != (GIConv) -1) the
// input is run through g_iconv(); otherwise bytes are copied verbatim.
// Unconsumed input is remembered in bufptr/buflen for the next call.
//
// Returns the number of bytes stored in buf, or -1 when g_iconv() fails with
// EILSEQ or an unexpected error.
ssize_t
TextStream::Read (char *buf, size_t n)
{
	size_t inleft = buflen;
	char *inbuf = bufptr;
	char *outbuf = buf;
	size_t outleft = n;
	ssize_t nread;
	size_t r;
	
	do {
		if (cd != (GIConv) -1) {
			if (g_iconv (cd, &inbuf, &inleft, &outbuf, &outleft) == (size_t) -1) {
				switch (errno) {
				case E2BIG:
					// not enough space available in the output buffer
					goto out;
				case EINVAL:
					// incomplete multibyte character sequence
					// NOTE(review): this path returns without calling
					// ReadInternal(), so the incomplete sequence stays
					// at bufptr for the next call -- confirm a later
					// call can make progress.
					goto out;
				case EILSEQ:
					// illegal multibyte sequence
					return -1;
				default:
					// unknown error, fail
					return -1;
				}
			}
		} else {
			// no converter: plain copy of as much as fits
			r = MIN (inleft, outleft);
			memcpy (outbuf, inbuf, r);
			outleft -= r;
			outbuf += r;
			inleft -= r;
			inbuf += r;
		}
		
		if (outleft == 0 || eof)
			break;
		
		// buffer more data
		// move any unconsumed input to the front so that new data can be
		// appended right behind it
		if (inleft > 0)
			memmove (buffer, inbuf, inleft);
		
		inbuf = buffer + inleft;
		if ((nread = ReadInternal (inbuf, sizeof (buffer) - inleft)) <= 0) {
			eof = true;
			break;
		}
		
		inleft += nread;
		inbuf = buffer;
	} while (true);
	
	// at end of input, call g_iconv() with no input so the converter can
	// write out anything it is still holding
	if (eof && cd != (GIConv) -1)
		g_iconv (cd, NULL, NULL, &outbuf, &outleft);
	
out:
	
	// remember where the unconsumed input starts for the next call
	buflen = inleft;
	bufptr = inbuf;
	
	return (outbuf - buf);
}
Ejemplo n.º 17
0
void Gobby::OperationSave::write_next()
{
	gchar* inbuf;
	gsize inlen;
	char newlinebuf[2] = { '\r', '\n' };

	if(m_current_line_index < m_current_line->second)
	{
		inbuf = m_current_line->first + m_current_line_index;
		inlen = m_current_line->second - m_current_line_index;
	}
	else
	{
		// Write newline
		switch(m_eol_style)
		{
		case DocumentInfoStorage::EOL_CR:
			inbuf = newlinebuf + 0;
			inlen = 1;
			break;
		case DocumentInfoStorage::EOL_LF:
			inbuf = newlinebuf + 1;
			inlen = 1;
			break;
		case DocumentInfoStorage::EOL_CRLF:
			inbuf = newlinebuf + 0;
			inlen = 2;
			break;
		default:
			g_assert_not_reached();
			break;
		}
	}

	gchar* outbuf = m_buffer;
	gsize outlen = BUFFER_SIZE;

	gchar* preserve_inbuf = inbuf;

	/* iconv is defined as libiconv on Windows, or at least when using the
	 * binary packages from ftp.gnome.org. Therefore we can't properly
	 * call Glib::IConv::iconv. Therefore, we use the C API here. */
	std::size_t retval = g_iconv(
		m_iconv.gobj(), &inbuf, &inlen, &outbuf, &outlen);

	if(retval == static_cast<std::size_t>(-1))
	{
		g_assert(errno != EILSEQ);
		// E2BIG and EINVAL are fully OK here.
	}
	else if(retval > 0)
	{
		error(_("The document contains one or more characters that "
		        "cannot be encoded in the specified character "
		        "coding."));
		return;
	}

	// Advance bytes read.
	m_current_line_index += inbuf - preserve_inbuf;
	m_buffer_size = BUFFER_SIZE - outlen;
	m_buffer_index = 0;

	g_assert(m_buffer_size > 0);

	if(m_current_line_index > m_current_line->second)
	{
		// Converted whole line:
		g_free(m_current_line->first);
		m_current_line = m_lines.erase(m_current_line);
		m_current_line_index = 0;
	}

	m_stream->write_async(m_buffer, m_buffer_size,
	                      sigc::mem_fun(*this,
			                    &OperationSave::on_stream_write));
}