/*
 * Adds a length-prefixed UCS-2 string field to the protocol tree.
 *
 * The field starts at offset `start` in `tvb` with a 16-bit big-endian count
 * of UCS-2 code units, followed by that many 2-byte code units.  When the
 * shared `ucs2_iconv` converter is usable and the string is not longer than
 * MC_MAX_UCS2LEN, the converted text is shown as the item value; in every
 * other case (no converter, oversized string, conversion failure) the raw
 * bytes are added with proto_tree_add_item() instead.
 */
static void
proto_tree_add_item_ucs2string (proto_tree *tree, int hfindex, tvbuff_t *tvb, gint start )
{
    guint16  ucs2_len;
    gboolean dump_bytes = TRUE;   /* cleared only after a successful conversion */

    ucs2_len = tvb_get_ntohs( tvb, start );

    if ( ucs2_iconv != (GIConv)-1 && ucs2_len <= MC_MAX_UCS2LEN ) {
        gchar *in;
        gchar *out       = NULL;
        gchar *out_const = NULL;
        gsize  in_len;
        gsize  out_len   = 0;
        gsize  rv_iconv;

        in_len = ucs2_len * 2;
        in = (gchar*)tvb_get_ptr(tvb, start + MC_TYPELEN_UCS2LEN, in_len);

        if ( in ) {
            /* A single UCS-2 code unit may need up to 3 bytes once converted
             * (assuming ucs2_iconv targets UTF-8 -- TODO confirm); 2 bytes per
             * unit made g_iconv fail with E2BIG on e.g. CJK text. */
            out_len   = (gsize)ucs2_len * 3;
            out_const = (gchar*)ep_alloc0( (size_t)out_len + 1);
            out       = out_const;
        }

        if ( in != NULL && out != NULL ) {
            rv_iconv = g_iconv( ucs2_iconv, &in, &in_len, &out, &out_len );
            if ( rv_iconv != (gsize)-1 ) {
                proto_tree_add_bytes_format_value( tree, hfindex, tvb, start,
                        ucs2_len*2 + MC_TYPELEN_UCS2LEN,
                        tvb_get_ptr( tvb, start, ucs2_len*2 + MC_TYPELEN_UCS2LEN),
                        "%s", out_const );
                dump_bytes = FALSE;
            } else {
                /* put the shared converter back into its initial state */
                g_iconv( ucs2_iconv, NULL, 0, NULL, 0 );
            }
        }
    }

    if ( dump_bytes ) {
        proto_tree_add_item(tree, hfindex, tvb, start, MC_TYPELEN_UCS2LEN + ucs2_len * 2, FALSE);
    }
}
/*
 * Converts one character (or short key sequence) with the given converter.
 *
 * conv     - iconv descriptor; it is reset to its initial state first.
 * keys     - input bytes.
 * ch_size  - number of input bytes, or (size_t) -1 to use strlen (keys).
 * output   - destination buffer.
 * out_size - total size of the destination buffer in bytes, including the
 *            room needed for the terminating NUL.
 *
 * Returns ESTR_SUCCESS with a NUL-terminated result in `output`,
 * ESTR_PROBLEM when g_iconv reports EINVAL (incomplete input), and
 * ESTR_FAILURE for any other failure, including a destination buffer that
 * cannot hold the converted text plus its terminator.
 */
estr_t
str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
{
    size_t left;
    size_t cnv;

    g_iconv (conv, NULL, NULL, NULL, NULL);

    /* Not even the terminator fits. */
    if (out_size == 0)
        return ESTR_FAILURE;

    /* Keep one byte back for the NUL written below; previously a conversion
     * that filled the buffer completely terminated one byte past its end. */
    out_size--;

    left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;

    cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
    if (cnv == (size_t) (-1))
        return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;

    output[0] = '\0';
    return ESTR_SUCCESS;
}
/*
 * Computes how many bytes the given UTF-8 buffer occupied in the encoding
 * the user configured, i.e. before it was converted to UTF-8.
 *
 * NOTE: this is slow, but it is only called for the remainder of the buffer
 * (e.g. the partial line at the end of the last chunk of read data), and
 * only when the source uses an encoding at all.
 *
 * Returns 0 when the reverse conversion fails.
 */
static gsize
log_proto_text_server_get_raw_size_of_buffer(LogProtoTextServer *self, const guchar *buffer, gsize buffer_len)
{
  const guchar *src = buffer;
  gsize src_left = buffer_len;
  gchar *dst;
  gsize dst_left;
  gint rc;

  /* First call: decide between the cheap fixed-width path and a real
   * reverse converter. */
  if (self->reverse_convert == ((GIConv) -1) && !self->convert_scale)
    {
      /* Well-known fixed-size encodings only need a multiplication. */
      self->convert_scale = log_proto_get_char_size_for_fixed_encoding(self->super.super.options->encoding);

      /* Unknown encoding: the conversion has to be done for real. */
      if (self->convert_scale == 0)
        self->reverse_convert = g_iconv_open(self->super.super.options->encoding, "utf-8");
    }

  if (self->convert_scale)
    return g_utf8_strlen((gchar *) buffer, buffer_len) * self->convert_scale;

  /* The old contents are never needed, so free + malloc instead of realloc. */
  if (self->reverse_buffer_len < buffer_len * 6)
    {
      g_free(self->reverse_buffer);
      self->reverse_buffer_len = buffer_len * 6;
      self->reverse_buffer = g_malloc(buffer_len * 6);
    }

  dst = self->reverse_buffer;
  dst_left = self->reverse_buffer_len;

  rc = g_iconv(self->reverse_convert, (gchar **) &src, &src_left, &dst, &dst_left);
  if (rc != (gsize) -1)
    return self->reverse_buffer_len - dst_left;

  /* We could not reverse what we ourselves converted to UTF-8; this should
   * be impossible, but never say never. */
  msg_error("Internal error, couldn't reverse the internal UTF8 string to the original encoding",
            evt_tag_printf("buffer", "%.*s", (gint) buffer_len, buffer),
            NULL);
  return 0;
}
/*
 * Returns a newly allocated, NUL-terminated string holding `input` converted
 * by the given iconv handler.  The caller owns the result and releases it
 * with g_free().
 *
 * The output buffer starts slightly larger than the input and is doubled
 * whenever g_iconv reports E2BIG, so conversions that expand the text are no
 * longer cut off.  On any other conversion error the text converted so far
 * is returned.
 */
gchar *gnc_call_iconv(GIConv handler, const gchar* input)
{
    gchar *inbuffer = (gchar*)input;
    gchar *outbuffer, *outbufferstart;
    gsize inbytes, outbytes, outsize;

    g_return_val_if_fail(input != NULL, NULL);

    inbytes = strlen(inbuffer);
    outsize = inbytes + 2;

    /* One extra byte, never handed to g_iconv, is reserved for the NUL. */
    outbufferstart = (gchar*)g_malloc(outsize + 1);
    outbuffer = outbufferstart;
    outbytes = outsize;

    while (g_iconv(handler, &inbuffer, &inbytes, &outbuffer, &outbytes) == (gsize) - 1
            && errno == E2BIG)
    {
        /* Out of room: double the buffer and continue where we stopped. */
        gsize used = outbuffer - outbufferstart;

        outsize *= 2;
        outbufferstart = (gchar*)g_realloc(outbufferstart, outsize + 1);
        outbuffer = outbufferstart + used;
        outbytes = outsize - used;
    }

    *outbuffer = '\0';
    return outbufferstart;
}
bool ISpellChecker::checkWord(const char * const utf8Word, size_t length) { ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN]; char szWord[INPUTWORDLEN + MAXAFFIXLEN]; if (!m_bSuccessfulInit) return false; if (!utf8Word || length >= (INPUTWORDLEN + MAXAFFIXLEN) || length == 0) return false; bool retVal = false; if (!g_iconv_is_valid(m_translate_in)) return false; else { /* convert to 8bit string and null terminate */ size_t len_in, len_out, result; // the 8bit encodings use precomposed forms char *normalizedWord = g_utf8_normalize (utf8Word, length, G_NORMALIZE_NFC); char *In = normalizedWord; char *Out = szWord; len_in = strlen(In); len_out = sizeof( szWord ) - 1; result = g_iconv(m_translate_in, &In, &len_in, &Out, &len_out); g_free(normalizedWord); if ((size_t)-1 == result) return false; *Out = '\0'; } if (!strtoichar(iWord, szWord, sizeof(iWord), 0)) { if (good(iWord, 0, 0, 1, 0) == 1 || compoundgood(iWord, 1) == 1) { retVal = true; } } return retVal; }
bool MySpellChecker::checkWord(const char *utf8Word, size_t len) { if (len > MAXWORDLEN || !g_iconv_is_valid(m_translate_in)) return false; // the 8bit encodings use precomposed forms char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC); char *in = normalizedWord; char word8[MAXWORDLEN + 1]; char *out = word8; size_t len_in = strlen(in); size_t len_out = sizeof( word8 ) - 1; size_t result = g_iconv(m_translate_in, &in, &len_in, &out, &len_out); g_free(normalizedWord); if ((size_t)-1 == result) return false; *out = '\0'; if (myspell->spell(word8)) return true; else return false; }
/*
 * Converts `text` to UTF-8.
 *
 * The text is first interpreted in the current locale's encoding.  If that
 * fails, it is converted from ISO-8859-1 instead.
 *
 * Returns a newly allocated, NUL-terminated UTF-8 string that the caller
 * releases with g_free(), or NULL when both conversions fail.
 */
char* iofunctions_decode_text (gchar* text) {
    GError* err = NULL;
    gchar* result = NULL;
    gsize read = 0, written = 0;

    result = g_locale_to_utf8 (text, -1, &read, &written, &err);
    if (result != NULL)
        return result;

    if (err != NULL)
        g_error_free (err);

    slog (L_ERROR, "failed to convert text from default locale, trying "
            "ISO-8859-1\n");

    /* TODO: replace these calls to the non-raw glib functions */
    /* The source encoding used to be spelled with a Unicode minus sign
     * (U+2212) instead of an ASCII hyphen, which no iconv implementation
     * recognises, so this fallback could never succeed. */
    GIConv cd = g_iconv_open ("UTF-8//IGNORE", "ISO-8859-1");
    if (cd == (GIConv) -1) {
        slog (L_G_ERROR, _("Can not convert text to UTF-8!\n"));
        return NULL;
    }

    /* Every ISO-8859-1 byte becomes at most two UTF-8 bytes; one more byte
     * is reserved for the terminating NUL. */
    gsize in_size = strlen (text), out_size = in_size * 2;
    gchar* out = (gchar*)g_malloc (out_size + 1);
    gchar* process = out;

    if ((gsize) -1 == g_iconv (cd, &text, &in_size, &process, &out_size)) {
        slog (L_G_ERROR, _("Can not convert text to UTF-8!\n"));
        g_free (out);
        out = NULL;
    } else {
        *process = '\0';
    }

    g_iconv_close (cd);
    return out;
}
static gboolean convert_text (GeditDocumentOutputStream *stream, const gchar *inbuf, gsize inbuf_len, gchar **outbuf, gsize *outbuf_len, GError **error) { gchar *out, *dest; gsize in_left, out_left, outbuf_size, res; gint errsv; gboolean done, have_error; in_left = inbuf_len; /* set an arbitrary length if inbuf_len is 0, this is needed to flush the iconv data */ outbuf_size = (inbuf_len > 0) ? inbuf_len : 100; out_left = outbuf_size; out = dest = g_malloc (outbuf_size); done = FALSE; have_error = FALSE; while (!done && !have_error) { /* If we reached here is because we need to convert the text, so we convert it using iconv. See that if inbuf is NULL the data will be flushed */ res = g_iconv (stream->priv->iconv, (gchar **)&inbuf, &in_left, &out, &out_left); /* something went wrong */ if (res == (gsize)-1) { errsv = errno; switch (errsv) { case EINVAL: /* Incomplete text, do not report an error */ stream->priv->iconv_buffer = g_strndup (inbuf, in_left); stream->priv->iconv_buflen = in_left; done = TRUE; break; case E2BIG: { /* allocate more space */ gsize used = out - dest; outbuf_size *= 2; dest = g_realloc (dest, outbuf_size); out = dest + used; out_left = outbuf_size - used; } break; case EILSEQ: /* TODO: we should escape this text.*/ g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Invalid byte sequence in conversion input")); have_error = TRUE; break; default: g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, _("Error during conversion: %s"), g_strerror (errsv)); have_error = TRUE; break; } } else { done = TRUE; } } if (have_error) { g_free (dest); *outbuf = NULL; *outbuf_len = 0; return FALSE; } *outbuf = dest; *outbuf_len = out - dest; return TRUE; }
bool Gobby::OperationOpen::on_idle() { static const unsigned int CONVERT_BUFFER_SIZE = 1024; const char* inbuffer = &m_raw_content[m_raw_pos]; char* inbuf = const_cast<char*>(inbuffer); gsize inbytes = m_raw_content.size() - m_raw_pos; char outbuffer[CONVERT_BUFFER_SIZE]; gchar* outbuf = outbuffer; gsize outbytes = CONVERT_BUFFER_SIZE; /* iconv is defined as libiconv on Windows, or at least when using the * binary packages from ftp.gnome.org. Therefore we can't propely * call Glib::IConv::iconv. Therefore, we use the C API here. */ const std::size_t result = g_iconv(m_iconv->gobj(), &inbuf, &inbytes, &outbuf, &outbytes); bool more_to_process = (inbytes != 0); if(result == static_cast<std::size_t>(-1)) { if(errno == EILSEQ) { // Invalid text for the current encoding encoding_error(); return false; } if(errno == EINVAL) { // If EINVAL is set, this means that an incomplete // multibyte sequence was at the end of the input. // We might have some more bytes, but those do not // make up a whole character, so we need to wait for // more input. if(!m_stream) { // However, if we already read all input, then // there is no more input to come. We // consider this an error since the file // should not end with an incomplete multibyte // sequence. encoding_error(); return false; } else { // Otherwise, we need to wait for more data // to process. more_to_process = false; } } } m_raw_pos += (inbuf - inbuffer); // We now have outbuf - outbuffer bytes of valid UTF-8 in outbuffer. char* prev = outbuffer; char* pos; const char to_find[] = { '\r', '\n', '\0' }; /* TODO: Write directly into the buffer here, * instead of memmoving stuff. */ while( (pos = std::find_first_of<char*>(prev, outbuf, to_find, to_find + sizeof(to_find))) != outbuf) { if(*pos == '\0') { // There is a nullbyte in the conversion. 
As normal // text files don't contain nullbytes, this only // occurs when converting for example a UTF-16 from // ISO-8859-1 to UTF-8 (note that the UTF-16 file is // valid ISO-8859-1, it just contains lots of // nullbytes). We therefore produce an error here. encoding_error(); return false; } else { // We convert everything to '\n' as line separator, // but remember the current eol-style to correctly // save the document back to disk. prev = pos + 1; if(*pos == '\r' && prev != outbuf && *prev == '\n') { // CRLF style line break std::memmove(prev, prev + 1, outbuf - prev - 1); m_eol_style = DocumentInfoStorage::EOL_CRLF; --outbuf; } else if(*pos == '\r') { *pos = '\n'; m_eol_style = DocumentInfoStorage::EOL_CR; } else { m_eol_style = DocumentInfoStorage::EOL_LF; } } } GtkTextIter insert_iter; gtk_text_buffer_get_end_iter(m_content, &insert_iter); gtk_text_buffer_insert(m_content, &insert_iter, outbuffer, outbuf - outbuffer); // Done reading and converting the whole file if(!more_to_process && !m_stream) read_finish(); return more_to_process; }
/** * Converts a given string using the given iconv converter. This is similar to g_convert_with_fallback, except that it is tolerant of sequences in * the original input that are invalid even in from_encoding. g_convert_with_fallback fails for such text, whereas this function replaces such a * sequence with the fallback string. * * If len is -1, strlen(text) is used to calculate the length. Do not pass -1 if text is supposed to contain \0 bytes, such as if from_encoding is a * multi-byte encoding like UTF-16. */ gchar * text_convert_invalid (const gchar* text, gssize len, GIConv converter, const gchar *fallback, gsize *len_out) { gchar *result_part; gsize result_part_len; const gchar *end; gsize invalid_start_pos; GString *result; const gchar *current_start; if (len == -1) { len = strlen (text); } end = text + len; /* Find the first position of an invalid sequence. */ result_part = g_convert_with_iconv (text, len, converter, &invalid_start_pos, &result_part_len, NULL); g_iconv (converter, NULL, NULL, NULL, NULL); if (result_part != NULL) { /* All text converted successfully on the first try. Return it. */ if (len_out != NULL) { *len_out = result_part_len; } return result_part; } /* One or more invalid sequences exist that need to be replaced with the fallback. */ result = g_string_sized_new (len); current_start = text; for (;;) { g_assert (current_start + invalid_start_pos < end); /* Convert everything before the position of the invalid sequence. It should be successful. * But iconv may not convert everything till invalid_start_pos since the last few bytes may be part of a shift sequence. * So get the new bytes_read and use it as the actual invalid_start_pos to handle this. 
* * See https://github.com/hexchat/hexchat/issues/1758 */ result_part = g_convert_with_iconv (current_start, invalid_start_pos, converter, &invalid_start_pos, &result_part_len, NULL); g_iconv (converter, NULL, NULL, NULL, NULL); g_assert (result_part != NULL); g_string_append_len (result, result_part, result_part_len); g_free (result_part); /* Append the fallback */ g_string_append (result, fallback); /* Now try converting everything after the invalid sequence. */ current_start += invalid_start_pos + 1; result_part = g_convert_with_iconv (current_start, end - current_start, converter, &invalid_start_pos, &result_part_len, NULL); g_iconv (converter, NULL, NULL, NULL, NULL); if (result_part != NULL) { /* The rest of the text converted successfully. Append it and return the whole converted text. */ g_string_append_len (result, result_part, result_part_len); g_free (result_part); if (len_out != NULL) { *len_out = result->len; } return g_string_free (result, FALSE); } /* The rest of the text didn't convert successfully. invalid_start_pos has the position of the next invalid sequence. */ } }
/*
 * Converts raw_buffer_len bytes of raw input with self->convert and appends
 * the result to self->buffer at state->pending_buffer_end.
 *
 * The output buffer is doubled on E2BIG until it reaches
 * options->max_buffer_size.  In stream-based mode an incomplete multibyte
 * sequence at the end of the input is copied into state->raw_buffer_leftover
 * for the next call.
 *
 * Returns TRUE when the input was converted (possibly leaving leftover
 * bytes), FALSE on any conversion error.  The state obtained at the top is
 * always released through log_proto_buffered_server_put_state().
 */
static gboolean
log_proto_buffered_server_convert_from_raw(LogProtoBufferedServer *self, const guchar *raw_buffer, gsize raw_buffer_len)
{
  /* some data was read */
  gsize avail_in = raw_buffer_len;
  gsize avail_out;
  gchar *out;
  gint ret = -1;
  gboolean success = FALSE;
  LogProtoBufferedServerState *state = log_proto_buffered_server_get_state(self);

  do
    {
      /* recomputed on every pass, because the E2BIG branch may have
       * reallocated self->buffer */
      avail_out = state->buffer_size - state->pending_buffer_end;
      out = (gchar *) self->buffer + state->pending_buffer_end;

      ret = g_iconv(self->convert, (gchar **) &raw_buffer, &avail_in, (gchar **) &out, &avail_out);
      if (ret == (gsize) -1)
        {
          switch (errno)
            {
            case EINVAL:
              if (self->stream_based)
                {
                  /* Incomplete text, do not report an error, rather try to read again */
                  state->pending_buffer_end = state->buffer_size - avail_out;

                  if (avail_in > 0)
                    {
                      if (avail_in > sizeof(state->raw_buffer_leftover))
                        {
                          msg_error("Invalid byte sequence, the remaining raw buffer is larger than the supported leftover size",
                                    evt_tag_str("encoding", self->super.options->encoding),
                                    evt_tag_int("avail_in", avail_in),
                                    evt_tag_int("leftover_size", sizeof(state->raw_buffer_leftover)));
                          goto error;
                        }
                      /* stash the unconverted tail and take it out of the
                       * accounted raw buffer size */
                      memcpy(state->raw_buffer_leftover, raw_buffer, avail_in);
                      state->raw_buffer_leftover_size = avail_in;
                      state->raw_buffer_size -= avail_in;
                      msg_trace("Leftover characters remained after conversion, delaying message until another chunk arrives",
                                evt_tag_str("encoding", self->super.options->encoding),
                                evt_tag_int("avail_in", avail_in));
                      goto success;
                    }
                  /* avail_in == 0: falls out of the switch and the loop
                   * condition below ends the conversion */
                }
              else
                {
                  msg_error("Byte sequence too short, cannot convert an individual frame in its entirety",
                            evt_tag_str("encoding", self->super.options->encoding),
                            evt_tag_int("avail_in", avail_in));
                  goto error;
                }
              break;
            case E2BIG:
              /* keep what has been converted so far */
              state->pending_buffer_end = state->buffer_size - avail_out;
              /* extend the buffer */

              if (state->buffer_size < self->super.options->max_buffer_size)
                {
                  state->buffer_size *= 2;
                  if (state->buffer_size > self->super.options->max_buffer_size)
                    state->buffer_size = self->super.options->max_buffer_size;

                  self->buffer = g_realloc(self->buffer, state->buffer_size);

                  /* recalculate the out pointer, and add what we have now */
                  ret = -1;
                }
              else
                {
                  msg_error("Incoming byte stream requires a too large conversion buffer, probably invalid character sequence",
                            evt_tag_str("encoding", self->super.options->encoding),
                            evt_tag_printf("buffer", "%.*s", (gint) state->pending_buffer_end, self->buffer));
                  goto error;
                }
              break;
            case EILSEQ:
            default:
              /* NOTE(review): the message says "skipping character", but the
               * code below aborts the whole conversion -- confirm intent */
              msg_notice("Invalid byte sequence or other error while converting input, skipping character",
                         evt_tag_str("encoding", self->super.options->encoding),
                         evt_tag_printf("char", "0x%02x", *(guchar *) raw_buffer));
              goto error;
            }
        }
      else
        {
          state->pending_buffer_end = state->buffer_size - avail_out;
        }
    }
  while (avail_in > 0);

  /* reached by falling out of the loop or by an explicit jump */
success:
  success = TRUE;
  /* both the success and the error path release the state */
error:
  log_proto_buffered_server_put_state(self);
  return success;
}
char *str_convert_encoding(int from, int to, const char *str) { /* * And here it should just be a matter of calling glib's g_convert(). * Or so I thought. Alas, they chickened out of the hard part: how to * figure out the size of a zero-terminaded string in any arbitrary * encoding. * Since there is no advantage in using g_convert(), might as well * keep my old implementation. It's worth using their iconv wrapppers * though, because they provide libiconv in systems that don't have it * natively. */ GIConv conv; char *result; char *inbuf, *outbuf; size_t inbpc, outbpc; size_t inbytes, outbytes; size_t inbytesleft, outbytesleft; size_t res; inbytes = strsize(from, str); if (strcasecmp(encoding_name(to), encoding_name(from)) == 0) { result = malloc(inbytes); memcpy(result, str, inbytes); return result; } conv = g_iconv_open(encoding_name(to), encoding_name(from)); if (conv == (GIConv)-1) { fprintf(stderr, "convert_encoding: cannot convert from %s to %s\n", encoding_name(from), encoding_name(to)); return NULL; } inbpc = bytes_per_char(from); outbpc = bytes_per_char(to); /* estimate the converted size */ outbytes = ((double)outbpc / (double)inbpc) * inbytes; /* optimize common cases (tuned for western european languages) */ if (to == UTF_8 && inbpc == 1) outbytes = ceil(1.25 * inbytes); else if (to == UTF_16) outbytes += 2; /* for the BOM */ //printf("inbytes : %i\noutbytes: %i\n", inbytes, outbytes); result = malloc(outbytes); inbuf = (char*)str; inbytesleft = inbytes; outbuf = result; outbytesleft = outbytes; while(1) { res = g_iconv(conv, &inbuf, &inbytesleft, &outbuf, &outbytesleft); if (res == (size_t)-1) { if (errno == E2BIG) { /* Ran out of space, alloc more This code tries hard to avoid the need for a second realloc, while still keeping over-allocation to a minimum */ double done = 1.0 - (double)inbytesleft / (double)inbytes; size_t bytes_written = outbuf - result; size_t newsize = ceil((bytes_written / done) * 1.1); //printf("growing: done=%g%%, old size=%i, 
new size=%i\n", // 100.0*done, outbytes, newsize); outbytesleft += newsize - outbytes; outbytes = newsize; result = realloc(result, outbytes); outbuf = result + bytes_written; continue; } else { /* Invalid or inconvertible char, skip it Seems better than aborting the conversion... */ fprintf(stderr, "convert_encoding: conversion error at offset %i\n", inbytes-inbytesleft); inbuf += inbpc; inbytesleft = max(inbytesleft - inbpc, 0); outbuf += outbpc; outbytesleft = max(outbytesleft - outbpc, 0); continue; } } break; } //printf("%i of %i bytes unused (wasted %g%%)\n", // outbytesleft, outbytes, 100.0*(double)outbytesleft/(double)outbytes); g_iconv_close(conv); return result; }
//! //! @brief Copies a file and creates a new one using the new encoding //! @param SOURCE_PATH The source file to change the encoding on. //! @param TARGET_PATH The place to save the new file with the new encoding. //! @param SOURCE_ENCODING The encoding of the source file. //! @param TARGET_ENCODING THe wanted encoding in the new file to be created. //! @param cb A LwIoProgressCallback to use to give progress feedback or NULL //! @param data A gpointer to data to pass to the LwIoProgressCallback //! @param error pointer to a GError to write errors to //! @return The status of the conversion opertaion //! gboolean lw_io_copy_with_encoding (const gchar *SOURCE_PATH, const gchar *TARGET_PATH, const gchar *SOURCE_ENCODING, const gchar *TARGET_ENCODING, LwIoProgressCallback cb, gpointer data, GCancellable *cancellable, GError **error) { if (*error != NULL) return FALSE; //Declarations FILE* readfd = NULL; FILE* writefd = NULL; const gint MAX = 1024 * 2; gchar source_buffer[MAX]; gchar target_buffer[MAX]; gchar *sptr, *tptr; size_t read, source_bytes_left, target_bytes_left; gdouble fraction; size_t position, filesize; GIConv conv; filesize = lw_io_get_filesize (SOURCE_PATH); position = 0; //Initializations readfd = fopen (SOURCE_PATH, "rb"); writefd = fopen (TARGET_PATH, "wb"); conv = g_iconv_open (TARGET_ENCODING, SOURCE_ENCODING); //Read a chunk while (ferror(readfd) == 0 && feof(readfd) == 0) { read = fread(source_buffer, sizeof(gchar), MAX, readfd); source_bytes_left = read; sptr = source_buffer; //Try to convert and write the chunk while (source_bytes_left > 0 && ferror(writefd) == 0 && feof(writefd) == 0) { target_bytes_left = MAX; tptr = target_buffer; g_iconv (conv, &sptr, &source_bytes_left, &tptr, &target_bytes_left); if (MAX != target_bytes_left) //Bytes were converted! 
{ fwrite(target_buffer, sizeof(gchar), MAX - target_bytes_left, writefd); } else if (source_bytes_left == MAX && target_bytes_left == MAX) { fprintf(stderr, "The file you are converting may be corrupt! Trying to skip a character...\n"); fseek(readfd, 1L - source_bytes_left, SEEK_CUR); } else if (source_bytes_left > 0) //Bytes failed to convert! { fseek(readfd, -source_bytes_left, SEEK_CUR); source_bytes_left = 0; } } position = ftell(readfd); fraction = (gdouble) position / (gdouble) filesize; if (cb != NULL) cb (fraction, data); } //Cleanup g_iconv_close (conv); fclose(readfd); fclose(writefd); return TRUE; }
gchar* g_convert_with_iconv (const gchar *str, gssize len, GIConv converter, gsize *bytes_read, gsize *bytes_written, GError **error) { gchar *dest; gchar *outp; const gchar *p; gsize inbytes_remaining; gsize outbytes_remaining; gsize err; gsize outbuf_size; gboolean have_error = FALSE; g_return_val_if_fail (str != NULL, NULL); g_return_val_if_fail (converter != (GIConv) -1, NULL); if (len < 0) len = strlen (str); p = str; inbytes_remaining = len; outbuf_size = len + 1; /* + 1 for nul in case len == 1 */ outbytes_remaining = outbuf_size - 1; /* -1 for nul */ outp = dest = g_malloc (outbuf_size); again: err = g_iconv (converter, (char **)&p, &inbytes_remaining, &outp, &outbytes_remaining); if (err == (size_t) -1) { switch (errno) { case EINVAL: /* Incomplete text, do not report an error */ break; case E2BIG: { size_t used = outp - dest; outbuf_size *= 2; dest = g_realloc (dest, outbuf_size); outp = dest + used; outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */ goto again; } case EILSEQ: g_warning("Invalid byte sequence in conversion input"); have_error = TRUE; break; default: g_warning("Error during conversion: %s", strerror (errno)); have_error = TRUE; break; } } *outp = '\0'; if (bytes_read) *bytes_read = p - str; else { if ((p - str) != len) { if (!have_error) { g_warning("Partial character sequence at end of input"); have_error = TRUE; } } } if (bytes_written) *bytes_written = outp - dest; /* Doesn't include '\0' */ if (have_error) { g_free (dest); return NULL; } else return dest; }
/*
 * Converts `string` with `coder` and appends the result to `buffer`.
 *
 * size   - number of bytes of `string` to convert; a negative value, or a
 *          value larger than strlen (string), means the whole string.
 *
 * Returns ESTR_SUCCESS when everything converted cleanly, ESTR_PROBLEM when
 * some input had to be passed through unconverted or replaced with '?', and
 * ESTR_FAILURE when the converter or the arguments are unusable or the
 * conversion failed for a reason that cannot be worked around.
 */
static estr_t
_str_convert (GIConv coder, const char *string, int size, GString * buffer)
{
    estr_t state = ESTR_SUCCESS;
    gchar *tmp_buff = NULL;
    gssize left;
    gsize bytes_read = 0;
    gsize bytes_written = 0;
    GError *error = NULL;
    errno = 0;

    if (coder == INVALID_CONV)
        return ESTR_FAILURE;

    if (string == NULL || buffer == NULL)
        return ESTR_FAILURE;

    /*
       if (! used_class.is_valid_string (string))
       {
       return ESTR_FAILURE;
       }
     */
    if (size < 0)
    {
        size = strlen (string);
    }
    else
    {
        /* never read past the terminating NUL */
        left = strlen (string);
        if (left < size)
            size = left;
    }

    left = size;
    g_iconv (coder, NULL, NULL, NULL, NULL);

    while (left)
    {
        tmp_buff = g_convert_with_iconv ((const gchar *) string,
                                         left, coder, &bytes_read, &bytes_written, &error);
        if (error)
        {
            int code = error->code;

            g_error_free (error);
            error = NULL;

            switch (code)
            {
            case G_CONVERT_ERROR_NO_CONVERSION:
                /* Conversion between the requested character sets is not supported. */
                tmp_buff = g_strnfill (strlen (string), '?');
                g_string_append (buffer, tmp_buff);
                g_free (tmp_buff);
                return ESTR_FAILURE;

            case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
                /* Invalid byte sequence in conversion input. */
                if ((tmp_buff == NULL) && (bytes_read != 0))
                    /* recode valid byte sequence */
                    tmp_buff = g_convert_with_iconv ((const gchar *) string,
                                                     bytes_read, coder, NULL, NULL, NULL);

                if (tmp_buff != NULL)
                {
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                }

                if ((int) bytes_read < left)
                {
                    /* step over the offending byte and copy it verbatim */
                    string += bytes_read + 1;
                    size -= (bytes_read + 1);
                    left -= (bytes_read + 1);
                    g_string_append_c (buffer, *(string - 1));
                }
                else
                {
                    return ESTR_PROBLEM;
                }
                state = ESTR_PROBLEM;
                break;

            case G_CONVERT_ERROR_PARTIAL_INPUT:
                /* Partial character sequence at end of input. */
                /* The converter may hand back NULL together with the error;
                 * g_string_append () must not be given a NULL string. */
                if (tmp_buff != NULL)
                {
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                }
                if ((int) bytes_read < left)
                {
                    /* stand-ins for the bytes that could not be converted */
                    left = left - bytes_read;
                    tmp_buff = g_strnfill (left, '?');
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                }
                return ESTR_PROBLEM;

            case G_CONVERT_ERROR_BAD_URI:      /* Don't know how handle this error :( */
            case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH:    /* Don't know how handle this error :( */
            case G_CONVERT_ERROR_FAILED:       /* Conversion failed for some reason. */
            default:
                g_free (tmp_buff);
                return ESTR_FAILURE;
            }
        }
        else
        {
            if (tmp_buff != NULL)
            {
                if (*tmp_buff)
                {
                    g_string_append (buffer, tmp_buff);
                    g_free (tmp_buff);
                    string += bytes_read;
                    left -= bytes_read;
                }
                else
                {
                    /* empty result: pass the input through unchanged */
                    g_free (tmp_buff);
                    g_string_append (buffer, string);
                    return state;
                }
            }
            else
            {
                /* failure without error details: pass the input through */
                g_string_append (buffer, string);
                return ESTR_PROBLEM;
            }
        }
    }
    return state;
}
ssize_t TextStream::Read (char *buf, size_t n) { size_t inleft = buflen; char *inbuf = bufptr; char *outbuf = buf; size_t outleft = n; ssize_t nread; size_t r; do { if (cd != (GIConv) -1) { if (g_iconv (cd, &inbuf, &inleft, &outbuf, &outleft) == (size_t) -1) { switch (errno) { case E2BIG: // not enough space available in the output buffer goto out; case EINVAL: // incomplete multibyte character sequence goto out; case EILSEQ: // illegal multibyte sequence return -1; default: // unknown error, fail return -1; } } } else { r = MIN (inleft, outleft); memcpy (outbuf, inbuf, r); outleft -= r; outbuf += r; inleft -= r; inbuf += r; } if (outleft == 0 || eof) break; // buffer more data if (inleft > 0) memmove (buffer, inbuf, inleft); inbuf = buffer + inleft; if ((nread = ReadInternal (inbuf, sizeof (buffer) - inleft)) <= 0) { eof = true; break; } inleft += nread; inbuf = buffer; } while (true); if (eof && cd != (GIConv) -1) g_iconv (cd, NULL, NULL, &outbuf, &outleft); out: buflen = inleft; bufptr = inbuf; return (outbuf - buf); }
void Gobby::OperationSave::write_next() { gchar* inbuf; gsize inlen; char newlinebuf[2] = { '\r', '\n' }; if(m_current_line_index < m_current_line->second) { inbuf = m_current_line->first + m_current_line_index; inlen = m_current_line->second - m_current_line_index; } else { // Write newline switch(m_eol_style) { case DocumentInfoStorage::EOL_CR: inbuf = newlinebuf + 0; inlen = 1; break; case DocumentInfoStorage::EOL_LF: inbuf = newlinebuf + 1; inlen = 1; break; case DocumentInfoStorage::EOL_CRLF: inbuf = newlinebuf + 0; inlen = 2; break; default: g_assert_not_reached(); break; } } gchar* outbuf = m_buffer; gsize outlen = BUFFER_SIZE; gchar* preserve_inbuf = inbuf; /* iconv is defined as libiconv on Windows, or at least when using the * binary packages from ftp.gnome.org. Therefore we can't properly * call Glib::IConv::iconv. Therefore, we use the C API here. */ std::size_t retval = g_iconv( m_iconv.gobj(), &inbuf, &inlen, &outbuf, &outlen); if(retval == static_cast<std::size_t>(-1)) { g_assert(errno != EILSEQ); // E2BIG and EINVAL are fully OK here. } else if(retval > 0) { error(_("The document contains one or more characters that " "cannot be encoded in the specified character " "coding.")); return; } // Advance bytes read. m_current_line_index += inbuf - preserve_inbuf; m_buffer_size = BUFFER_SIZE - outlen; m_buffer_index = 0; g_assert(m_buffer_size > 0); if(m_current_line_index > m_current_line->second) { // Converted whole line: g_free(m_current_line->first); m_current_line = m_lines.erase(m_current_line); m_current_line_index = 0; } m_stream->write_async(m_buffer, m_buffer_size, sigc::mem_fun(*this, &OperationSave::on_stream_write)); }