/* Check whether the final extension of @extension corresponds to a supported
 * stream encoding and, if so, whether the rest of the name has a handler.
 *
 * The final extension (from the last '.') is compared against the list
 * returned by listext_encoded() for the guessed encoding. On a match the
 * string is temporarily cut at that '.', get_content_type_backends() is asked
 * about the shortened name, and the '.' is put back before returning.
 *
 * Returns the backend's answer for the shortened name, or NULL when no
 * encoding was guessed, there is no '.', or the final extension is not in
 * the list. */
static inline unsigned char *
check_encoding_type(unsigned char *extension)
{
	enum stream_encoding encoding = guess_encoding(extension);
	unsigned char *dot = (unsigned char *) strrchr((const char *) extension, '.');
	const unsigned char *const *candidate;

	if (!dot || encoding == ENCODING_NONE)
		return NULL;

	candidate = listext_encoded(encoding);
	if (!candidate)
		return NULL;

	while (*candidate) {
		if (strcmp((const char *) *candidate, (const char *) dot) == 0) {
			unsigned char *content_type;

			/* Hide the encoding extension while querying the
			 * backends, then restore the caller's string. */
			*dot = '\0';
			content_type = get_content_type_backends(extension);
			*dot = '.';

			return content_type;
		}

		candidate++;
	}

	return NULL;
}
/*
 * Read the synonyms file line by line and pass each line to
 * parse_synonyms_file_line().
 *
 * The file path comes from the GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE
 * environment variable; when that is empty, get_system_synonyms_file() is
 * used instead. If the file cannot be opened a warning is logged and the
 * function returns without doing anything else.
 */
static void
load_synonyms(grn_ctx *ctx)
{
  static char path_env[GRN_ENV_BUFFER_SIZE];
  const char *path;
  grn_file_reader *file_reader;
  int number_of_lines = 0;
  grn_encoding encoding;
  grn_obj line, key, value;

  grn_getenv("GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE",
             path_env,
             GRN_ENV_BUFFER_SIZE);
  path = path_env;
  if (path_env[0] == '\0') {
    path = get_system_synonyms_file();
  }

  file_reader = grn_file_reader_open(ctx, path);
  if (file_reader == NULL) {
    GRN_LOG(ctx, GRN_LOG_WARNING,
            "[plugin][query-expander][tsv] "
            "synonyms file doesn't exist: <%s>",
            path);
    return;
  }

  GRN_TEXT_INIT(&line, 0);
  GRN_TEXT_INIT(&key, 0);
  GRN_TEXT_INIT(&value, 0);
  grn_bulk_reserve(ctx, &value, MAX_SYNONYM_BYTES);

  while (grn_file_reader_read_line(ctx, file_reader, &line) == GRN_SUCCESS) {
    const char *line_value = GRN_TEXT_VALUE(&line);
    size_t line_length = GRN_TEXT_LEN(&line);

    /* Strip a trailing "\n" or "\r\n" from the view of the line. */
    if (line_length > 0 && line_value[line_length - 1] == '\n') {
      line_length--;
      if (line_length > 0 && line_value[line_length - 1] == '\r') {
        line_length--;
      }
    }

    number_of_lines++;
    if (number_of_lines == 1) {
      /* Only the first line is inspected. The helper receives pointers to
       * the line view, so it may adjust it (presumably to skip a BOM --
       * TODO confirm). The returned encoding is not read again here. */
      encoding = guess_encoding(ctx, &line_value, &line_length);
    }

    GRN_BULK_REWIND(&key);
    GRN_BULK_REWIND(&value);
    parse_synonyms_file_line(ctx, line_value, line_length, &key, &value);

    /* Reuse the same line buffer for the next read. */
    GRN_BULK_REWIND(&line);
  }

  GRN_OBJ_FIN(ctx, &line);
  GRN_OBJ_FIN(ctx, &key);
  GRN_OBJ_FIN(ctx, &value);

  grn_file_reader_close(ctx, file_reader);
}
static gssize gedit_document_output_stream_write (GOutputStream *stream, const void *buffer, gsize count, GCancellable *cancellable, GError **error) { GeditDocumentOutputStream *ostream; gchar *text; gsize len; gboolean freetext = FALSE; if (g_cancellable_set_error_if_cancelled (cancellable, error)) { return -1; } ostream = GEDIT_DOCUMENT_OUTPUT_STREAM (stream); if (!ostream->priv->is_initialized) { ostream->priv->charset_conv = guess_encoding (ostream, buffer, count); /* If we still have the previous case is that we didn't guess anything */ if (ostream->priv->charset_conv == NULL && !ostream->priv->is_utf8) { g_set_error_literal (error, GEDIT_DOCUMENT_ERROR, GEDIT_DOCUMENT_ERROR_ENCODING_AUTO_DETECTION_FAILED, _("It is not possible to detect the encoding automatically")); return -1; } /* Do not initialize iconv if we are not going to convert anything */ if (!ostream->priv->is_utf8) { gchar *from_charset; /* Initialize iconv */ g_object_get (G_OBJECT (ostream->priv->charset_conv), "from-charset", &from_charset, NULL); ostream->priv->iconv = g_iconv_open ("UTF-8", from_charset); if (ostream->priv->iconv == (GIConv)-1) { if (errno == EINVAL) { g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED, _("Conversion from character set '%s' to 'UTF-8' is not supported"), from_charset); } else { g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED, _("Could not open converter from '%s' to 'UTF-8'"), from_charset); } g_free (from_charset); g_clear_object (&ostream->priv->charset_conv); return -1; } g_free (from_charset); } /* Init the undoable action */ gtk_source_buffer_begin_not_undoable_action (GTK_SOURCE_BUFFER (ostream->priv->doc)); gtk_text_buffer_get_start_iter (GTK_TEXT_BUFFER (ostream->priv->doc), &ostream->priv->pos); ostream->priv->is_initialized = TRUE; } if (ostream->priv->buflen > 0) { len = ostream->priv->buflen + count; text = g_malloc (len + 1); memcpy (text, ostream->priv->buffer, ostream->priv->buflen); memcpy (text + ostream->priv->buflen, buffer, count); 
text[len] = '\0'; g_free (ostream->priv->buffer); ostream->priv->buffer = NULL; ostream->priv->buflen = 0; freetext = TRUE; } else { text = (gchar *) buffer; len = count; } if (!ostream->priv->is_utf8) { gchar *outbuf; gsize outbuf_len; /* check if iconv was correctly initializated, this shouldn't happen but better be safe */ if (ostream->priv->iconv == NULL) { g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_INITIALIZED, _("Invalid object, not initialized")); if (freetext) { g_free (text); } return -1; } /* manage the previous conversion buffer */ if (ostream->priv->iconv_buflen > 0) { gchar *text2; gsize len2; len2 = len + ostream->priv->iconv_buflen; text2 = g_malloc (len2 + 1); memcpy (text2, ostream->priv->iconv_buffer, ostream->priv->iconv_buflen); memcpy (text2 + ostream->priv->iconv_buflen, text, len); text2[len2] = '\0'; if (freetext) { g_free (text); } text = text2; len = len2; g_free (ostream->priv->iconv_buffer); ostream->priv->iconv_buffer = NULL; ostream->priv->iconv_buflen = 0; freetext = TRUE; } if (!convert_text (ostream, text, len, &outbuf, &outbuf_len, error)) { if (freetext) { g_free (text); } return -1; } if (freetext) { g_free (text); } /* set the converted text as the text to validate */ text = outbuf; len = outbuf_len; } validate_and_insert (ostream, text, len); if (freetext) { g_free (text); } return count; }
static GConverterResult cedit_smart_charset_converter_convert (GConverter *converter, const void *inbuf, gsize inbuf_size, void *outbuf, gsize outbuf_size, GConverterFlags flags, gsize *bytes_read, gsize *bytes_written, GError **error) { CeditSmartCharsetConverter *smart = CEDIT_SMART_CHARSET_CONVERTER (converter); /* Guess the encoding if we didn't make it yet */ if (smart->priv->charset_conv == NULL && !smart->priv->is_utf8) { smart->priv->charset_conv = guess_encoding (smart, inbuf, inbuf_size); /* If we still have the previous case is that we didn't guess anything */ if (smart->priv->charset_conv == NULL && !smart->priv->is_utf8) { /* FIXME: Add a different domain when we kill cedit_convert */ g_set_error_literal (error, CEDIT_DOCUMENT_ERROR, CEDIT_DOCUMENT_ERROR_ENCODING_AUTO_DETECTION_FAILED, _("It is not possible to detect the encoding automatically")); return G_CONVERTER_ERROR; } } /* Now if the encoding is utf8 just redirect the input to the output */ if (smart->priv->is_utf8) { gsize size; GConverterResult ret; size = MIN (inbuf_size, outbuf_size); memcpy (outbuf, inbuf, size); *bytes_read = size; *bytes_written = size; ret = G_CONVERTER_CONVERTED; if (flags & G_CONVERTER_INPUT_AT_END) ret = G_CONVERTER_FINISHED; else if (flags & G_CONVERTER_FLUSH) ret = G_CONVERTER_FLUSHED; return ret; } /* If we reached here is because we need to convert the text so, we convert it with the charset converter */ return g_converter_convert (G_CONVERTER (smart->priv->charset_conv), inbuf, inbuf_size, outbuf, outbuf_size, flags, bytes_read, bytes_written, error); }
int main(int argc, const char **argv) { struct stat st; iconv_t ic; STRBUF *wbuf; STRBUF *docbuf; STRBUF *outbuf; int i = 1; (void)setlocale(LC_ALL, ""); while (argv[i]) { if (!strcmp(argv[i], "--raw")) { opt_raw = 1; i++; continue; } else if (!strcmp(argv[i], "--raw-input")) { opt_raw_input = 1; i++; continue; } else if (!strncmp(argv[i], "--encoding=", 11)) { size_t arglen = strlen(argv[i]) - 10; #ifdef iconvlist if (!strcmp(argv[i] + 11, "list")) { show_iconvlist(); } #endif opt_encoding = ymalloc(arglen); memcpy(opt_encoding, argv[i] + 11, arglen); i++; continue; } else if (!strncmp(argv[i], "--width=", 8)) { opt_width = atoi(argv[i] + 8); if(opt_width < 3 && opt_width != -1) { fprintf(stderr, "Invalid value for width: %s\n", argv[i] + 8); exit(EXIT_FAILURE); } i++; continue; } else if (!strcmp(argv[i], "--force")) { // ignore this setting i++; continue; } else if (!strncmp(argv[i], "--output=", 9)) { if (*(argv[i] + 9) != '-') { size_t arglen = strlen(argv[i]) - 8; opt_output = ymalloc(arglen); memcpy(opt_output, argv[i] + 9, arglen); } i++; continue; } else if (!strncmp(argv[i], "--subst=", 8)) { if (!strcmp(argv[i] + 8, "none")) opt_subst = SUBST_NONE; else if (!strcmp(argv[i] + 8, "some")) opt_subst = SUBST_SOME; else if (!strcmp(argv[i] + 8, "all")) opt_subst = SUBST_ALL; else { fprintf(stderr, "Invalid value for --subst: %s\n", argv[i] + 8); exit(EXIT_FAILURE); } i++; continue; } else if (!strcmp(argv[i], "--help")) { usage(); } else if (!strcmp(argv[i], "--version") || !strcmp(argv[i], "-v")) { version_info(); } else if (!strcmp(argv[i], "-")) { usage(); } else { if(opt_filename) usage(); opt_filename = argv[i]; i++; continue; } } if(opt_encoding && !strcmp("show", opt_encoding)) { yfree(opt_encoding); opt_encoding = guess_encoding(); printf("%s\n", opt_encoding); yfree(opt_encoding); exit(EXIT_SUCCESS); } if(opt_raw) opt_width = -1; if(!opt_filename) usage(); if(!opt_encoding) { opt_encoding = guess_encoding(); } ic = init_conv("UTF-8", opt_encoding); 
if (0 != stat(opt_filename, &st)) { fprintf(stderr, "%s: %s\n", opt_filename, strerror(errno)); exit(EXIT_FAILURE); } /* read content.xml */ docbuf = opt_raw_input ? read_from_xml(opt_filename, "content.xml") : read_from_zip(opt_filename, "content.xml"); if (!opt_raw) { subst_doc(ic, docbuf); format_doc(docbuf, opt_raw_input); } wbuf = wrap(docbuf, opt_width); /* remove all trailing whitespace */ (void) regex_subst(wbuf, " +\n", _REG_GLOBAL, "\n"); outbuf = conv(ic, wbuf); if (opt_output) write_to_file(outbuf, opt_output); else fwrite(strbuf_get(outbuf), strbuf_len(outbuf), 1, stdout); finish_conv(ic); strbuf_free(wbuf); strbuf_free(docbuf); strbuf_free(outbuf); #ifndef NO_ICONV yfree(opt_encoding); #endif if (opt_output) yfree(opt_output); return EXIT_SUCCESS; }