C++ (Cpp) guess_encoding примеры использования

Пример #1

0

Показать файл

Файл: mime.c Проект: Efreak/elinks

/* Check if part of the extension coresponds to a supported encoding and if it
 * has any handlers. */
static inline unsigned char *
check_encoding_type(unsigned char *extension)
{
	enum stream_encoding encoding = guess_encoding(extension);
	const unsigned char *const *extension_list;
	unsigned char *last_extension = strrchr((const char *)extension, '.');

	if (encoding == ENCODING_NONE || !last_extension)
		return NULL;

	for (extension_list = listext_encoded(encoding);
	     extension_list && *extension_list;
	     extension_list++) {
		unsigned char *content_type;

		if (strcmp(*extension_list, last_extension))
			continue;

		*last_extension = '\0';
		content_type = get_content_type_backends(extension);
		*last_extension = '.';

		return content_type;
	}

	return NULL;
}

Пример #2

0

Показать файл

Файл: tsv.c Проект: cosmo0920/groonga

static void
load_synonyms(grn_ctx *ctx)
{
  static char path_env[GRN_ENV_BUFFER_SIZE];
  const char *path;
  grn_file_reader *file_reader;
  int number_of_lines;
  grn_encoding encoding;
  grn_obj line, key, value;

  grn_getenv("GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE",
             path_env,
             GRN_ENV_BUFFER_SIZE);
  if (path_env[0]) {
    path = path_env;
  } else {
    path = get_system_synonyms_file();
  }
  file_reader = grn_file_reader_open(ctx, path);
  if (!file_reader) {
    GRN_LOG(ctx, GRN_LOG_WARNING,
            "[plugin][query-expander][tsv] "
            "synonyms file doesn't exist: <%s>",
            path);
    return;
  }

  GRN_TEXT_INIT(&line, 0);
  GRN_TEXT_INIT(&key, 0);
  GRN_TEXT_INIT(&value, 0);
  grn_bulk_reserve(ctx, &value, MAX_SYNONYM_BYTES);
  number_of_lines = 0;
  while (grn_file_reader_read_line(ctx, file_reader, &line) == GRN_SUCCESS) {
    const char *line_value = GRN_TEXT_VALUE(&line);
    size_t line_length = GRN_TEXT_LEN(&line);

    if (line_length > 0 && line_value[line_length - 1] == '\n') {
      if (line_length > 1 && line_value[line_length - 2] == '\r') {
        line_length -= 2;
      } else {
        line_length -= 1;
      }
    }
    number_of_lines++;
    if (number_of_lines == 1) {
      encoding = guess_encoding(ctx, &line_value, &line_length);
    }
    GRN_BULK_REWIND(&key);
    GRN_BULK_REWIND(&value);
    parse_synonyms_file_line(ctx, line_value, line_length, &key, &value);
    GRN_BULK_REWIND(&line);
  }
  GRN_OBJ_FIN(ctx, &line);
  GRN_OBJ_FIN(ctx, &key);
  GRN_OBJ_FIN(ctx, &value);

  grn_file_reader_close(ctx, file_reader);
}

Пример #3

0

Показать файл

Файл: gedit-document-output-stream.c Проект: jaseemabid/gedit

static gssize
gedit_document_output_stream_write (GOutputStream            *stream,
				    const void               *buffer,
				    gsize                     count,
				    GCancellable             *cancellable,
				    GError                  **error)
{
	GeditDocumentOutputStream *ostream;
	gchar *text;
	gsize len;
	gboolean freetext = FALSE;

	if (g_cancellable_set_error_if_cancelled (cancellable, error))
	{
		return -1;
	}

	ostream = GEDIT_DOCUMENT_OUTPUT_STREAM (stream);

	if (!ostream->priv->is_initialized)
	{
		ostream->priv->charset_conv = guess_encoding (ostream, buffer, count);

		/* If we still have the previous case is that we didn't guess
		   anything */
		if (ostream->priv->charset_conv == NULL &&
		    !ostream->priv->is_utf8)
		{
			g_set_error_literal (error, GEDIT_DOCUMENT_ERROR,
			                     GEDIT_DOCUMENT_ERROR_ENCODING_AUTO_DETECTION_FAILED,
			                     _("It is not possible to detect the encoding automatically"));

			return -1;
		}

		/* Do not initialize iconv if we are not going to convert anything */
		if (!ostream->priv->is_utf8)
		{
			gchar *from_charset;

			/* Initialize iconv */
			g_object_get (G_OBJECT (ostream->priv->charset_conv),
				      "from-charset", &from_charset,
				      NULL);

			ostream->priv->iconv = g_iconv_open ("UTF-8", from_charset);

			if (ostream->priv->iconv == (GIConv)-1)
			{
				if (errno == EINVAL)
				{
					g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
						     _("Conversion from character set '%s' to 'UTF-8' is not supported"),
						     from_charset);
				}
				else
				{
					g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
						     _("Could not open converter from '%s' to 'UTF-8'"),
						     from_charset);
				}

				g_free (from_charset);
				g_clear_object (&ostream->priv->charset_conv);

				return -1;
			}

			g_free (from_charset);
		}

		/* Init the undoable action */
		gtk_source_buffer_begin_not_undoable_action (GTK_SOURCE_BUFFER (ostream->priv->doc));

		gtk_text_buffer_get_start_iter (GTK_TEXT_BUFFER (ostream->priv->doc),
		                                &ostream->priv->pos);

		ostream->priv->is_initialized = TRUE;
	}

	if (ostream->priv->buflen > 0)
	{
		len = ostream->priv->buflen + count;
		text = g_malloc (len + 1);

		memcpy (text, ostream->priv->buffer, ostream->priv->buflen);
		memcpy (text + ostream->priv->buflen, buffer, count);

		text[len] = '\0';

		g_free (ostream->priv->buffer);

		ostream->priv->buffer = NULL;
		ostream->priv->buflen = 0;

		freetext = TRUE;
	}
	else
	{
		text = (gchar *) buffer;
		len = count;
	}

	if (!ostream->priv->is_utf8)
	{
		gchar *outbuf;
		gsize outbuf_len;

		/* check if iconv was correctly initializated, this shouldn't
		   happen but better be safe */
		if (ostream->priv->iconv == NULL)
		{
			g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_INITIALIZED,
			                     _("Invalid object, not initialized"));

			if (freetext)
			{
				g_free (text);
			}

			return -1;
		}

		/* manage the previous conversion buffer */
		if (ostream->priv->iconv_buflen > 0)
		{
			gchar *text2;
			gsize len2;

			len2 = len + ostream->priv->iconv_buflen;
			text2 = g_malloc (len2 + 1);

			memcpy (text2, ostream->priv->iconv_buffer, ostream->priv->iconv_buflen);
			memcpy (text2 + ostream->priv->iconv_buflen, text, len);

			text2[len2] = '\0';

			if (freetext)
			{
				g_free (text);
			}

			text = text2;
			len = len2;

			g_free (ostream->priv->iconv_buffer);

			ostream->priv->iconv_buffer = NULL;
			ostream->priv->iconv_buflen = 0;

			freetext = TRUE;
		}

		if (!convert_text (ostream, text, len, &outbuf, &outbuf_len, error))
		{
			if (freetext)
			{
				g_free (text);
			}

			return -1;
		}

		if (freetext)
		{
			g_free (text);
		}

		/* set the converted text as the text to validate */
		text = outbuf;
		len = outbuf_len;
	}

	validate_and_insert (ostream, text, len);

	if (freetext)
	{
		g_free (text);
	}

	return count;
}

Пример #4

0

Показать файл

Файл: cedit-smart-charset-converter.c Проект: cranes-bill/cedit

static GConverterResult
cedit_smart_charset_converter_convert (GConverter *converter,
				       const void *inbuf,
				       gsize       inbuf_size,
				       void       *outbuf,
				       gsize       outbuf_size,
				       GConverterFlags flags,
				       gsize      *bytes_read,
				       gsize      *bytes_written,
				       GError    **error)
{
	CeditSmartCharsetConverter *smart = CEDIT_SMART_CHARSET_CONVERTER (converter);

	/* Guess the encoding if we didn't make it yet */
	if (smart->priv->charset_conv == NULL &&
	    !smart->priv->is_utf8)
	{
		smart->priv->charset_conv = guess_encoding (smart, inbuf, inbuf_size);

		/* If we still have the previous case is that we didn't guess
		   anything */
		if (smart->priv->charset_conv == NULL &&
		    !smart->priv->is_utf8)
		{
			/* FIXME: Add a different domain when we kill cedit_convert */
			g_set_error_literal (error, CEDIT_DOCUMENT_ERROR,
					     CEDIT_DOCUMENT_ERROR_ENCODING_AUTO_DETECTION_FAILED,
					     _("It is not possible to detect the encoding automatically"));
			return G_CONVERTER_ERROR;
		}
	}

	/* Now if the encoding is utf8 just redirect the input to the output */
	if (smart->priv->is_utf8)
	{
		gsize size;
		GConverterResult ret;

		size = MIN (inbuf_size, outbuf_size);

		memcpy (outbuf, inbuf, size);
		*bytes_read = size;
		*bytes_written = size;

		ret = G_CONVERTER_CONVERTED;

		if (flags & G_CONVERTER_INPUT_AT_END)
			ret = G_CONVERTER_FINISHED;
		else if (flags & G_CONVERTER_FLUSH)
			ret = G_CONVERTER_FLUSHED;

		return ret;
	}

	/* If we reached here is because we need to convert the text so, we
	   convert it with the charset converter */
	return g_converter_convert (G_CONVERTER (smart->priv->charset_conv),
				    inbuf,
				    inbuf_size,
				    outbuf,
				    outbuf_size,
				    flags,
				    bytes_read,
				    bytes_written,
				    error);
}

Пример #5

0

Показать файл

Файл: odt2txt.c Проект: shenek/odt2txt

int main(int argc, const char **argv)
{
	struct stat st;
	iconv_t ic;
	STRBUF *wbuf;
	STRBUF *docbuf;
	STRBUF *outbuf;
	int i = 1;

	(void)setlocale(LC_ALL, "");

	while (argv[i]) {
		if (!strcmp(argv[i], "--raw")) {
			opt_raw = 1;
			i++; continue;
		} else if (!strcmp(argv[i], "--raw-input")) {
			opt_raw_input = 1;
			i++; continue;
		} else if (!strncmp(argv[i], "--encoding=", 11)) {
			size_t arglen = strlen(argv[i]) - 10;
#ifdef iconvlist
			if (!strcmp(argv[i] + 11, "list")) {
				show_iconvlist();
			}
#endif
			opt_encoding = ymalloc(arglen);
			memcpy(opt_encoding, argv[i] + 11, arglen);
			i++; continue;
		} else if (!strncmp(argv[i], "--width=", 8)) {
			opt_width = atoi(argv[i] + 8);
			if(opt_width < 3 && opt_width != -1) {
				fprintf(stderr, "Invalid value for width: %s\n",
					argv[i] + 8);
				exit(EXIT_FAILURE);
			}
			i++; continue;
		} else if (!strcmp(argv[i], "--force")) {
			// ignore this setting
			i++; continue;
		} else if (!strncmp(argv[i], "--output=", 9)) {
			if (*(argv[i] + 9) != '-') {
				size_t arglen = strlen(argv[i]) - 8;
				opt_output = ymalloc(arglen);
				memcpy(opt_output, argv[i] + 9, arglen);
			}
			i++; continue;
		} else if (!strncmp(argv[i], "--subst=", 8)) {
			if (!strcmp(argv[i] + 8, "none"))
				opt_subst = SUBST_NONE;
			else if (!strcmp(argv[i] + 8, "some"))
				opt_subst = SUBST_SOME;
			else if (!strcmp(argv[i] + 8, "all"))
				opt_subst = SUBST_ALL;
			else {
				fprintf(stderr, "Invalid value for --subst: %s\n",
					argv[i] + 8);
				exit(EXIT_FAILURE);
			}
			i++; continue;
		} else if (!strcmp(argv[i], "--help")) {
			usage();
		} else if (!strcmp(argv[i], "--version")
			   || !strcmp(argv[i], "-v")) {
			version_info();
		} else if (!strcmp(argv[i], "-")) {
			usage();
		} else {
			if(opt_filename)
				usage();
			opt_filename = argv[i];
			i++; continue;
		}
	}

	if(opt_encoding && !strcmp("show", opt_encoding)) {
		yfree(opt_encoding);
		opt_encoding = guess_encoding();
		printf("%s\n", opt_encoding);
		yfree(opt_encoding);
		exit(EXIT_SUCCESS);
	}

	if(opt_raw)
		opt_width = -1;

	if(!opt_filename)
		usage();

	if(!opt_encoding) {
		opt_encoding = guess_encoding();
	}

	ic = init_conv("UTF-8", opt_encoding);

	if (0 != stat(opt_filename, &st)) {
		fprintf(stderr, "%s: %s\n",
			opt_filename, strerror(errno));
		exit(EXIT_FAILURE);
	}

	/* read content.xml */
	docbuf = opt_raw_input ?
		read_from_xml(opt_filename, "content.xml") :
		read_from_zip(opt_filename, "content.xml");

	if (!opt_raw) {
		subst_doc(ic, docbuf);
		format_doc(docbuf, opt_raw_input);
	}

	wbuf = wrap(docbuf, opt_width);

	/* remove all trailing whitespace */
	(void) regex_subst(wbuf, " +\n", _REG_GLOBAL, "\n");

	outbuf = conv(ic, wbuf);

	if (opt_output)
		write_to_file(outbuf, opt_output);
	else
		fwrite(strbuf_get(outbuf), strbuf_len(outbuf), 1, stdout);

	finish_conv(ic);
	strbuf_free(wbuf);
	strbuf_free(docbuf);
	strbuf_free(outbuf);
#ifndef NO_ICONV
	yfree(opt_encoding);
#endif
	if (opt_output)
		yfree(opt_output);

	return EXIT_SUCCESS;
}