/*
 * Guess the character encoding of @buffer (@size bytes) with
 * MCharsetDetector, declaring the TRACKER_LOCALE_LANGUAGE locale to the
 * detector before running the detection.
 *
 * Returns: a g_strdup()-ed copy of the best match's charset name when its
 * confidence is above 30%; NULL when the detector reports an error or the
 * confidence is 30% or lower.
 */
gchar *
tracker_encoding_guess_meegotouch (const gchar *buffer,
                                   gsize        size)
{
	gchar *guessed = NULL;
	gchar *language;

	/* The detector takes the length as an int, hence the narrowing cast */
	MCharsetDetector detector (static_cast<const char *> (buffer),
	                           static_cast<int> (size));

	/* Nothing has been allocated yet, so bailing out here is leak-free */
	if (detector.hasError ()) {
		g_warning ("Charset detector error when creating: %s",
		           detector.errorString ().toUtf8 (). data ());
		return NULL;
	}

	/* Declare the locale language to the detector before detecting */
	language = tracker_locale_get (TRACKER_LOCALE_LANGUAGE);
	detector.setDeclaredLocale (language);

	MCharsetMatch candidate = detector.detect ();

	if (detector.hasError ()) {
		g_warning ("Charset detector error when detecting: %s",
		           detector.errorString ().toUtf8 (). data ());
	} else if (candidate.confidence () <= 30) {
		/* Too uncertain: report it and leave the result as NULL */
		g_debug ("Ignoring charset as '%s' with %d%% (< 30%%) confidence",
		         candidate.name ().toUtf8 ().data (),
		         candidate.confidence ());
	} else {
		guessed = g_strdup (candidate.name ().toUtf8 ().data ());

#if 0
		/* Debugging aid: dump every candidate charset with its confidence */
		QList<MCharsetMatch> allCandidates = detector.detectAll();

		if (detector.hasError ()) {
			g_warning ("Charset detector error when detecting all: %s",
			           detector.errorString ().toUtf8 (). data ());
		}

		g_debug ("Detecting all charsets...");
		for (gint idx = 0; idx < allCandidates.size (); ++idx) {
			g_debug ("  Charset '%s' with %d%% confidence...",
			         allCandidates[idx].name (). toUtf8 ().data (),
			         allCandidates[idx].confidence ());
		}
#endif

		g_debug ("Guessing charset as '%s' with %d%% confidence",
		         guessed, candidate.confidence ());
	}

	/* Single cleanup point for every path that obtained the locale string */
	g_free (language);

	return guessed;
}
/* Example #2 (0) */
/*
 * Converts the detector's stored byte array to a QString using the Qt text
 * codec named by charsetMatch.
 *
 * Returns an empty QString and sets the status to U_ILLEGAL_ARGUMENT_ERROR
 * when Qt has no codec with that name. If the conversion reports invalid
 * characters, the converted text is still returned and the status is set to
 * U_INVALID_CHAR_FOUND.
 */
QString MCharsetDetector::text(const MCharsetMatch &charsetMatch)
{
    Q_D(MCharsetDetector);
    clearError();

    QTextCodec *textCodec =
        QTextCodec::codecForName(charsetMatch.name().toLatin1());

    if (!textCodec) {
        // No codec matches the name: record the error before logging so
        // that errorString() reflects it.
        d->_status = U_ILLEGAL_ARGUMENT_ERROR;
        qWarning() << __PRETTY_FUNCTION__
                   << "no codec for the name" << charsetMatch.name()
                   << errorString();
        // An empty string tells the caller that no conversion is possible.
        return QString();
    }

    QTextCodec::ConverterState conversion;
    QString decoded = textCodec->toUnicode(d->_ba.constData(),
                                           d->_ba.size(),
                                           &conversion);

    // Flag invalid input, but still hand back whatever was decoded.
    if (conversion.invalidChars > 0) {
        d->_status = U_INVALID_CHAR_FOUND;
    }

    return decoded;
}