Ejemplo n.º 1
0
/*
  Merges the vernacular translator \a tor with the virgin translator
  \a virginTor and stores the result in \a outTor.

  \a tor and \a virginTor are only read. \a outTor receives the language
  codes of \a tor, every message of \a tor that has a non-empty source
  text (with its type recomputed against \a virginTor), and every message
  of \a virginTor that has no counterpart in \a tor. Afterwards the
  same-text and number heuristics are applied to \a outTor.

  If \a verbose is true, statistics about the merge are written to stderr.
  Within this function \a noObsolete only selects the wording of the
  obsolete-entries statistics; messages marked Obsolete are still inserted
  into \a outTor here.
*/
void merge( const MetaTranslator *tor, const MetaTranslator *virginTor, MetaTranslator *outTor, bool verbose, bool noObsolete )
{
    int known = 0;
    int neww = 0;
    int obsoleted = 0;
    int UntranslatedObsoleted = 0;
    int similarTextHeuristicCount = 0;
    TML all = tor->messages();
    TML::Iterator it;
    outTor->setLanguageCode(tor->languageCode());
    outTor->setSourceLanguageCode(tor->sourceLanguageCode());

    /*
      The types of all the messages from the vernacular translator
      are updated according to the virgin translator.
    */
    for ( it = all.begin(); it != all.end(); ++it ) {
        MetaTranslatorMessage::Type newType = MetaTranslatorMessage::Finished;
        MetaTranslatorMessage m = *it;

        // skip context comment (messages with an empty source text); those
        // are taken from the virgin translator in the second pass below
        if ( !QByteArray(m.sourceText()).isEmpty() ) {
            // First try an exact match on (context, source text, comment).
            MetaTranslatorMessage mv = virginTor->find(m.context(), m.sourceText(), m.comment());
            if ( mv.isNull() ) {
                // No exact match: fall back to a lookup by location.
                mv = virginTor->find(m.context(), m.comment(), m.fileName(), m.lineNumber());
                if ( mv.isNull() ) {
                    // did not find it in the virgin, mark it as obsolete
                    newType = MetaTranslatorMessage::Obsolete;
                    if ( m.type() != MetaTranslatorMessage::Obsolete )
                        obsoleted++;
                } else {
                    // Do not just accept it if its on the same line number, but different source text.
                    // Also check if the texts are more or less similar before we consider them to represent the same message...
                    // ### The QString() cast is evil
                    if (getSimilarityScore(QString(m.sourceText()), mv.sourceText()) >= textSimilarityThreshold) {
                        // It is just slightly modified, assume that it is the same string:
                        // keep the existing translations but adopt the virgin source text.
                        m = MetaTranslatorMessage(m.context(), mv.sourceText(), m.comment(), m.fileName(), m.lineNumber(), m.translations());
                        m.setPlural(mv.isPlural());

                        // Mark it as unfinished. (Since the source text was changed it might require re-translating...)
                        newType = MetaTranslatorMessage::Unfinished;
                        ++similarTextHeuristicCount;

                        // Count the changed source text as new only here. In the
                        // dissimilar case the virgin message is inserted and counted
                        // by the second pass below, so counting it here as well
                        // would report it twice.
                        neww++;
                    } else {
                        // The virgin and vernacular sourceTexts are so different that we could not find it.
                        newType = MetaTranslatorMessage::Obsolete;
                        if ( m.type() != MetaTranslatorMessage::Obsolete )
                            obsoleted++;
                    }
                }
            } else {
                switch ( m.type() ) {
                case MetaTranslatorMessage::Finished:
                default:
                    // A finished message stays finished only if its plural
                    // flag still agrees with the virgin message.
                    if (m.isPlural() == mv.isPlural()) {
                        newType = MetaTranslatorMessage::Finished;
                    } else {
                        newType = MetaTranslatorMessage::Unfinished;
                    }
                    known++;
                    break;
                case MetaTranslatorMessage::Unfinished:
                    newType = MetaTranslatorMessage::Unfinished;
                    known++;
                    break;
                case MetaTranslatorMessage::Obsolete:
                    // A previously obsolete message that exists in the virgin
                    // translator again is counted as new.
                    newType = MetaTranslatorMessage::Unfinished;
                    neww++;
                    break;
                }

                // Always get the filename and linenumber info from the virgin Translator, in case it has changed location.
                // This should also enable us to read a file that does not have the <location> element.
                m.setFileName(mv.fileName());
                m.setLineNumber(mv.lineNumber());
                m.setPlural(mv.isPlural());             // ### why not use operator=?
            }

            // Counted for every message that ends up Obsolete without a
            // translation, including ones that were already Obsolete on input.
            if (newType == MetaTranslatorMessage::Obsolete && !m.isTranslated()) {
                ++UntranslatedObsoleted;
            }

            m.setType(newType);
            outTor->insert(m);
        }
    }

    /*
      Messages found only in the virgin translator are added to the
      vernacular translator. Among these are all the context comments.
    */
    all = virginTor->messages();

    for ( it = all.begin(); it != all.end(); ++it ) {
        MetaTranslatorMessage mv = *it;
        bool found = tor->contains(mv.context(), mv.sourceText(), mv.comment());
        if (!found) {
            // Mirror the similar-text matching of the first pass: a message at
            // the same location with a similar source text was already merged.
            MetaTranslatorMessage m = tor->find(mv.context(), mv.comment(), mv.fileName(), mv.lineNumber());
            found = !m.isNull()
                    && getSimilarityScore(QString(m.sourceText()), mv.sourceText()) >= textSimilarityThreshold;
        }
        if ( !found ) {
            outTor->insert( mv );
            // Context comments (empty source text) are inserted but not counted.
            if ( !QByteArray(mv.sourceText()).isEmpty() )
                neww++;
        }
    }

    /*
      The same-text heuristic handles cases where a message has an
      obsolete counterpart with a different context or comment.
    */
    int sameTextHeuristicCount = applySameTextHeuristic( outTor );

    /*
      The number heuristic handles cases where a message has an
      obsolete counterpart with mostly numbers differing in the
      source text.
    */
    int sameNumberHeuristicCount = applyNumberHeuristic( outTor );

    if ( verbose ) {
        int totalFound = neww + known;
        fprintf( stderr, "    Found %d source text%s (%d new and %d already existing)\n",
            totalFound, totalFound == 1 ? "" : "s", neww, known);

        if (obsoleted) {
            if (noObsolete) {
                fprintf( stderr, "    Removed %d obsolete entr%s\n",
                obsoleted, obsoleted == 1 ? "y" : "ies" );
            } else {
                // NOTE(review): `obsoleted` counts only messages that became
                // Obsolete during this merge, while UntranslatedObsoleted also
                // includes untranslated messages that were already Obsolete,
                // so this difference can be negative. Confirm the intended
                // meaning before changing it.
                int total = obsoleted - UntranslatedObsoleted;
                fprintf( stderr, "    Kept %d obsolete translation%s\n",
                total, total == 1 ? "" : "s" );

                fprintf( stderr, "    Removed %d obsolete untranslated entr%s\n",
                UntranslatedObsoleted, UntranslatedObsoleted == 1 ? "y" : "ies" );

            }
        }

        if (sameNumberHeuristicCount)
            fprintf( stderr, "    Number heuristic provided %d translation%s\n",
                     sameNumberHeuristicCount, sameNumberHeuristicCount == 1 ? "" : "s" );
        if (sameTextHeuristicCount)
            fprintf( stderr, "    Same-text heuristic provided %d translation%s\n",
                     sameTextHeuristicCount, sameTextHeuristicCount == 1 ? "" : "s" );
        if (similarTextHeuristicCount)
            fprintf( stderr, "    Similar-text heuristic provided %d translation%s\n",
                     similarTextHeuristicCount, similarTextHeuristicCount == 1 ? "" : "s" );
    }
}
Ejemplo n.º 2
0
/*
  Merges the messages of the virgin translator \a virginTor into the
  vernacular translator \a tor, modifying \a tor in place.

  Messages of \a tor that have a non-empty source text get their type
  recomputed depending on whether \a virginTor still contains them;
  messages present only in \a virginTor are inserted into \a tor. The
  same-text and number heuristics are then applied to \a tor.

  If \a verbose is true, a summary line is written to stderr; the flag
  is also forwarded to both heuristics.
*/
void merge( MetaTranslator *tor, const MetaTranslator *virginTor, bool verbose )
{
    int knownCount = 0;
    int newCount = 0;
    int obsoletedCount = 0;

    /*
      First pass: re-type every vernacular message according to
      whether the virgin translator still knows it.
    */
    TML vernacular = tor->messages();
    for ( TML::Iterator cur = vernacular.begin(); cur != vernacular.end(); ++cur ) {
        // Context comments have an empty source text; leave them untouched.
        if ( QCString((*cur).sourceText()).isEmpty() )
            continue;

        MetaTranslatorMessage msg = *cur;
        const MetaTranslatorMessage::Type oldType = msg.type();
        MetaTranslatorMessage::Type updatedType = MetaTranslatorMessage::Unfinished;

        const bool stillInVirgin =
            virginTor->contains( msg.context(), msg.sourceText(), msg.comment() );

        if ( !stillInVirgin ) {
            // Gone from the virgin translator: becomes obsolete; only
            // count it if it was not obsolete already.
            updatedType = MetaTranslatorMessage::Obsolete;
            if ( oldType != MetaTranslatorMessage::Obsolete )
                ++obsoletedCount;
        } else if ( oldType == MetaTranslatorMessage::Obsolete ) {
            // Was obsolete but exists in the virgin translator again:
            // counted as new and needs translating.
            updatedType = MetaTranslatorMessage::Unfinished;
            ++newCount;
        } else {
            // Finished stays finished; any other type is treated as
            // unfinished. Both count as known.
            if ( oldType == MetaTranslatorMessage::Finished )
                updatedType = MetaTranslatorMessage::Finished;
            else
                updatedType = MetaTranslatorMessage::Unfinished;
            ++knownCount;
        }

        // Only touch the translator when the type actually changed.
        if ( updatedType == oldType )
            continue;

        msg.setType( updatedType );
        tor->insert( msg );
    }

    /*
      Second pass: add the messages that exist only in the virgin
      translator. Among these are all the context comments, which are
      inserted but not counted as new.
    */
    TML extracted = virginTor->messages();
    for ( TML::Iterator cur = extracted.begin(); cur != extracted.end(); ++cur ) {
        const bool alreadyPresent =
            tor->contains( (*cur).context(), (*cur).sourceText(), (*cur).comment() );
        if ( alreadyPresent )
            continue;

        tor->insert( *cur );
        if ( !QCString((*cur).sourceText()).isEmpty() )
            ++newCount;
    }

    /*
      Same-text heuristic: covers a message whose obsolete counterpart
      differs only in context or comment.
    */
    applySameTextHeuristic( tor, verbose );

    /*
      Number heuristic: covers a message whose obsolete counterpart
      differs mostly in the numbers of the source text.
    */
    applyNumberHeuristic( tor, verbose );

    if ( verbose ) {
        fprintf( stderr, " %d known, %d new and %d obsoleted messages\n",
                 knownCount, newCount, obsoletedCount );
    }
}