Ejemplo n.º 1
0
/*
  Derives translations for untranslated messages from already translated
  messages whose source text yields the same zeroKey().

  For example, if "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1"
  has no translation, "XeT 3.1" is inserted into the translator. The
  inserted message is a copy of the Unfinished original with only its
  translation replaced.

  When verbose is set and at least one translation was derived, a
  one-line summary is written to stderr.
*/
void applyNumberHeuristic( MetaTranslator* tor, bool verbose )
{
	TMM donors;
	TMM pending;
	TML messages = tor->messages();
	int inserted = 0;

	// Sort the messages into two maps: Unfinished ones lacking a
	// translation (keyed by context, source text and comment) and
	// non-Unfinished ones carrying a translation (keyed by zeroKey() of
	// the source text).
	for ( TML::Iterator it = messages.begin(); it != messages.end(); ++it )
	{
		MetaTranslatorMessage& msg = *it;
		const bool isBlank = msg.translation().isEmpty();

		if ( msg.type() != MetaTranslatorMessage::Unfinished )
		{
			if ( !isBlank )
				donors.insert( zeroKey( msg.sourceText() ), msg );
			continue;
		}

		if ( isBlank )
		{
			const QCString fullKey = QCString( msg.context() ) + "\n" + msg.sourceText() + "\n" + msg.comment();
			pending.insert( fullKey, msg );
		}
	}

	// Try to find a donor for every pending message.
	for ( TMM::Iterator u = pending.begin(); u != pending.end(); ++u )
	{
		MetaTranslatorMessage& target = *u;
		TMM::Iterator t = donors.find( zeroKey( target.sourceText() ) );

		if ( t == donors.end() )
			continue;
		if ( t.key().isEmpty() )
			continue;

		MetaTranslatorMessage& donor = *t;
		// An identical source text is not a case for this heuristic.
		if ( qstrcmp( donor.sourceText(), target.sourceText() ) == 0 )
			continue;

		MetaTranslatorMessage derived( target );
		derived.setTranslation( translationAttempt( donor.translation(), donor.sourceText(), target.sourceText() ) );
		tor->insert( derived );
		++inserted;
	}

	if ( inserted != 0 && verbose )
	{
		const char* plural = ( inserted == 1 ) ? "" : "s";
		fprintf( stderr, " number heuristic provided %d translation%s\n", inserted, plural );
	}
}
Ejemplo n.º 2
0
/*
  Derives translations for untranslated messages from already translated
  messages whose source text yields the same zeroKey().

  For example, if "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1"
  has no translation, "XeT 3.1" is inserted into the translator. The
  inserted message is a copy of the Unfinished original with only its
  translation replaced.

  Only messages with exactly one translation are used as a basis.

  Returns the number of messages that received a translation this way.
*/
int applyNumberHeuristic( MetaTranslator *tor )
{
    TMM donors;
    TMM pending;
    TML messages = tor->messages();
    int inserted = 0;

    // Sort the messages into two maps: Unfinished ones without a
    // translation (keyed by context, source text and comment) and
    // non-Unfinished ones with exactly one translation (keyed by
    // zeroKey() of the source text).
    for ( TML::Iterator it = messages.begin(); it != messages.end(); ++it ) {
        MetaTranslatorMessage &msg = *it;
        const bool hasTranslation = msg.isTranslated();

        if ( msg.type() != MetaTranslatorMessage::Unfinished ) {
            if ( hasTranslation && msg.translations().count() == 1 )
                donors.insert( zeroKey(msg.sourceText()), msg );
            continue;
        }

        if ( hasTranslation )
            continue;

        const QByteArray fullKey = QByteArray(msg.context()) + "\n" + msg.sourceText() + "\n"
                                   + msg.comment();
        pending.insert( fullKey, msg );
    }

    // Try to find a donor for every pending message.
    for ( TMM::Iterator u = pending.begin(); u != pending.end(); ++u ) {
        MetaTranslatorMessage &target = *u;
        TMM::Iterator t = donors.find( zeroKey(target.sourceText()) );

        if ( t == donors.end() )
            continue;
        if ( t.key().isEmpty() )
            continue;

        MetaTranslatorMessage &donor = *t;
        // An identical source text is not a case for this heuristic.
        if ( qstrcmp(donor.sourceText(), target.sourceText()) == 0 )
            continue;

        MetaTranslatorMessage derived( target );
        derived.setTranslation( translationAttempt(donor.translation(), donor.sourceText(),
                                                   target.sourceText()) );
        tor->insert( derived );
        ++inserted;
    }

    return inserted;
}
Ejemplo n.º 3
0
/*
  Gives untranslated Unfinished messages the translation of another,
  non-Unfinished message that has exactly the same source text.

  A source text whose non-Unfinished messages disagree on the
  translation is never used as a basis.

  When verbose is set and at least one translation was provided, a
  one-line summary is written to stderr.
*/
void applySameTextHeuristic( MetaTranslator *tor, bool verbose )
{
    TMM bySourceText;   // source text -> message whose translation may be reused
    TMM ambiguous;      // source texts seen with conflicting translations
    TML pending;        // Unfinished messages without a translation
    TML messages = tor->messages();
    int inserted = 0;

    for ( TML::Iterator it = messages.begin(); it != messages.end(); ++it ) {
        MetaTranslatorMessage &msg = *it;

        if ( msg.type() == MetaTranslatorMessage::Unfinished ) {
            if ( msg.translation().isEmpty() )
                pending.append( msg );
            continue;
        }

        const QByteArray key = msg.sourceText();
        TMM::Iterator known = bySourceText.find( key );

        if ( known == bySourceText.end() ) {
            // First sighting: remember it unless the text is already
            // known to be ambiguous or this message has no translation.
            if ( !ambiguous.contains(key) && !msg.translation().isEmpty() )
                bySourceText.insert( key, msg );
            continue;
        }

        // The same source text is translated at least two different
        // ways: stop offering it.
        if ( (*known).translation() != msg.translation() ) {
            bySourceText.remove( key );
            ambiguous.insert( key, msg );
        }
    }

    for ( TML::Iterator u = pending.begin(); u != pending.end(); ++u ) {
        MetaTranslatorMessage &target = *u;
        TMM::Iterator match = bySourceText.find( QByteArray(target.sourceText()) );
        if ( match == bySourceText.end() )
            continue;

        MetaTranslatorMessage filled( target );
        filled.setTranslation( (*match).translation() );
        tor->insert( filled );
        ++inserted;
    }

    if ( inserted != 0 && verbose ) {
        const char *plural = ( inserted == 1 ) ? "" : "s";
        fprintf( stderr, " same-text heuristic provided %d translation%s\n",
                 inserted, plural );
    }
}
Ejemplo n.º 4
0
/*
  Merges the vernacular translator tor with the virgin translator
  virginTor and stores the result in outTor.

  tor        - existing translations; read only.
  virginTor  - messages to merge against; read only.
  outTor     - receives the merged messages; its language codes are
               copied from tor.
  verbose    - when set, merge statistics are written to stderr.
  noObsolete - within this function it only selects which summary line
               is printed for obsoleted entries.

  After the two merge passes the same-text and number heuristics are
  run on outTor.
*/
void merge( const MetaTranslator *tor, const MetaTranslator *virginTor, MetaTranslator *outTor, bool verbose, bool noObsolete )
{
    int known = 0;
    int neww = 0;
    int obsoleted = 0;
    int UntranslatedObsoleted = 0;
    int similarTextHeuristicCount = 0;
    TML all = tor->messages();
    TML::Iterator it;
    outTor->setLanguageCode(tor->languageCode());
    outTor->setSourceLanguageCode(tor->sourceLanguageCode());

    /*
      The types of all the messages from the vernacular translator
      are updated according to the virgin translator.
    */
    for ( it = all.begin(); it != all.end(); ++it ) {
        MetaTranslatorMessage::Type newType = MetaTranslatorMessage::Finished;
        MetaTranslatorMessage m = *it;

        // skip context comment
        if ( !QByteArray(m.sourceText()).isEmpty() ) {
            MetaTranslatorMessage mv = virginTor->find(m.context(), m.sourceText(), m.comment());
            if ( mv.isNull() ) {
                // No exact match; fall back to a lookup by context, comment and location.
                mv = virginTor->find(m.context(), m.comment(), m.fileName(), m.lineNumber());
                if ( mv.isNull() ) {
                    // did not find it in the virgin, mark it as obsolete
                    newType = MetaTranslatorMessage::Obsolete;
                    if ( m.type() != MetaTranslatorMessage::Obsolete )
                        obsoleted++;
                } else {
                    // Do not just accept it if its on the same line number, but different source text.
                    // Also check if the texts are more or less similar before we consider them to represent the same message...
                    // ### The QString() cast is evil
                    if (getSimilarityScore(QString(m.sourceText()), mv.sourceText()) >= textSimilarityThreshold) {
                        // It is just slightly modified, assume that it is the same string
                        m = MetaTranslatorMessage(m.context(), mv.sourceText(), m.comment(), m.fileName(), m.lineNumber(), m.translations());
                        m.setPlural(mv.isPlural());

                        // Mark it as unfinished. (Since the source text was changed it might require re-translating...)
                        newType = MetaTranslatorMessage::Unfinished;
                        ++similarTextHeuristicCount;

                        // Count the changed source text as new only here. In the
                        // dissimilar case below the virgin message is inserted and
                        // counted by the second pass, so counting it here as well
                        // would report one new source text twice.
                        neww++;
                    } else {
                        // The virgin and vernacular sourceTexts are so different that we could not find it.
                        newType = MetaTranslatorMessage::Obsolete;
                        if ( m.type() != MetaTranslatorMessage::Obsolete )
                            obsoleted++;
                    }
                }
            } else {
                switch ( m.type() ) {
                case MetaTranslatorMessage::Finished:
                default:
                    // A change in plural-ness requires the message to be revisited.
                    if (m.isPlural() == mv.isPlural()) {
                        newType = MetaTranslatorMessage::Finished;
                    } else {
                        newType = MetaTranslatorMessage::Unfinished;
                    }
                    known++;
                    break;
                case MetaTranslatorMessage::Unfinished:
                    newType = MetaTranslatorMessage::Unfinished;
                    known++;
                    break;
                case MetaTranslatorMessage::Obsolete:
                    // A formerly obsolete message that is present in the virgin again.
                    newType = MetaTranslatorMessage::Unfinished;
                    neww++;
                }

                // Always get the filename and linenumber info from the virgin Translator, in case it has changed location.
                // This should also enable us to read a file that does not have the <location> element.
                m.setFileName(mv.fileName());
                m.setLineNumber(mv.lineNumber());
                m.setPlural(mv.isPlural());             // ### why not use operator=?
            }

            if (newType == MetaTranslatorMessage::Obsolete && !m.isTranslated()) {
                ++UntranslatedObsoleted;
            }

            m.setType(newType);
            outTor->insert(m);
        }
    }

    /*
      Messages found only in the virgin translator are added to the
      vernacular translator. Among these are all the context comments.
    */
    all = virginTor->messages();

    for ( it = all.begin(); it != all.end(); ++it ) {
        MetaTranslatorMessage mv = *it;
        bool found = tor->contains(mv.context(), mv.sourceText(), mv.comment());
        if (!found) {
            // Treat a sufficiently similar message at the same location as the
            // same message; the first pass has already carried it over.
            MetaTranslatorMessage m = tor->find(mv.context(), mv.comment(), mv.fileName(), mv.lineNumber());
            if (!m.isNull()
                && getSimilarityScore(QString(m.sourceText()), mv.sourceText()) >= textSimilarityThreshold) {
                found = true;
            }
        }
        if ( !found ) {
            outTor->insert( mv );
            // Context comments (empty source text) are not counted as new source texts.
            if ( !QByteArray(mv.sourceText()).isEmpty() )
                neww++;
        }
    }

    /*
      The same-text heuristic handles cases where a message has an
      obsolete counterpart with a different context or comment.
    */
    int sameTextHeuristicCount = applySameTextHeuristic( outTor );

    /*
      The number heuristic handles cases where a message has an
      obsolete counterpart with mostly numbers differing in the
      source text.
    */
    int sameNumberHeuristicCount = applyNumberHeuristic( outTor );

    if ( verbose ) {
        int totalFound = neww + known;
        fprintf( stderr, "    Found %d source text%s (%d new and %d already existing)\n",
            totalFound, totalFound == 1 ? "" : "s", neww, known);

        if (obsoleted) {
            if (noObsolete) {
                fprintf( stderr, "    Removed %d obsolete entr%s\n",
                obsoleted, obsoleted == 1 ? "y" : "ies" );
            } else {
                int total = obsoleted - UntranslatedObsoleted;
                fprintf( stderr, "    Kept %d obsolete translation%s\n",
                total, total == 1 ? "" : "s" );

                fprintf( stderr, "    Removed %d obsolete untranslated entr%s\n",
                UntranslatedObsoleted, UntranslatedObsoleted == 1 ? "y" : "ies" );

            }
        }

        if (sameNumberHeuristicCount)
            fprintf( stderr, "    Number heuristic provided %d translation%s\n",
                     sameNumberHeuristicCount, sameNumberHeuristicCount == 1 ? "" : "s" );
        if (sameTextHeuristicCount)
            fprintf( stderr, "    Same-text heuristic provided %d translation%s\n",
                     sameTextHeuristicCount, sameTextHeuristicCount == 1 ? "" : "s" );
        if (similarTextHeuristicCount)
            fprintf( stderr, "    Similar-text heuristic provided %d translation%s\n",
                     similarTextHeuristicCount, similarTextHeuristicCount == 1 ? "" : "s" );
    }
}
Ejemplo n.º 5
0
/*
  Updates the vernacular translator tor in place against the virgin
  translator virginTor.

  Messages of tor that virginTor does not contain become Obsolete;
  Obsolete messages that virginTor does contain become Unfinished.
  Messages only present in virginTor are inserted into tor. Afterwards
  the same-text and number heuristics are run on tor.

  When verbose is set, a summary of known, new and obsoleted messages
  is written to stderr.
*/
void merge( MetaTranslator *tor, const MetaTranslator *virginTor, bool verbose )
{
    int known = 0;
    int neww = 0;
    int obsoleted = 0;

    /*
      Pass 1: bring the type of every vernacular message in line with
      the virgin translator.
    */
    TML vernacular = tor->messages();
    for ( TML::Iterator v = vernacular.begin(); v != vernacular.end(); ++v ) {
        MetaTranslatorMessage m = *v;

        // Context comments have no source text and are left alone.
        if ( QCString(m.sourceText()).isEmpty() )
            continue;

        const MetaTranslatorMessage::Type oldType = m.type();
        MetaTranslatorMessage::Type newType;

        if ( virginTor->contains(m.context(), m.sourceText(), m.comment()) ) {
            if ( oldType == MetaTranslatorMessage::Obsolete ) {
                // Present in the virgin again: needs attention.
                newType = MetaTranslatorMessage::Unfinished;
                neww++;
            } else {
                // Only Finished stays Finished; everything else is
                // treated as Unfinished.
                if ( oldType == MetaTranslatorMessage::Finished )
                    newType = MetaTranslatorMessage::Finished;
                else
                    newType = MetaTranslatorMessage::Unfinished;
                known++;
            }
        } else {
            newType = MetaTranslatorMessage::Obsolete;
            if ( oldType != MetaTranslatorMessage::Obsolete )
                obsoleted++;
        }

        // Only touch the translator when the type actually changes.
        if ( newType != oldType ) {
            m.setType( newType );
            tor->insert( m );
        }
    }

    /*
      Pass 2: add the messages that exist only in the virgin
      translator. Among these are all the context comments.
    */
    TML virgin = virginTor->messages();
    for ( TML::Iterator n = virgin.begin(); n != virgin.end(); ++n ) {
        MetaTranslatorMessage &fresh = *n;

        if ( tor->contains(fresh.context(), fresh.sourceText(), fresh.comment()) )
            continue;

        tor->insert( fresh );
        // Context comments are not counted as new messages.
        if ( !QCString(fresh.sourceText()).isEmpty() )
            neww++;
    }

    /*
      The same-text heuristic handles cases where a message has an
      obsolete counterpart with a different context or comment.
    */
    applySameTextHeuristic( tor, verbose );

    /*
      The number heuristic handles cases where a message has an
      obsolete counterpart with mostly numbers differing in the
      source text.
    */
    applyNumberHeuristic( tor, verbose );

    if ( !verbose )
        return;

    fprintf( stderr, " %d known, %d new and %d obsoleted messages\n", known,
             neww, obsoleted );
}