/*
  Number heuristic.

  Fills in still-untranslated, Unfinished messages of a MetaTranslator by
  borrowing from an already translated message whose source text yields the
  same zeroKey() but is not the very same string. The original example: if
  "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1" has no translation,
  "XeT 3.1" is added to the translator. The inserted message is a copy of
  the untranslated (Unfinished) message with the attempted translation set.

  tor      translator that is read and into which the guesses are inserted.
  verbose  when true and at least one guess was inserted, a one-line summary
           is written to stderr.
*/
void applyNumberHeuristic( MetaTranslator* tor, bool verbose )
{
    TMM donors;     // translated messages, keyed by zeroKey() of their source text
    TMM pending;    // Unfinished messages with an empty translation
    TML messages = tor->messages();
    int inserted = 0;

    // Pass 1: sort every message into "pending" or "donors" (or neither).
    for ( TML::Iterator msgIt = messages.begin(); msgIt != messages.end(); ++msgIt ) {
        MetaTranslatorMessage &msg = *msgIt;
        const bool lacksTranslation = msg.translation().isEmpty();

        if ( msg.type() != MetaTranslatorMessage::Unfinished ) {
            if ( !lacksTranslation )
                donors.insert( zeroKey( msg.sourceText() ), msg );
            continue;
        }
        if ( lacksTranslation ) {
            // Context, source text and comment together make the key unique.
            pending.insert( QCString( msg.context() ) + "\n" + msg.sourceText() + "\n"
                            + msg.comment(), msg );
        }
    }

    // Pass 2: try to derive a translation for each pending message.
    for ( TMM::Iterator want = pending.begin(); want != pending.end(); ++want ) {
        TMM::Iterator donor = donors.find( zeroKey( ( *want ).sourceText() ) );
        if ( donor == donors.end() || donor.key().isEmpty() )
            continue;
        // Identical source texts are not this heuristic's business.
        if ( qstrcmp( ( *donor ).sourceText(), ( *want ).sourceText() ) == 0 )
            continue;

        MetaTranslatorMessage guess( *want );
        guess.setTranslation( translationAttempt( ( *donor ).translation(),
                                                  ( *donor ).sourceText(),
                                                  ( *want ).sourceText() ) );
        tor->insert( guess );
        ++inserted;
    }

    if ( verbose && inserted != 0 )
        fprintf( stderr, " number heuristic provided %d translation%s\n", inserted,
                 inserted == 1 ? "" : "s" );
}
/*
  Number heuristic.

  Fills in still-untranslated, Unfinished messages of a MetaTranslator by
  borrowing from an already translated message whose source text yields the
  same zeroKey() but is not the very same string. The original example: if
  "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1" has no translation,
  "XeT 3.1" is added to the translator. The inserted message is a copy of
  the untranslated (Unfinished) message with the attempted translation set.

  Only messages that are not Unfinished and carry exactly one translation
  are used as a source for guesses.

  Returns the number of additional messages that this heuristic translated.
*/
int applyNumberHeuristic( MetaTranslator *tor )
{
    TMM donors;     // translated messages, keyed by zeroKey() of their source text
    TMM pending;    // Unfinished messages without a translation
    TML messages = tor->messages();
    int inserted = 0;

    // Pass 1: sort every message into "pending" or "donors" (or neither).
    for ( TML::Iterator msgIt = messages.begin(); msgIt != messages.end(); ++msgIt ) {
        MetaTranslatorMessage &msg = *msgIt;
        const bool translatedAlready = msg.isTranslated();

        if ( msg.type() != MetaTranslatorMessage::Unfinished ) {
            if ( translatedAlready && msg.translations().count() == 1 )
                donors.insert( zeroKey(msg.sourceText()), msg );
            continue;
        }
        if ( !translatedAlready ) {
            // Context, source text and comment together make the key unique.
            pending.insert(QByteArray(msg.context()) + "\n" + msg.sourceText() + "\n"
                           + msg.comment(), msg);
        }
    }

    // Pass 2: try to derive a translation for each pending message.
    for ( TMM::Iterator want = pending.begin(); want != pending.end(); ++want ) {
        TMM::Iterator donor = donors.find( zeroKey((*want).sourceText()) );
        if ( donor == donors.end() || donor.key().isEmpty() )
            continue;
        // Identical source texts are not this heuristic's business.
        if ( qstrcmp((*donor).sourceText(), (*want).sourceText()) == 0 )
            continue;

        MetaTranslatorMessage guess( *want );
        guess.setTranslation(translationAttempt((*donor).translation(),
                                                (*donor).sourceText(),
                                                (*want).sourceText()));
        tor->insert( guess );
        ++inserted;
    }

    return inserted;
}
/*
  Same-text heuristic.

  For every Unfinished message with an empty translation, looks for a
  message that is not Unfinished, has exactly the same source text and a
  non-empty translation, and inserts a copy of the Unfinished message
  carrying that translation. Source texts that were seen with two
  different translations are never used.

  tor      translator that is read and into which the copies are inserted.
  verbose  when true and at least one translation was provided, a one-line
           summary is written to stderr.
*/
void applySameTextHeuristic( MetaTranslator *tor, bool verbose )
{
    TMM unambiguous;    // source text -> the single known translated message
    TMM ambiguous;      // source texts that were translated in conflicting ways
    TML pending;        // Unfinished messages with an empty translation
    TML messages = tor->messages();
    int inserted = 0;

    for ( TML::Iterator msgIt = messages.begin(); msgIt != messages.end(); ++msgIt ) {
        MetaTranslatorMessage &msg = *msgIt;

        if ( msg.type() == MetaTranslatorMessage::Unfinished ) {
            if ( msg.translation().isEmpty() )
                pending.append( msg );
            continue;
        }

        QByteArray key = msg.sourceText();
        TMM::Iterator seen = unambiguous.find( key );
        if ( seen == unambiguous.end() ) {
            if ( !ambiguous.contains(key) && !msg.translation().isEmpty() )
                unambiguous.insert( key, msg );
        } else if ( (*seen).translation() != msg.translation() ) {
            /*
              The same source text is translated at least two different
              ways. Do nothing then.
            */
            unambiguous.remove( key );
            ambiguous.insert( key, msg );
        }
    }

    for ( TML::Iterator want = pending.begin(); want != pending.end(); ++want ) {
        QByteArray key = (*want).sourceText();
        TMM::Iterator donor = unambiguous.find( key );
        if ( donor == unambiguous.end() )
            continue;

        MetaTranslatorMessage filled( *want );
        filled.setTranslation( (*donor).translation() );
        tor->insert( filled );
        ++inserted;
    }

    if ( verbose && inserted != 0 )
        fprintf( stderr, " same-text heuristic provided %d translation%s\n", inserted,
                 inserted == 1 ? "" : "s" );
}
void merge( const MetaTranslator *tor, const MetaTranslator *virginTor, MetaTranslator *outTor, bool verbose, bool noObsolete ) { int known = 0; int neww = 0; int obsoleted = 0; int UntranslatedObsoleted = 0; int similarTextHeuristicCount = 0; TML all = tor->messages(); TML::Iterator it; outTor->setLanguageCode(tor->languageCode()); outTor->setSourceLanguageCode(tor->sourceLanguageCode()); /* The types of all the messages from the vernacular translator are updated according to the virgin translator. */ for ( it = all.begin(); it != all.end(); ++it ) { MetaTranslatorMessage::Type newType = MetaTranslatorMessage::Finished; MetaTranslatorMessage m = *it; // skip context comment if ( !QByteArray(m.sourceText()).isEmpty() ) { MetaTranslatorMessage mv = virginTor->find(m.context(), m.sourceText(), m.comment()); if ( mv.isNull() ) { mv = virginTor->find(m.context(), m.comment(), m.fileName(), m.lineNumber()); if ( mv.isNull() ) { // did not find it in the virgin, mark it as obsolete newType = MetaTranslatorMessage::Obsolete; if ( m.type() != MetaTranslatorMessage::Obsolete ) obsoleted++; } else { // Do not just accept it if its on the same line number, but different source text. // Also check if the texts are more or less similar before we consider them to represent the same message... // ### The QString() cast is evil if (getSimilarityScore(QString(m.sourceText()), mv.sourceText()) >= textSimilarityThreshold) { // It is just slightly modified, assume that it is the same string m = MetaTranslatorMessage(m.context(), mv.sourceText(), m.comment(), m.fileName(), m.lineNumber(), m.translations()); m.setPlural(mv.isPlural()); // Mark it as unfinished. (Since the source text was changed it might require re-translating...) newType = MetaTranslatorMessage::Unfinished; ++similarTextHeuristicCount; } else { // The virgin and vernacular sourceTexts are so different that we could not find it. 
newType = MetaTranslatorMessage::Obsolete; if ( m.type() != MetaTranslatorMessage::Obsolete ) obsoleted++; } neww++; } } else { switch ( m.type() ) { case MetaTranslatorMessage::Finished: default: if (m.isPlural() == mv.isPlural()) { newType = MetaTranslatorMessage::Finished; } else { newType = MetaTranslatorMessage::Unfinished; } known++; break; case MetaTranslatorMessage::Unfinished: newType = MetaTranslatorMessage::Unfinished; known++; break; case MetaTranslatorMessage::Obsolete: newType = MetaTranslatorMessage::Unfinished; neww++; } // Always get the filename and linenumber info from the virgin Translator, in case it has changed location. // This should also enable us to read a file that does not have the <location> element. m.setFileName(mv.fileName()); m.setLineNumber(mv.lineNumber()); m.setPlural(mv.isPlural()); // ### why not use operator=? } if (newType == MetaTranslatorMessage::Obsolete && !m.isTranslated()) { ++UntranslatedObsoleted; } m.setType(newType); outTor->insert(m); } } /* Messages found only in the virgin translator are added to the vernacular translator. Among these are all the context comments. */ all = virginTor->messages(); for ( it = all.begin(); it != all.end(); ++it ) { MetaTranslatorMessage mv = *it; bool found = tor->contains(mv.context(), mv.sourceText(), mv.comment()); if (!found) { MetaTranslatorMessage m = tor->find(mv.context(), mv.comment(), mv.fileName(), mv.lineNumber()); if (!m.isNull()) { if (getSimilarityScore(QString(m.sourceText()), mv.sourceText()) >= textSimilarityThreshold) { found = true; } } else { found = false; } } if ( !found ) { outTor->insert( mv ); if ( !QByteArray(mv.sourceText()).isEmpty() ) neww++; } } /* The same-text heuristic handles cases where a message has an obsolete counterpart with a different context or comment. 
*/ int sameTextHeuristicCount = applySameTextHeuristic( outTor ); /* The number heuristic handles cases where a message has an obsolete counterpart with mostly numbers differing in the source text. */ int sameNumberHeuristicCount = applyNumberHeuristic( outTor ); if ( verbose ) { int totalFound = neww + known; fprintf( stderr, " Found %d source text%s (%d new and %d already existing)\n", totalFound, totalFound == 1 ? "" : "s", neww, known); if (obsoleted) { if (noObsolete) { fprintf( stderr, " Removed %d obsolete entr%s\n", obsoleted, obsoleted == 1 ? "y" : "ies" ); } else { int total = obsoleted - UntranslatedObsoleted; fprintf( stderr, " Kept %d obsolete translation%s\n", total, total == 1 ? "" : "s" ); fprintf( stderr, " Removed %d obsolete untranslated entr%s\n", UntranslatedObsoleted, UntranslatedObsoleted == 1 ? "y" : "ies" ); } } if (sameNumberHeuristicCount) fprintf( stderr, " Number heuristic provided %d translation%s\n", sameNumberHeuristicCount, sameNumberHeuristicCount == 1 ? "" : "s" ); if (sameTextHeuristicCount) fprintf( stderr, " Same-text heuristic provided %d translation%s\n", sameTextHeuristicCount, sameTextHeuristicCount == 1 ? "" : "s" ); if (similarTextHeuristicCount) fprintf( stderr, " Similar-text heuristic provided %d translation%s\n", similarTextHeuristicCount, similarTextHeuristicCount == 1 ? "" : "s" ); } }
void merge( MetaTranslator *tor, const MetaTranslator *virginTor, bool verbose ) { int known = 0; int neww = 0; int obsoleted = 0; TML all = tor->messages(); TML::Iterator it; /* The types of all the messages from the vernacular translator are updated according to the virgin translator. */ for ( it = all.begin(); it != all.end(); ++it ) { MetaTranslatorMessage::Type newType; MetaTranslatorMessage m = *it; // skip context comment if ( !QCString((*it).sourceText()).isEmpty() ) { if ( !virginTor->contains((*it).context(), (*it).sourceText(), (*it).comment()) ) { newType = MetaTranslatorMessage::Obsolete; if ( m.type() != MetaTranslatorMessage::Obsolete ) obsoleted++; } else { switch ( m.type() ) { case MetaTranslatorMessage::Finished: newType = MetaTranslatorMessage::Finished; known++; break; case MetaTranslatorMessage::Unfinished: default: newType = MetaTranslatorMessage::Unfinished; known++; break; case MetaTranslatorMessage::Obsolete: newType = MetaTranslatorMessage::Unfinished; neww++; } } if ( newType != m.type() ) { m.setType( newType ); tor->insert( m ); } } } /* Messages found only in the virgin translator are added to the vernacular translator. Among these are all the context comments. */ all = virginTor->messages(); for ( it = all.begin(); it != all.end(); ++it ) { if ( !tor->contains((*it).context(), (*it).sourceText(), (*it).comment()) ) { tor->insert( *it ); if ( !QCString((*it).sourceText()).isEmpty() ) neww++; } } /* The same-text heuristic handles cases where a message has an obsolete counterpart with a different context or comment. */ applySameTextHeuristic( tor, verbose ); /* The number heuristic handles cases where a message has an obsolete counterpart with mostly numbers differing in the source text. */ applyNumberHeuristic( tor, verbose ); if ( verbose ) fprintf( stderr, " %d known, %d new and %d obsoleted messages\n", known, neww, obsoleted ); }