コード例 #1
0
ファイル: score.cpp プロジェクト: xwd/mosesGit-hiero
void LexicalTable::load( char *fileName )
{
  cerr << "Loading lexical translation table from " << fileName;
  ifstream inFile;
  inFile.open(fileName);
  if (inFile.fail()) {
    cerr << " - ERROR: could not open file\n";
    exit(1);
  }
  istream *inFileP = &inFile;

  char line[LINE_MAX_LENGTH];

  int i=0;
  while(true) {
    i++;
    if (i%100000 == 0) cerr << "." << flush;
    SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
    if (inFileP->eof()) break;

    vector<string> token = tokenize( line );
    if (token.size() != 3) {
      cerr << "line " << i << " in " << fileName
           << " has wrong number of tokens, skipping:\n"
           << token.size() << " " << token[0] << " " << line << endl;
      continue;
    }

    double prob = atof( token[2].c_str() );
    WORD_ID wordT = vcbT.storeIfNew( token[0] );
    WORD_ID wordS = vcbS.storeIfNew( token[1] );
    ltable[ wordS ][ wordT ] = prob;
  }
  cerr << endl;
}
コード例 #2
0
bool PhraseAlignment::create(const char line[], int lineID )
{
  vector< string > token = tokenize( line );
  int item = 1;
  PHRASE phraseF, phraseE;
  for (size_t j=0; j<token.size(); j++) {
    if (token[j] == "|||") item++;
    else {
      if (item == 1)
        phraseF.push_back( vcbF.storeIfNew( token[j] ) );
      else if (item == 2)
        phraseE.push_back( vcbE.storeIfNew( token[j] ) );
      else if (item == 3) {
        int e,f;
        sscanf(token[j].c_str(), "%d-%d", &f, &e);
        if ((size_t)e >= phraseE.size() || (size_t)f >= phraseF.size()) {
          cerr << "WARNING: sentence " << lineID << " has alignment point (" << f << ", " << e << ") out of bounds (" << phraseF.size() << ", " << phraseE.size() << ")\n";
        } else {
          if (alignedToE.size() == 0) {
            vector< size_t > dummy;
            for(size_t i=0; i<phraseE.size(); i++)
              alignedToE.push_back( dummy );
            for(size_t i=0; i<phraseF.size(); i++)
              alignedToF.push_back( dummy );
            foreign = phraseTableF.storeIfNew( phraseF );
            english = phraseTableE.storeIfNew( phraseE );
          }
          alignedToE[e].push_back( f );
          alignedToF[f].push_back( e );
        }
      }
    }
  }
  return (item>2); // real phrase pair, not just foreign phrase
}
コード例 #3
0
void QuizFrame::editCurrentTerm() {
    if( controller->isQuizInProgress() ) {
        Folder* vocabTree = controller->getVocabTree();
        Term* term = controller->getCurrentTerm();
        if( !term ) {
            QMessageBox::warning( this, QObject::tr( "Information" ), tr( "DissociatedWord" ) );
            return;
        }

        Vocabulary* vocab = vocabTree->getVocabulary( term->getVocabId() );
        if( vocab == NULL || !vocab->isTermExists( term->getId() ) ) {
            QMessageBox::warning( this, QObject::tr( "Information" ), tr( "DissociatedWord" ) );
            return;
        }

        TermDialog dialog( *vocab, controller, this, *term );
        int result = dialog.exec();
        if( result ) { 
            QString firstLang( controller->getQuizFirstLanguage() );
            QString testLang( controller->getQuizTestLanguage() );
            Term newTerm = dialog.getTerm();
            Translation firstLangTrans = newTerm.getTranslation( firstLang );
            Translation testLangTrans = newTerm.getTranslation( testLang );
            term->addTranslation( firstLangTrans );
            term->addTranslation( testLangTrans );
            BilingualKey commentKey( firstLang, testLang );
            term->addComment( commentKey, newTerm.getComment( commentKey ) );
            term->setImagePath( newTerm.getImagePath() );
            vocab->setModificationDate( QDateTime::currentDateTime() );
            vocab->setDirty( true );
            setTerm( newTerm );
        }
    }
}
コード例 #4
0
void LexicalTable::load( const string &filePath )
{
  cerr << "Loading lexical translation table from " << filePath;
  ifstream inFile;
  inFile.open(filePath.c_str());
  if (inFile.fail()) {
    cerr << " - ERROR: could not open file\n";
    exit(1);
  }
  istream *inFileP = &inFile;

  string line;

  int i=0;
  while(getline(*inFileP, line)) {
    i++;
    if (i%100000 == 0) cerr << "." << flush;

    vector<string> token = tokenize( line.c_str() );
    if (token.size() != 3) {
      cerr << "line " << i << " in " << filePath << " has wrong number of tokens, skipping:\n" <<
           token.size() << " " << token[0] << " " << line << endl;
      continue;
    }

    double prob = atof( token[2].c_str() );
    WORD_ID wordE = vcbE.storeIfNew( token[0] );
    WORD_ID wordF = vcbF.storeIfNew( token[1] );
    ltable[ wordF ][ wordE ] = prob;
  }
  cerr << endl;
}
コード例 #5
0
void SearchDialog::editResultTerm() {
    ResultListItem* currItem = (ResultListItem*)resultsListView->currentItem();
    if( currItem ) {
        Term* term = currItem->getTerm();
        if( term ) {
            Vocabulary* vocab = controller->getVocabTree()->getVocabulary( term->getVocabId() );
            TermDialog dialog( *vocab, controller, this, *term );
#if defined(Q_WS_HILDON)
            dialog.showFullScreen();
#endif
            int result = dialog.exec();
            if( result ) { 
                Term newTerm = dialog.getTerm();
                term->addTranslation( newTerm.getTranslation( controller->getPreferences().getFirstLanguage() ) );
                term->addTranslation( newTerm.getTranslation( controller->getPreferences().getTestLanguage() ) );
                BilingualKey commentKey( controller->getPreferences().getFirstLanguage(), controller->getPreferences().getTestLanguage() );
                term->addComment( commentKey, newTerm.getComment( commentKey ) );
                term->setImagePath( newTerm.getImagePath() );
                currItem->updateUi();
                vocab->setModificationDate( QDateTime::currentDateTime() );
                vocab->setDirty( true );
            }
        }
    }
}
コード例 #6
0
ファイル: kparse.cpp プロジェクト: Cruel/scummvm
reg_t kSetSynonyms(EngineState *s, int argc, reg_t *argv) {
	SegManager *segMan = s->_segMan;
	reg_t object = argv[0];
	List *list;
	Node *node;
	int script;
	int numSynonyms = 0;
	Vocabulary *voc = g_sci->getVocabulary();

	// Only SCI0-SCI1 EGA games had a parser. In newer versions, this is a stub
	if (getSciVersion() > SCI_VERSION_1_EGA_ONLY)
		return s->r_acc;

	voc->clearSynonyms();

	list = s->_segMan->lookupList(readSelector(segMan, object, SELECTOR(elements)));
	node = s->_segMan->lookupNode(list->first);

	while (node) {
		reg_t objpos = node->value;
		int seg;

		script = readSelectorValue(segMan, objpos, SELECTOR(number));
		seg = s->_segMan->getScriptSegment(script);

		if (seg > 0)
			numSynonyms = s->_segMan->getScript(seg)->getSynonymsNr();

		if (numSynonyms) {
			const byte *synonyms = s->_segMan->getScript(seg)->getSynonyms();

			if (synonyms) {
				debugC(kDebugLevelParser, "Setting %d synonyms for script.%d",
				          numSynonyms, script);

				if (numSynonyms > 16384) {
					error("Segtable corruption: script.%03d has %d synonyms",
					         script, numSynonyms);
					/* We used to reset the corrupted value here. I really don't think it's appropriate.
					 * Lars */
				} else
					for (int i = 0; i < numSynonyms; i++) {
						synonym_t tmp;
						tmp.replaceant = READ_LE_UINT16(synonyms + i * 4);
						tmp.replacement = READ_LE_UINT16(synonyms + i * 4 + 2);
						voc->addSynonym(tmp);
					}
			} else
				warning("Synonyms of script.%03d were requested, but script is not available", script);

		}

		node = s->_segMan->lookupNode(node->succ);
	}

	debugC(kDebugLevelParser, "A total of %d synonyms are active now.", numSynonyms);

	return s->r_acc;
}
コード例 #7
0
ファイル: Vocabulary.cpp プロジェクト: alexanderpanchenko/stc
void test_simple_vocabulary(void) {
	Vocabulary* voc = new Vocabulary("/home/sasha/work/data/test/stopwords.csv");
	cout << voc->contains("the") << endl;
	cout << voc->contains(".") << endl;
	cout << voc->contains(")") << endl;
	cout << voc->contains(",") << endl;

	delete voc;
}
コード例 #8
0
// Check for equal non-terminal alignment in case of SCFG rules.
// Precondition: otherTargetToSourceAlignment has the same size as m_targetToSourceAlignments.begin()->first
bool ExtractionPhrasePair::MatchesAlignment( ALIGNMENT *otherTargetToSourceAlignment ) const
{
  if (!hierarchicalFlag) return true;

  // all or none of the phrasePair's word alignment matrices match, so just pick one
  const ALIGNMENT *thisTargetToSourceAlignment = m_targetToSourceAlignments.begin()->first;

  assert(m_phraseTarget->size() == thisTargetToSourceAlignment->size() + 1);
  assert(thisTargetToSourceAlignment->size() == otherTargetToSourceAlignment->size());

  // loop over all symbols but the left hand side of the rule
  for (size_t i=0; i<thisTargetToSourceAlignment->size()-1; ++i) {
    if (isNonTerminal( vcbT.getWord( m_phraseTarget->at(i) ) )) {
      size_t thisAlign  = *(thisTargetToSourceAlignment->at(i).begin());
      size_t otherAlign = *(otherTargetToSourceAlignment->at(i).begin());

      if (thisTargetToSourceAlignment->at(i).size() != 1 ||
          otherTargetToSourceAlignment->at(i).size() != 1 ||
          thisAlign != otherAlign) {
        return false;
      }
    }
  }

  return true;
}
コード例 #9
0
void SearchDialog::search() {
    const Preferences& prefs = controller->getPreferences();
    QList<TermKey> results = controller->search( queryField->currentText(), prefs.getFirstLanguage(), prefs.getTestLanguage() );
    resultsListView->clear();
    for( QList<TermKey>::ConstIterator it = results.begin(); it != results.end(); it++ ) {
        const TermKey& termKey = *it;
        Term* term = controller->getTerm( termKey );
        Vocabulary* vocab = controller->getVocabTree()->getVocabulary( termKey.getVocabId() );
        if( vocab ) {
            ResultListItem* resultItem = new ResultListItem( resultsListView, term, 
                prefs.getFirstLanguage(), prefs.getTestLanguage(), vocab->getTitle(), vocab->getParent()->getHumanReadablePath(),
                    prefs.isAltInTermListShown() );
            resultItem->setFont( 0, prefs.getMediumFont( prefs.getFirstLanguage() ) );
            resultItem->setFont( 1, prefs.getMediumFont( prefs.getTestLanguage() ) );
        }
    }
    resultsCounterLabel->setText( tr( "%1 term(s) found" ).arg( results.count() ) );
    updateUi();
}
コード例 #10
0
// read in a phrase pair and store it
void PhraseAlignment::create(const vector<string>& token, int lineID) {
	int item = 1;
	PHRASE phraseS, phraseT;
	for (size_t j=0; j<token.size(); ++j) {
		if (token[j] == "|||")
			item++;
		else if (item == 1) // source phrase
			phraseS.push_back( vcbS.storeIfNew( token[j] ) );
		else if (item == 2) // target phrase
			phraseT.push_back( vcbT.storeIfNew( token[j] ) );
		else if (item == 3) { // alignment
			int s = strtol(token[j].substr(0, token[j].find("-")).c_str(), NULL, 10);
			int t = strtol(token[j].substr(token[j].find("-") + 1).c_str(), NULL, 10);
			if (t >= phraseT.size() || s >= phraseS.size()) {
				cerr << "WARNING: phrase pair " << lineID 
						 << " has alignment point (" << s << ", " << t 
						 << ") out of bounds (" << phraseS.size() << ", " << phraseT.size() << ")\n";
			} else {
				// first alignment point? -> initialize
				if (alignedToT.size() == 0) {
          assert(alignedToS.size() == 0);
          size_t numTgtSymbols = (hierarchicalFlag ? phraseT.size()-1 : phraseT.size());
          alignedToT.resize(numTgtSymbols);
          size_t numSrcSymbols = (hierarchicalFlag ? phraseS.size()-1 : phraseS.size());
          alignedToS.resize(numSrcSymbols);
					source = phraseTableS.storeIfNew( phraseS );
					target = phraseTableT.storeIfNew( phraseT );
				}
				// add alignment point
				alignedToT[t].insert( s );
				alignedToS[s].insert( t );
			}
		} else if (item == 4) // count
			count = strtof(token[j].c_str(), NULL);
	}
	if (item == 3)
		count = 1.0;
	if (item < 3 || item > 4) {
		cerr << "ERROR: faulty line " << lineID << ": ";
		for(vector<string>::const_iterator i = token.begin(); i != token.end(); cerr << *(i++) << " ");
		cerr << endl;
	}
}
コード例 #11
0
    void print_topics(const int num_per, const Vocabulary<W>& vocab) {
      int topic_idx = 0;
      for(const std::vector<double> topic : prior_word_) {
	INFO << "Topic " << topic_idx;
	std::vector<size_t> sorted_topic = ferrum::sort_indices(topic, false);
	for(size_t item_idx = 0; item_idx < num_per; ++item_idx) {
	  size_t which = sorted_topic[item_idx];
	  INFO << "\t" << topic[which] << "\t" << vocab.word(which);
	}
	++topic_idx;
      }
    }
コード例 #12
0
ファイル: word2vec.cpp プロジェクト: yong-wang/word2vecPlus
int parse_sentence(const string& sentence, const Vocabulary& vocab, real subsample_thres, unsigned* p_seed, vector<uint64_t>* words) {
    istringstream iss(sentence);
    uint64_t total_cnt = vocab.total_cnt();
    int word_cnt = 0;
    string word;
    while (iss >> word) {
        uint64_t word_id;
        if (!vocab.find_word_id(word, &word_id)) {
            continue;
        }
        ++word_cnt;
        if (subsample_thres > 0) {
            double t = subsample_thres * total_cnt / vocab.get_word_cnt(word_id);
            double remain_prob = (sqrt(1 / t) + 1) * t; // not the same as the paper, which is sqrt(t)
            if (remain_prob < static_cast<real>(rand_r(p_seed)) / RAND_MAX) {
                continue;
            }
        }
        words->push_back(word_id);
    }
    return word_cnt;
}
コード例 #13
0
ファイル: FissionReactor.cpp プロジェクト: acmorrow/pion-core
void FissionReactor::updateVocabulary(const Vocabulary& v)
{
	// first update anything in the Reactor base class that might be needed
	ConfigWriteLock cfg_lock(*this);
	Reactor::updateVocabulary(v);

	v.refreshTerm(m_input_event_type);
	v.refreshTerm(m_input_event_term);

	boost::mutex::scoped_lock codec_lock(m_codec_mutex);
	if (m_codec_ptr)
		m_codec_ptr->updateVocabulary(v);
}
コード例 #14
0
ファイル: word2vec.cpp プロジェクト: yong-wang/word2vecPlus
void save_word_vec(ostream& os, const Net& net, const Vocabulary& vocab) {
    size_t sz = net.hidden_layer_size();
    const vector<Word>& words = vocab.vocab();
    os << words.size() << " " << sz << endl;
    for (size_t i = 0; i != words.size(); ++i) {
        os << words[i].word;
        const real* v = net.get_input_vec(i);
        for (size_t j = 0; j != sz; ++j) {
            os << " " << v[j];
        }
        os << endl;
    }
}
コード例 #15
0
	inline void MapBackToStr(const vector<WORD_ID>& wid, vector<WORD>& tok, Vocabulary& vocab, vector<size_t>& NT_index){
		tok.resize(wid.size());
		NT_index.clear();
		for(int i = 0; i< wid.size(); i++){
			if(!ShouldIgnore(wid[i],vocab)){
				tok[i] = vocab.getWord(wid[i]);
			}

			if(IsNT(wid[i],vocab)){
				NT_index.push_back(i);
			}
		}
	}
コード例 #16
0
ファイル: score.cpp プロジェクト: xwd/mosesGit-hiero
double computeUnalignedFWPenalty( const PHRASE &phraseS, const PHRASE &phraseT, PhraseAlignment *alignment )
{
  // unaligned word counter
  double unaligned = 1.0;
  // only checking target words - source words are caught when computing inverse
  for(int ti=0; ti<alignment->alignedToT.size(); ti++) {
    const set< size_t > & srcIndices = alignment->alignedToT[ ti ];
    if (srcIndices.empty() && functionWordList.find( vcbT.getWord( phraseT[ ti ] ) ) != functionWordList.end()) {
      unaligned *= 2.718;
    }
  }
  return unaligned;
}
コード例 #17
0
ファイル: dicinvert.cpp プロジェクト: aababilov/dictutils
void print_set(FILE *f_inv, Vocabulary &voc, Translations &trans,
		Translations &occured_words)
{
	int trans_no = 0;
	FOR_EACH (Translations, j, trans) {
		string_t translation = *j;
		if (occured_words.count(translation) > 0) {
			continue;
		}
		occured_words.insert(translation);
		if (trans_no > 0) {
			fprintf(f_inv, ", ");
		}
		fprintf(f_inv, "%s", translation.c_str());
		occured_words.insert(translation);
		TranslationMap::iterator ref = voc.translation_map().find(
				'\1' + translation);
		if (ref != voc.translation_map().end()) {
			fprintf(f_inv, ", ");
			print_set(f_inv, voc, ref->second, occured_words);
		}
		++trans_no;
	}
コード例 #18
0
ファイル: score.cpp プロジェクト: xwd/mosesGit-hiero
void printTargetPhrase(const PHRASE &phraseS, const PHRASE &phraseT,
                       const PhraseAlignment &bestAlignment, ostream &out)
{
  // output target symbols, except root, in rule table format
  for (int i = 0; i < phraseT.size()-1; ++i) {
    const std::string &word = vcbT.getWord(phraseT[i]);
    if (!stringToTreeFlag || !isNonTerminal(word)) {
      out << word << " ";
      continue;
    }
    // get corresponding source non-terminal and output pair
    std::set<size_t> alignmentPoints = bestAlignment.alignedToT[i];
    assert(alignmentPoints.size() == 1);
    int j = *(alignmentPoints.begin());
    if (inverseFlag) {
      out << word << vcbS.getWord(phraseS[j]) << " ";
    } else {
      out << vcbS.getWord(phraseS[j]) << word << " ";
    }
  }

  // output target root symbol
  out << vcbT.getWord(phraseT.back());
}
コード例 #19
0
void LexicalTable::load(char *fileName) {
  cerr << "Loading lexical translation table from " << fileName;
	Bz2LineReader inFile(fileName, Bz2LineReader::UNCOMPRESSED);
	
	int i = 0;
	for (string line = inFile.readLine(); !line.empty(); line = inFile.readLine()) {
		if (line.empty())
			break;
    if (++i%100000 == 0) cerr << "." << flush;
		
    vector<string> token = tokenize(line.c_str());
    if (token.size() != 3) {
      cerr << "line " << i << " “" << line << "” in " << fileName 
			     << " has wrong number of tokens (" << token.size() << "), skipping:\n"
			     << token.size() << " " << token[0] << " " << line << endl;
      continue;
    }
  
    WORD_ID wordT = vcbT.storeIfNew( token[0] );
    WORD_ID wordS = vcbS.storeIfNew( token[1] );
    ltable[ wordS ][ wordT ] = strtod(token[2].c_str(), NULL);
  }
  cerr << endl;
}
コード例 #20
0
int main(int argc, char * argv[])
{
    Options options = ProcessOptions(argc,argv);
    auto str_path_in = options.path_in.string();
    auto path_out=options.path_out;
    if (boost::filesystem::create_directory(path_out))
    {
        std::cerr << "creating target directory\n";
    }
    provenance = std::string();
    provenance += "vocab collected on " + get_str_time();
    provenance += "source corpus : " + str_path_in + "\n";

    std::cerr<<"assigning ids\n";
    vocab.read_from_dir(str_path_in);

    provenance = provenance + "words in corpus : "+ FormatHelper::ConvertToStr(vocab.cnt_words_processed)+"\n";
    provenance = provenance + "unique words : "+ FormatHelper::ConvertToStr(vocab.cnt_words)+"\n";
    
    vocab.reduce(options.min_frequency);
    provenance=provenance+"filtered with minimal frequency: "+FormatHelper::ConvertToStr(options.min_frequency)+"\n";
    provenance = provenance + "unique words : "+ FormatHelper::ConvertToStr(vocab.cnt_words)+"\n";

    std::cerr<<"creating list of frequencies\n";

    vocab.freq_per_id.resize(vocab.cnt_words);
    vocab.lst_id2word.resize(vocab.cnt_words);
    std::fill (vocab.freq_per_id.begin(),vocab.freq_per_id.end(),0);   
    std::cerr<<"populating frequencies\n";
    vocab.populate_frequency();
    vocab.reassign_ids(vocab.freq_per_id);
    vocab.populate_frequency();
    vocab.populate_ids();
    
    std::cerr<<"dumping ids and frequencies\n";

    vocab.dump_ids((path_out / boost::filesystem::path("ids")).string());
    vocab.dump_frequency((path_out / boost::filesystem::path("frequencies")).string());
    write_value_to_file((path_out / boost::filesystem::path("cnt_unique_words")).string(),vocab.cnt_words);
    write_value_to_file((path_out / boost::filesystem::path("cnt_words")).string(),vocab.cnt_words_processed);
    write_vector_to_file((path_out / boost::filesystem::path("freq_per_id")).string(),vocab.freq_per_id);
    write_value_to_file((path_out / boost::filesystem::path("provenance.txt")).string(),provenance);

    return 0;
}
コード例 #21
0
double computeLexicalTranslation( PHRASE &phraseS, PHRASE &phraseT, PhraseAlignment *alignment ) {
	// lexical translation probability
	double lexScore = 1.;
	int null = vcbS.getWordID("NULL");
	// all target words have to be explained
	for (size_t ti=0; ti<alignment->alignedToT.size(); ++ti) { 
    const set<size_t>& srcIndices = alignment->alignedToT[ti];
		if (srcIndices.empty())
		// explain unaligned word by NULL
			lexScore *= lexTable.permissiveLookup(null, phraseT[ti]); 
		else {
		 // go through all the aligned words to compute average
			double thisWordScore = 0.;
      for (set<size_t>::const_iterator p(srcIndices.begin()); p != srcIndices.end(); thisWordScore += lexTable.permissiveLookup(phraseS[*(p++)], phraseT[ti]));
			lexScore *= (thisWordScore / srcIndices.size());
		}
	}
	return lexScore;
}
コード例 #22
0
ファイル: Codec.cpp プロジェクト: acmorrow/pion-core
void Codec::setConfig(const Vocabulary& v, const xmlNodePtr config_ptr)
{
	PlatformPlugin::setConfig(v, config_ptr);
	
	// determine the type of event used by the codec
	std::string codec_event_str;
	if (! ConfigManager::getConfigOption(EVENT_ELEMENT_NAME, codec_event_str,
										 config_ptr))
		throw EmptyEventException(getId());

	// find the Term reference number for the event type
	Vocabulary::TermRef event_type = v.findTerm(codec_event_str);
	if (event_type == Vocabulary::UNDEFINED_TERM_REF)
		throw UnknownTermException(codec_event_str);
	m_event_term = v[event_type];

	// make sure that it is an object type Term
	if (m_event_term.term_type != Vocabulary::TYPE_OBJECT)
		throw NotAnObjectException(codec_event_str);
}
コード例 #23
0
// check if two word alignments between a phrase pairs "match"
// i.e. they do not differ in the alignment of non-termimals
bool PhraseAlignment::match( const PhraseAlignment& other )
{
	if (other.target != target || other.source != source) return false;
	if (!hierarchicalFlag) return true;

	PHRASE phraseT = phraseTableT.getPhrase( target );

  assert(phraseT.size() == alignedToT.size() + 1);
  assert(alignedToT.size() == other.alignedToT.size());

	// loop over all words (note: 0 = left hand side of rule)
	for(size_t i=0;i<phraseT.size()-1;++i)
		if (isNonTerminal( vcbT.getWord( phraseT[i] ) )) {
			if (alignedToT[i].size() != 1 ||
			    other.alignedToT[i].size() != 1 ||
		    	    *(alignedToT[i].begin()) != *(other.alignedToT[i].begin()))
				return false;
		}

	return true;
}
コード例 #24
0
void TransformReactor::setConfig(const Vocabulary& v, const xmlNodePtr config_ptr)
{
	// first set config options for the Reactor base class
	ConfigWriteLock cfg_lock(*this);
	Reactor::setConfig(v, config_ptr);

	// clear the current configuration
	m_transforms.clear();

	// Outgoing Event type -- i.e. what will the outgoing event be transformed into
	// Default (UNDEFINED_TERM_REF) -- make it the same as incoming event type
	// 	<OutgoingEvent>obj-term</OutgoingEvent>
	m_event_type = Vocabulary::UNDEFINED_TERM_REF;
	std::string event_type_str;
	if (ConfigManager::getConfigOption(OUTGOING_EVENT_ELEMENT_NAME, event_type_str, config_ptr))
	{
		if (!event_type_str.empty())
			m_event_type = v.findTerm(event_type_str);
	}

	// This really doesn't make much sense anymore -- you can wire the delivery of the original right through
	// it would make sense, if it was possible to deliver "if-not-changed" but TR2 always changes...
	// 	<DeliverOriginal>always|if-not-changed|never</DeliveryOriginal>		-> DEFAULT: never
	m_deliver_original = DO_NEVER;
	std::string deliver_original_str;
	if (ConfigManager::getConfigOption(DELIVER_ORIGINAL_NAME, deliver_original_str, config_ptr))
	{
		if (deliver_original_str == "true" || deliver_original_str == "always")
			m_deliver_original = DO_ALWAYS;
		else if (deliver_original_str == "if-not-changed")
			m_deliver_original = DO_SOMETIMES;
		// Could add code to throw if d_o_s is not "never"
	}

	// What fields/terms of the original event should be COPIED into the new event
	// <CopyOriginal>all-terms|if-not-defined|none</CopyOriginal>			-> DEFAULT: if-not-defined
	m_copy_original = COPY_UNCHANGED;
	std::string copy_original_str;
	if (ConfigManager::getConfigOption(COPY_ORIGINAL_ELEMENT_NAME, copy_original_str, config_ptr))
	{
		if (copy_original_str == "all-terms")
			m_copy_original = COPY_ALL;
		else if (copy_original_str == "none")
			m_copy_original = COPY_NONE;
		// Could add code to throw if c_o_s is not "if-not-defined"
	}

	// now, parse transformation rules
	// [rpt]	<Transformation>
	xmlNodePtr transformation_node = config_ptr;
	while ( (transformation_node = ConfigManager::findConfigNodeByName(TRANSFORMATION_ELEMENT_NAME, transformation_node)) != NULL)
	{
		// parse new Transformation rule

		// get the Term used for the Transformation rule
		//	<Term>src-term</Term>
		std::string term_id;
		if (! ConfigManager::getConfigOption(TERM_ELEMENT_NAME, term_id,
											 transformation_node->children))
			throw EmptyTermException(getId());

		// make sure that the Term is valid
		const Vocabulary::TermRef term_ref = v.findTerm(term_id);
		if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
			throw UnknownTermException(getId());

		// get the Type of transformation
		//	<Type>AssignValue|AssignTerm|Lookup|Rules</Type>
		std::string type_str;
		if (! ConfigManager::getConfigOption(TYPE_ELEMENT_NAME, type_str,
											 transformation_node->children))
			throw EmptyTypeException(getId());	// TODO: Improve the error message

		// Add the transformation
		const bool debug_mode = getReactionEngine().getDebugMode();
		Transform *new_transform;
		if (type_str == "AssignValue")
			new_transform = new TransformAssignValue(v, v[term_ref], transformation_node->children, debug_mode);
		else if (type_str == "AssignTerm")
			new_transform = new TransformAssignTerm(v, v[term_ref], transformation_node->children, debug_mode);
		else if (type_str == "Lookup")
			new_transform = new TransformLookup(v, v[term_ref], transformation_node->children, debug_mode);
		else if (type_str == "Rules")
			new_transform = new TransformRules(v, v[term_ref], transformation_node->children, debug_mode);
		else if (type_str == "Regex")
			new_transform = new TransformRegex(v, v[term_ref], transformation_node->children, debug_mode);
		else if (type_str == "SplitTerm")
			new_transform = new TransformSplitTerm(v, v[term_ref], transformation_node->children, debug_mode);
		else if (type_str == "JoinTerm")
			new_transform = new TransformJoinTerm(v, v[term_ref], transformation_node->children, debug_mode);
		else if (type_str == "URLEncode")
			new_transform = new TransformURLEncode(v, v[term_ref], transformation_node->children, debug_mode);
		else if (type_str == "URLDecode")
			new_transform = new TransformURLDecode(v, v[term_ref], transformation_node->children, debug_mode);
		else
			throw InvalidTransformation(type_str);

		m_transforms.push_back(new_transform);

		// step to the next Comparison rule
		transformation_node = transformation_node->next;
	}
}
コード例 #25
0
void processPhrasePairs( vector< PhraseAlignment > &phrasePair )
{
  if (phrasePair.size() == 0) return;
  map<int, int> countE;
  map<int, int> alignmentE;
  int totalCount = 0;
  int currentCount = 0;
  int maxSameCount = 0;
  int maxSame = -1;
  int old = -1;
  for(size_t i=0; i<phrasePair.size(); i++) {
    if (i>0) {
      if (phrasePair[old].english == phrasePair[i].english) {
        if (! phrasePair[i].equals( phrasePair[old] )) {
          if (currentCount > maxSameCount) {
            maxSameCount = currentCount;
            maxSame = i-1;
          }
          currentCount = 0;
        }
      } else {
        // wrap up old E
        if (currentCount > maxSameCount) {
          maxSameCount = currentCount;
          maxSame = i-1;
        }

        alignmentE[ phrasePair[old].english ] = maxSame;
        //	if (maxSameCount != totalCount)
        //  cout << "max count is " << maxSameCount << "/" << totalCount << endl;

        // get ready for new E
        totalCount = 0;
        currentCount = 0;
        maxSameCount = 0;
        maxSame = -1;
      }
    }
    countE[ phrasePair[i].english ]++;
    old = i;
    currentCount++;
    totalCount++;
  }

  // wrap up old E
  if (currentCount > maxSameCount) {
    maxSameCount = currentCount;
    maxSame = phrasePair.size()-1;
  }
  alignmentE[ phrasePair[old].english ] = maxSame;
  //  if (maxSameCount != totalCount)
  //    cout << "max count is " << maxSameCount << "/" << totalCount << endl;

  // output table
  typedef map< int, int >::iterator II;
  PHRASE phraseF = phraseTableF.getPhrase( phrasePair[0].foreign );
  size_t index = 0;
  for(II i = countE.begin(); i != countE.end(); i++) {
    //cout << "\tp( " << i->first << " | " << phrasePair[0].foreign << " ; " << phraseF.size() << " ) = ...\n";
    //cerr << index << endl;

    // foreign phrase (unless inverse)
    if (! inverseFlag) {
      for(size_t j=0; j<phraseF.size(); j++) {
        phraseTableFile << vcbF.getWord( phraseF[j] );
        phraseTableFile << " ";
      }
      phraseTableFile << "||| ";
    }

    // english phrase
    PHRASE phraseE = phraseTableE.getPhrase( i->first );
    for(size_t j=0; j<phraseE.size(); j++) {
      phraseTableFile << vcbE.getWord( phraseE[j] );
      phraseTableFile << " ";
    }
    phraseTableFile << "||| ";

    // foreign phrase (if inverse)
    if (inverseFlag) {
      for(size_t j=0; j<phraseF.size(); j++) {
        phraseTableFile << vcbF.getWord( phraseF[j] );
        phraseTableFile << " ";
      }
      phraseTableFile << "||| ";
    }

    // phrase pair frequency
    phraseTableFile << i->second;

    //source phrase pair frequency
    phraseTableFile << " " << phrasePair.size();

    // source phrase length
    phraseTableFile	<< " " << phraseF.size();

    // target phrase length
    phraseTableFile	<< " " << phraseE.size();

    phraseTableFile << endl;

    index += i->second;
  }
}
コード例 #26
0
ファイル: main.cpp プロジェクト: LiliMeng/FLANNBOWSearch
int main()
{
    int imgNum=300;

    vector<Mat>  imgVec;
    imgVec.resize(imgNum);

    vector<string> nameVec;
    nameVec.resize(imgNum);

    vector<vector<KeyPoint> > keyPointsVec;
    keyPointsVec.resize(imgNum);

    vector<Mat> descriptorsVec;
    descriptorsVec.resize(imgNum);

    for(int i=0; i<imgNum; i++)
    {
        char fileName[1024] ={NULL};

        sprintf(fileName, "/home/lili/workspace/SLAM/vocabTree/Lip6IndoorDataSet/Images/lip6kennedy_bigdoubleloop_%06d.ppm", i);

        nameVec[i]=string(fileName);

        imgVec[i]=imread(nameVec[i], CV_LOAD_IMAGE_GRAYSCALE);
    }

    //-- Step 1: Detect the keypoints using SURF Detector
    int minHessian = 400;

    SurfFeatureDetector detector(minHessian);

    SurfDescriptorExtractor extractor;

    vector<unsigned int> labels;
    for(int i=0; i<imgNum; i++)
    {
        detector.detect(imgVec[i], keyPointsVec[i]);

        extractor.compute(imgVec[i], keyPointsVec[i], descriptorsVec[i]);
        for(int j = 0; j<descriptorsVec[i].rows; j++)
        {
            labels.push_back(i);
        }
    }

    Mat all_descriptors;

    for(int i = 0; i<descriptorsVec.size(); i++)
    {
        all_descriptors.push_back(descriptorsVec[i]);
    }

    assert(labels.size() == all_descriptors.rows);
    cout<<"all_descriptors.rows "<<all_descriptors.rows<<endl;
    cout<<"hahha1 "<<endl;
    Vocabulary vocab;

    vocab.indexedDescriptors_ = all_descriptors;


    vector<KeyPoint> newKeypoints;
    Mat newDescriptors;

    ///add new image to the randomized kd tree
    {
        string newImageName="/home/lili/workspace/SLAM/vocabTree/Lip6IndoorDataSet/Images/lip6kennedy_bigdoubleloop_000350.ppm";
        Mat newImg=imread(newImageName, CV_LOAD_IMAGE_GRAYSCALE);
        detector.detect(newImg, newKeypoints);
        extractor.compute(newImg, newKeypoints, newDescriptors);
        cout<<"newDescriptors.rows: "<<newDescriptors.rows<<endl;
    }

    vocab.notIndexedDescriptors_ = newDescriptors;

    ///clustering
    int clustersNum;
    Mat clusters(15000,64,CV_32F);
    //Mat float_all_descriptors;


    clustersNum=vocab.clustering(all_descriptors, clusters);
    cout<<"clustersNum  "<<clustersNum<<endl;

    ///flann build tree
    clock_t begin1 = clock();
    vocab.update();
    clock_t end1 = clock();
    double buildTree_time = double(end1 - begin1) / CLOCKS_PER_SEC;
    cout.precision(5);
    cout<<"buildTree time "<<buildTree_time<<endl;


    cout<<"hahha2 "<<endl;
    vector<KeyPoint> queryKeypoints;
    Mat queryDescriptors;

    ///QueryImage
    {
        string queryImageName="/home/lili/workspace/SLAM/vocabTree/Lip6IndoorDataSet/Images/lip6kennedy_bigdoubleloop_000381.ppm";
        Mat queryImg=imread(queryImageName, CV_LOAD_IMAGE_GRAYSCALE);
        detector.detect(queryImg, queryKeypoints);
        extractor.compute(queryImg, queryKeypoints, queryDescriptors);
        cout<<"queryDescriptors.rows: "<<queryDescriptors.rows<<endl;
    }


    Mat indices;
    Mat dists;
    int k=2;

    clock_t begin2 = clock();
    vocab.search(queryDescriptors, indices, dists, k);
    clock_t end2 = clock();
    double query_time = double(end2 - begin2) / CLOCKS_PER_SEC;
    cout.precision(5);
    cout<<"query time "<<query_time<<endl;

    std::vector<int> indicesVec(indices.rows*indices.cols);

    if (indices.isContinuous())
    {
        indicesVec.assign((int*)indices.datastart, (int*)indices.dataend);
    }

    cout<<"indicesVec.size() "<<indicesVec.size()<<endl;

    /// Process Nearest Neighbor Distance Ratio
    float nndRatio = 0.8;

    for(int i=0; i<indicesVec.size(); i++)
    {
        if(dists.at<float>(i,0)<nndRatio*dists.at<float>(i,1))
        {
            cout<<"indicesVec["<<i<<"] "<<indicesVec[i]<<"  image labels "<<labels[indicesVec[i]]<<endl;
        }
    }


    return 0;
}
コード例 #27
0
void SearchDialog::doRemoveTerms( bool allowSelectTrans /* = true */, bool confirmBeforeRemove /* = true */ ) {
    int selectedItemCount = 0;
    // Find all the translation languages of the selected terms.
    QStringList translationLanguages;

    for( int i = 0; i < resultsListView->topLevelItemCount(); i++ ) {
        ResultListItem* termItem = (ResultListItem*)resultsListView->topLevelItem( i );
        if( termItem->isSelected() ) {
            selectedItemCount++;
            Term* term = termItem->getTerm();
            for( Term::TranslationMap::ConstIterator it = term->translationsBegin(); it != term->translationsEnd(); it++ ) {
                const Translation& trans = *it;
                if( !translationLanguages.contains( trans.getLanguage() ) )
                    translationLanguages.append( trans.getLanguage() );
            }
        }
    }

    if( selectedItemCount == 0 )
        return;

    if( translationLanguages.count() <= 2 ) {
        int response;

        if( confirmBeforeRemove ) {
            QMessageBox msgBox( QObject::tr( "Warning" ), tr( "ConfirmRemoveSelectedTerms" ),
                QMessageBox::Warning,
                QMessageBox::Yes,
                QMessageBox::No | QMessageBox::Default | QMessageBox::Escape,
                QMessageBox::NoButton,
                this );
            msgBox.setButtonText( QMessageBox::Yes, tr( "Yes" ) );
            msgBox.setButtonText( QMessageBox::No, tr( "No" ) );

            response = msgBox.exec();
        }
        else 
            response = QMessageBox::Yes;

        if( response == QMessageBox::Yes ) {
            for( int i = 0; i < resultsListView->topLevelItemCount(); i++ ) {
                ResultListItem* termItem = (ResultListItem*)resultsListView->topLevelItem( i );
                if( termItem->isSelected() ) {
                    Term* term = termItem->getTerm();
                    Vocabulary* vocab = controller->getVocabTree()->getVocabulary( term->getVocabId() );
                    if( !term->getImagePath().isNull() ) {
                        QDir imagePath( term->getImagePath() );
                        if( imagePath.isRelative() ) {
                            const QString& imagePath = controller->getApplicationDirName() + "/" + vocab->getParent()->getPath() +
                                "/v-" + QString::number( vocab->getId() ) + "/" + term->getImagePath();
                            QFile imageFile( imagePath );
                            if( imageFile.exists() ) {
                                if( !imageFile.remove() )
                                    cerr << "Could not remove image " << qPrintable( imagePath ) << endl;
                            }
                        }
                    }
                    vocab->removeTerm( term->getId() );
                    delete( termItem );
                    vocab->setModificationDate( QDateTime::currentDateTime() );
                    vocab->setDirty( true );
                }
            }

            resultsListView->clearSelection();
            updateUi();
            emit termsRemoved();
        }
    }
    else {
        int response;
        QStringList selectedLanguages;
        if( allowSelectTrans ) {
            TranslationSelectionDialog msgBox( tr( "MultipleTranslationsDetectedForRemoveTermsCaption" ), tr( "MultipleTranslationsDetectedForRemoveTerms" ), 
                translationLanguages, TranslationSelectionDialog::selectionModeTargetLanguage, controller, this );
            msgBox.setMaximumHeight( size().height() - 40 );
            msgBox.setMaximumWidth( size().width() - 40 );
            response = msgBox.exec();
            if( response )
                selectedLanguages = msgBox.getSelectedLanguages();
        }
        else {
            selectedLanguages = QStringList();
            selectedLanguages.append( controller->getPreferences().getFirstLanguage() );
            selectedLanguages.append( controller->getPreferences().getTestLanguage() );
        }
        if( selectedLanguages.count() == 0 )
            return;

        for( int i = 0; i < resultsListView->topLevelItemCount(); i++ ) {
            ResultListItem* termItem = (ResultListItem*)resultsListView->topLevelItem( i );

            if( termItem->isSelected() ) {
                Term* term = termItem->getTerm();
                Vocabulary* vocab = controller->getVocabTree()->getVocabulary( term->getVocabId() );

                for( QStringList::ConstIterator it = selectedLanguages.begin(); it != selectedLanguages.end(); it++ ) {
                    QString lang = *it;
                    term->removeTranslation( lang );
                }
                
                if( term->getTranslationCount() == 0 ) {
                    if( !term->getImagePath().isNull() ) {
                        QDir imagePath( term->getImagePath() );
                        if( imagePath.isRelative() ) {
                            const QString& imagePath = controller->getApplicationDirName() + "/" + vocab->getParent()->getPath() +
                                "/v-" + QString::number( vocab->getId() ) + "/" + term->getImagePath();
                            QFile imageFile( imagePath );
                            if( imageFile.exists() ) {
                                if( !imageFile.remove() )
                                    cerr << "Could not remove image " << qPrintable( imagePath ) << endl;
                            }
                        }
                    }
                    vocab->removeTerm( term->getId() );
                    delete( termItem );
                    vocab->setModificationDate( QDateTime::currentDateTime() );
                    vocab->setDirty( true );
                }
            }
        }

        resultsListView->clearSelection();
        updateUi();
    }
}
コード例 #28
0
ファイル: kparse.cpp プロジェクト: Cruel/scummvm
reg_t kParse(EngineState *s, int argc, reg_t *argv) {
	SegManager *segMan = s->_segMan;
	reg_t stringpos = argv[0];
	Common::String string = s->_segMan->getString(stringpos);
	char *error;
	reg_t event = argv[1];
	g_sci->checkVocabularySwitch();
	Vocabulary *voc = g_sci->getVocabulary();
	voc->parser_event = event;
	reg_t params[2] = { s->_segMan->getParserPtr(), stringpos };

	ResultWordListList words;
	bool res = voc->tokenizeString(words, string.c_str(), &error);
	voc->parserIsValid = false; /* not valid */

	if (res && !words.empty()) {
		voc->synonymizeTokens(words);

		s->r_acc = make_reg(0, 1);

#ifdef DEBUG_PARSER
		debugC(kDebugLevelParser, "Parsed to the following blocks:");

		for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
			debugCN(2, kDebugLevelParser, "   ");
			for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j) {
				debugCN(2, kDebugLevelParser, "%sType[%04x] Group[%04x]", j == i->begin() ? "" : " / ", j->_class, j->_group);
			}
			debugCN(2, kDebugLevelParser, "\n");
		}
#endif

		voc->replacePronouns(words);

		int syntax_fail = voc->parseGNF(words);

		if (syntax_fail) {
			s->r_acc = make_reg(0, 1);
			writeSelectorValue(segMan, event, SELECTOR(claimed), 1);

			invokeSelector(s, g_sci->getGameObject(), SELECTOR(syntaxFail), argc, argv, 2, params);
			/* Issue warning */

			debugC(kDebugLevelParser, "Tree building failed");

		} else {
			voc->parserIsValid = true;
			voc->storePronounReference();
			writeSelectorValue(segMan, event, SELECTOR(claimed), 0);

#ifdef DEBUG_PARSER
			voc->dumpParseTree();
#endif
		}

	} else {

		s->r_acc = make_reg(0, 0);
		writeSelectorValue(segMan, event, SELECTOR(claimed), 1);

		if (error) {
			s->_segMan->strcpy(s->_segMan->getParserPtr(), error);
			debugC(kDebugLevelParser, "Word unknown: %s", error);
			/* Issue warning: */

			invokeSelector(s, g_sci->getGameObject(), SELECTOR(wordFail), argc, argv, 2, params);
			free(error);
			return make_reg(0, 1); /* Tell them that it didn't work */
		}
	}

	return s->r_acc;
}
コード例 #29
0
ファイル: FissionReactor.cpp プロジェクト: acmorrow/pion-core
void FissionReactor::setConfig(const Vocabulary& v, const xmlNodePtr config_ptr)
{
	// first set config options for the Reactor base class
	ConfigWriteLock cfg_lock(*this);
	Reactor::setConfig(v, config_ptr);

	// get the input event type
	std::string config_str;
	if (! ConfigManager::getConfigOption(INPUT_EVENT_TYPE_ELEMENT_NAME, config_str, config_ptr))
		throw EmptyInputEventTypeException(getId());

	// find vocabulary term for input event type
	Vocabulary::TermRef term_ref = v.findTerm(config_str);
	if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
		throw UnknownTermException(config_str);
	m_input_event_type = v[term_ref];

	// make sure that term is object/event type
	if (m_input_event_type.term_type != Vocabulary::TYPE_OBJECT)
		throw NotAnObjectException(config_str);

	// get the input event term
	if (! ConfigManager::getConfigOption(INPUT_EVENT_TERM_ELEMENT_NAME, config_str, config_ptr))
		throw EmptyInputEventTermException(getId());

	// find vocabulary term for input event term
	term_ref = v.findTerm(config_str);
	if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
		throw UnknownTermException(config_str);
	m_input_event_term = v[term_ref];

	// only string types are currently supported for input event term
	switch (m_input_event_term.term_type) {
	case Vocabulary::TYPE_NULL:
	case Vocabulary::TYPE_OBJECT:
	case Vocabulary::TYPE_INT8:
	case Vocabulary::TYPE_INT16:
	case Vocabulary::TYPE_INT32:
	case Vocabulary::TYPE_UINT8:
	case Vocabulary::TYPE_UINT16:
	case Vocabulary::TYPE_UINT32:
	case Vocabulary::TYPE_INT64:
	case Vocabulary::TYPE_UINT64:
	case Vocabulary::TYPE_FLOAT:
	case Vocabulary::TYPE_DOUBLE:
	case Vocabulary::TYPE_LONG_DOUBLE:
	case Vocabulary::TYPE_DATE_TIME:
	case Vocabulary::TYPE_DATE:
	case Vocabulary::TYPE_TIME:
		throw TermNotStringException(config_str);
		break;
	case Vocabulary::TYPE_SHORT_STRING:
	case Vocabulary::TYPE_STRING:
	case Vocabulary::TYPE_LONG_STRING:
	case Vocabulary::TYPE_CHAR:
	case Vocabulary::TYPE_BLOB:
	case Vocabulary::TYPE_ZBLOB:
		break;	// these are all OK
	}

	// get the codec to use
	boost::mutex::scoped_lock codec_lock(m_codec_mutex);
	if (! ConfigManager::getConfigOption(CODEC_ELEMENT_NAME, m_codec_id, config_ptr))
		throw EmptyCodecException(getId());
	m_codec_ptr = getCodecFactory().getCodec(m_codec_id);	
	PION_ASSERT(m_codec_ptr);
	codec_lock.unlock();

	// check if we should copy all terms from original event
	m_copy_all_terms = false;
	std::string copy_all_terms_str;
	if (ConfigManager::getConfigOption(COPY_ALL_TERMS_ELEMENT_NAME,
									   copy_all_terms_str, config_ptr))
	{
		if (copy_all_terms_str == "true")
			m_copy_all_terms = true;
	}

	// get list of terms to copy from original event
	m_copy_terms.clear();
	xmlNodePtr copy_term_node = config_ptr;
	while ((copy_term_node = ConfigManager::findConfigNodeByName(COPY_TERM_ELEMENT_NAME, copy_term_node)) != NULL) {
		xmlChar *xml_char_ptr = xmlNodeGetContent(copy_term_node);
		if (xml_char_ptr != NULL) {
			const std::string copy_term_str(reinterpret_cast<char*>(xml_char_ptr));
			xmlFree(xml_char_ptr);
			if (! copy_term_str.empty()) {
				// find the term in the Vocabulary
				term_ref = v.findTerm(copy_term_str);
				if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
					throw UnknownTermException(copy_term_str);

				// add it to the copy terms collection
				m_copy_terms.push_back(v[term_ref]);
			}
		}

		// step to the next copy term
		copy_term_node = copy_term_node->next;
	}
}
コード例 #30
0
ファイル: score.cpp プロジェクト: xwd/mosesGit-hiero
void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCount, int distinctCount, ostream &phraseTableFile )
{
  if (phrasePair.size() == 0) return;

  PhraseAlignment *bestAlignment = findBestAlignment( phrasePair );
    
  // compute count
  float count = 0;
  for(size_t i=0; i<phrasePair.size(); i++) {
    count += phrasePair[i]->count;
  }

  // collect count of count statistics
  if (goodTuringFlag || kneserNeyFlag) {
    totalDistinct++;
    int countInt = count + 0.99999;
    if(countInt <= COC_MAX)
      countOfCounts[ countInt ]++;
  }

  // compute PCFG score
  float pcfgScore;
  if (pcfgFlag && !inverseFlag) {
    float pcfgSum = 0;
    for(size_t i=0; i<phrasePair.size(); ++i) {
        pcfgSum += phrasePair[i]->pcfgSum;
    }
    pcfgScore = pcfgSum / count;
  }

  // output phrases
  const PHRASE &phraseS = phrasePair[0]->GetSource();
  const PHRASE &phraseT = phrasePair[0]->GetTarget();

  // do not output if hierarchical and count below threshold
  if (hierarchicalFlag && count < minCountHierarchical) {
    for(int j=0; j<phraseS.size()-1; j++) {
      if (isNonTerminal(vcbS.getWord( phraseS[j] )))
        return;
    }
  }

  // source phrase (unless inverse)
  if (! inverseFlag) {
    printSourcePhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
    phraseTableFile << " ||| ";
  }

  // target phrase
  printTargetPhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
  phraseTableFile << " ||| ";

  // source phrase (if inverse)
  if (inverseFlag) {
    printSourcePhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
    phraseTableFile << " ||| ";
  }

  // lexical translation probability
  if (lexFlag) {
    double lexScore = computeLexicalTranslation( phraseS, phraseT, bestAlignment);
    phraseTableFile << ( logProbFlag ? negLogProb*log(lexScore) : lexScore );
  }

  // unaligned word penalty
  if (unalignedFlag) {
    double penalty = computeUnalignedPenalty( phraseS, phraseT, bestAlignment);
    phraseTableFile << " " << ( logProbFlag ? negLogProb*log(penalty) : penalty );
  }

  // unaligned function word penalty
  if (unalignedFWFlag) {
    double penalty = computeUnalignedFWPenalty( phraseS, phraseT, bestAlignment);
    phraseTableFile << " " << ( logProbFlag ? negLogProb*log(penalty) : penalty );
  }

  if (pcfgFlag && !inverseFlag) {
    // target-side PCFG score
    phraseTableFile << " " << pcfgScore;
  }

  phraseTableFile << " ||| ";

  // alignment info for non-terminals
  if (! inverseFlag) {
    if (hierarchicalFlag) {
      // always output alignment if hiero style, but only for non-terms
      assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
      for(int j = 0; j < phraseT.size() - 1; j++) {
        if (isNonTerminal(vcbT.getWord( phraseT[j] ))) {
          if (bestAlignment->alignedToT[ j ].size() != 1) {
            cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << endl;
            phraseTableFile.flush();
            assert(bestAlignment->alignedToT[ j ].size() == 1);
          }
          int sourcePos = *(bestAlignment->alignedToT[ j ].begin());
          phraseTableFile << sourcePos << "-" << j << " ";
        }
      }
    } else if (wordAlignmentFlag) {
      // alignment info in pb model
      for(int j=0; j<bestAlignment->alignedToT.size(); j++) {
        const set< size_t > &aligned = bestAlignment->alignedToT[j];
        for (set< size_t >::const_iterator p(aligned.begin()); p != aligned.end(); ++p) {
          phraseTableFile << *p << "-" << j << " ";
        }
      }
    }
  }

  // counts
  
  phraseTableFile << " ||| " << totalCount << " " << count;
  if (kneserNeyFlag) 
    phraseTableFile << " " << distinctCount;
  
  // nt lengths  
  if (outputNTLengths)
  {
    phraseTableFile << " ||| ";

    if (!inverseFlag)
    {
      map<size_t, map<size_t, float> > sourceProb, targetProb;
      // 1st sourcePos, 2nd = length, 3rd = prob

      calcNTLengthProb(phrasePair, sourceProb, targetProb);
      
      outputNTLengthProbs(phraseTableFile, sourceProb, "S");
      outputNTLengthProbs(phraseTableFile, targetProb, "T");
    }    
  }
  
  phraseTableFile << endl;
}