void model1::em_loop(int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1, bool dump_alignment, const char* alignfile, Dictionary& dict, bool useDict, Word2Vec& word2vec, bool useWord2Vec, Perplexity& viterbi_perp, bool test) { WordIndex i, j, l, m ; double cross_entropy; int pair_no=0 ; perp.clear(); viterbi_perp.clear(); ofstream of2; // for each sentence pair in the corpus if (dump_alignment||FEWDUMPS) of2.open(alignfile); PROB uniform = 1.0/noFrenchWords ; sentPair sent ; sHandler1.rewind(); while(sHandler1.getNextSentence(sent)){ Vector<WordIndex>& es = sent.eSent; Vector<WordIndex>& fs = sent.fSent; const float so = sent.getCount(); l = es.size() - 1; m = fs.size() - 1; cross_entropy = log(1.0); Vector<WordIndex> viterbi_alignment(fs.size()); double viterbi_score = 1 ; bool eindict[l + 1]; bool findict[m + 1]; bool indict[m + 1][l + 1]; bool isSimilar[m + 1][l + 1]; if(useWord2Vec && word2vec.Method == 1){ for(unsigned int dummy = 0; dummy <= m; dummy++){ for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++) isSimilar[dummy][dummy2] = false; } for(i = 1; i <= l; i++){ map<WordIndex, bool> simWords = word2vec.getVectorMap(es[i]); for(j = 1; j <= m; j++){ if(simWords.find(fs[j]) != simWords.end()){ isSimilar[j][i] = true; } } } } // cout << sent.sentenceNo << endl; if(it == 1 && useDict){ for(unsigned int dummy = 0; dummy <= l; dummy++) { eindict[dummy] = false; } for(unsigned int dummy = 0; dummy <= m; dummy++){ findict[dummy] = false; for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++) indict[dummy][dummy2] = false; } for(j = 0; j <= m; j++) for(i = 0; i <= l; i++) if(dict.indict(fs[j], es[i])){ eindict[i] = findict[j] = indict[j][i] = true; } } for(j=1; j <= m; j++){ // entries that map fs to all possible ei in this sentence. Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table LpPair<COUNT,PROB> **sPtrCachePtr; PROB denom = 0.0; WordIndex best_i = 0 ; // i for which fj is best maped to ei PROB word_best_score = 0 ; // score for the best mapping of fj if (it == 1 && !seedModel1){ denom = uniform * es.size(); word_best_score = uniform ; } else for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){ PROB e(0.0) ; (*sPtrCachePtr) = tTable.getPtr(es[i], fs[j]) ; if(i && useWord2Vec && !isSimilar[j][i] && word2vec.Method == 1) continue; if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH) e = (*((*sPtrCachePtr))).prob; else e = PROB_SMOOTH ; denom += e ; if (e > word_best_score){ word_best_score = e ; best_i = i ; } } viterbi_alignment[j] = best_i ; viterbi_score *= word_best_score ; /// denom ; if (denom == 0){ if (test) cerr << "WARNING: denom is zero (TEST)\n"; else cerr << "WARNING: denom is zero (TRAIN)\n"; } cross_entropy += log(denom) ; if (!test){ if(denom > 0){ COUNT val = COUNT(so) / (COUNT) double(denom) ; /* this if loop implements a constraint on counting: count(es[i], fs[j]) is implemented if and only if es[i] and fs[j] occur together in the dictionary, OR es[i] does not occur in the dictionary with any fs[x] and fs[j] does not occur in the dictionary with any es[y] */ if(it == 1 && useDict){ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){ if(i && useWord2Vec && !isSimilar[j][i] && word2vec.Method == 1) continue; if(indict[j][i] || (!findict[j] && !eindict[i])){ PROB e(0.0) ; if (it == 1 && !seedModel1) e = uniform ; else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH) e = (*((*sPtrCachePtr))).prob; else e = PROB_SMOOTH ; COUNT x=e*val; if( it==1||x>MINCOUNTINCREASE ) if ((*sPtrCachePtr) != 0) (*((*sPtrCachePtr))).count += x; else tTable.incCount(es[i], fs[j], x); } /* end of if */ } /* end of for i */ } /* end of it == 1 */ // Old code: else{ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){ if(i && useWord2Vec && !isSimilar[j][i] && word2vec.Method == 1) continue; PROB e(0.0) ; if (it == 1 && !seedModel1){ e = uniform ; } else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH) e = (*((*sPtrCachePtr))).prob; else e = PROB_SMOOTH ; //if( !(i==0) ) //cout << "COUNT(e): " << e << " " << MINCOUNTINCREASE << endl; // if (useWord2Vec && word2vec.Method == 2) // e = e * (1. - word2vec.L) + word2vec.getW2VProb(es[i], fs[j]) * word2vec.L; COUNT x=e*val; if( pair_no==VerboseSentence ) cout << i << "(" << evlist[es[i]].word << ")," << j << "(" << fvlist[fs[j]].word << ")=" << x << endl; if( it==1||x>MINCOUNTINCREASE ) if( NoEmptyWord==0 || i!=0 ) if ((*sPtrCachePtr) != 0) (*((*sPtrCachePtr))).count += x; else tTable.incCount(es[i], fs[j], x); } /* end of for i */ } // end of else } // end of if (denom > 0) }// if (!test) } // end of for (j) ; sHandler1.setProbOfSentence(sent,cross_entropy); //cerr << sent << "CE: " << cross_entropy << " " << so << endl; perp.addFactor(cross_entropy-m*log(l+1.0), so, l, m,1); viterbi_perp.addFactor(log(viterbi_score)-m*log(l+1.0), so, l, m,1); if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000)) printAlignToFile(es, fs, evlist, fvlist, of2, viterbi_alignment, sent.sentenceNo, viterbi_score); addAL(viterbi_alignment,sent.sentenceNo,l); pair_no++; } /* of while */ sHandler1.rewind(); perp.record("Model1"); viterbi_perp.record("Model1"); errorReportAL(cout, "IBM-1"); }
void model2::em_loop(Perplexity& perp, sentenceHandler& sHandler1, bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp, bool test) { massert( aTable.is_distortion==0 ); massert( aCountTable.is_distortion==0 ); WordIndex i, j, l, m ; double cross_entropy; int pair_no=0 ; perp.clear(); viterbi_perp.clear(); ofstream of2; // for each sentence pair in the corpus if (dump_alignment||FEWDUMPS ) of2.open(alignfile); sentPair sent ; vector<double> ferts(evlist.size()); sHandler1.rewind(); while(sHandler1.getNextSentence(sent)){ Vector<WordIndex>& es = sent.eSent; Vector<WordIndex>& fs = sent.fSent; const float so = sent.getCount(); l = es.size() - 1; m = fs.size() - 1; cross_entropy = log(1.0); Vector<WordIndex> viterbi_alignment(fs.size()); double viterbi_score = 1; for(j=1; j <= m; j++){ Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table // entries that map fs to all possible ei in this sentence. PROB denom = 0.0; PROB e = 0.0, word_best_score = 0; WordIndex best_i = 0 ; // i for which fj is best maped to ei for(i=0; i <= l; i++){ sPtrCache[i] = tTable.getPtr(es[i], fs[j]) ; if (sPtrCache[i] != 0 &&(*(sPtrCache[i])).prob > PROB_SMOOTH ) e = (*(sPtrCache[i])).prob * aTable.getValue(i,j, l, m) ; else e = PROB_SMOOTH * aTable.getValue(i,j, l, m); denom += e ; if (e > word_best_score){ word_best_score = e ; best_i = i ; } } viterbi_alignment[j] = best_i ; viterbi_score *= word_best_score; ///denom ; cross_entropy += log(denom) ; if (denom == 0){ if (test) cerr << "WARNING: denom is zero (TEST)\n"; else cerr << "WARNING: denom is zero (TRAIN)\n"; } if (!test){ if(denom > 0){ COUNT val = COUNT(so) / (COUNT) double(denom) ; for( i=0; i <= l; i++){ PROB e(0.0); if (sPtrCache[i] != 0 && (*(sPtrCache[i])).prob > PROB_SMOOTH) e = (*(sPtrCache[i])).prob ; else e = PROB_SMOOTH ; e *= aTable.getValue(i,j, l, m); COUNT temp = COUNT(e) * val ; if( NoEmptyWord==0 || i!=0 ) if (sPtrCache[i] != 0) (*(sPtrCache[i])).count += temp ; else tTable.incCount(es[i], fs[j], temp); aCountTable.getRef(i,j, l, m)+= temp ; } /* end of for i */ } // end of if (denom > 0) }// if (!test) } // end of for (j) ; sHandler1.setProbOfSentence(sent,cross_entropy); perp.addFactor(cross_entropy, so, l, m,1); viterbi_perp.addFactor(log(viterbi_score), so, l, m,1); if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000) ) printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.sentenceNo, viterbi_score); addAL(viterbi_alignment,sent.sentenceNo,l); pair_no++; } /* of while */ sHandler1.rewind(); perp.record("Model2"); viterbi_perp.record("Model2"); errorReportAL(cout,"IBM-2"); }
void HMM::em_loop(Perplexity& perp, SentenceHandler& sHandler1, bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp, bool test,bool doInit,int) { WordIndex i, j, l, m; double cross_entropy; int pair_no=0; perp.clear(); viterbi_perp.clear(); ofstream of2; // for each sentence pair in the corpus if (dump_alignment||FEWDUMPS) of2.open(alignfile); SentencePair sent; sHandler1.rewind(); while (sHandler1.getNextSentence(sent)) { const Vector<WordIndex>& es = sent.get_eSent(); const Vector<WordIndex>& fs = sent.get_fSent(); const float so = sent.getCount(); l = es.size() - 1; m = fs.size() - 1; cross_entropy = log(1.0); Vector<WordIndex> viterbi_alignment(fs.size()); unsigned int I=2*l,J=m; bool DependencyOfJ=(CompareAlDeps&(16|8))||(g_prediction_in_alignments==2); bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(g_prediction_in_alignments==0); HMMNetwork *net= makeHMMNetwork(es,fs,doInit); Array<double> gamma; Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1); double trainProb; trainProb=ForwardBackwardTraining(*net,gamma,epsilon); if (!test) { double *gp=conv<double>(gamma.begin()); for (unsigned int i2=0;i2<J;i2++)for (unsigned int i1=0;i1<I;++i1,++gp) if (*gp>MINCOUNTINCREASE) { COUNT add= *gp*so; if (i1>=l) { tTable.incCount(es[0],fs[1+i2],add); aCountTable.getRef(0,i2+1,l,m)+=add; } else { tTable.incCount(es[1+i1],fs[1+i2],add); aCountTable.getRef(1+i1,1+i2,l,m)+=add; } } double p0c=0.0,np0c=0.0; for (unsigned int jj=0;jj<epsilon.size();jj++) { int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]); double *ep=epsilon[jj].begin(); if (ep) { //for (i=0;i<I;i++) // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I); // for (i=0;i<I*I;++i) // ep[i] *= I; //if (DependencyOfJ) // if (J-1) // for (i=0;i<I*I;++i) // ep[i] /= (J-1); double mult=1.0; mult*=l; //if (DependencyOfJ && J-1) // mult/=(J-1); for (i=0;i<I;i++) { for (unsigned int i_bef=0;i_bef<I;i_bef++,ep++) { CLASSIFY(i,i_empty,ireal); CLASSIFY2(i_bef,i_befreal); if (i_empty) p0c+=*ep * mult; else { counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]), frenchClass ,jj+1,*ep * mult,0.0); np0c+=*ep * mult; } MASSERT( &epsilon[jj](i,i_bef)== ep); } } } } double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I; Array<double>&ai=counts.doGetAlphaInit(I); Array<double>&bi=counts.doGetBetaInit(I); int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0; for (i=0;i<I;i++,gp1++,gp2++) { CLASSIFY(i,i_empty,ireal); ai[i]+= *gp1; bi[i]+= *gp2; if (DependencyOfPrevAJ==0) { if (i_empty) p0c+=*gp1; else { counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0); np0c+=*gp1; } } } if (g_is_verbose) cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl; } cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100)); Array<int>vit; double viterbi_score=1.0; if ((g_hmm_training_special_flags&1)) HMMViterbi(*net,gamma,vit); else viterbi_score=HMMRealViterbi(*net,vit); for (j=1;j<=m;j++) { viterbi_alignment[j]=vit[j-1]+1; if (viterbi_alignment[j]>l) viterbi_alignment[j]=0; } sHandler1.setProbOfSentence(sent,cross_entropy); perp.addFactor(cross_entropy, so, l, m,1); viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1); if (g_is_verbose) { cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl; } // TODO: Use more safe resource management like RAII. delete net; net = 0; if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000)) printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score); addAL(viterbi_alignment,sent.getSentenceNo(),l); pair_no++; } /* of while */ sHandler1.rewind(); perp.record("HMM"); viterbi_perp.record("HMM"); errorReportAL(cout,"HMM"); }
void model1::em_loop(int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1, bool dump_alignment, const char* alignfile, Dictionary& dict, bool useDict, Perplexity& viterbi_perp, bool test) { WordIndex i, j, l, m ; double cross_entropy; int pair_no=0 ; perp.clear(); viterbi_perp.clear(); ofstream of2; // for each sentence pair in the corpus if (dump_alignment||FEWDUMPS) of2.open(alignfile); cerr <<" number of French (target) words = " << noFrenchWords << endl; PROB uniform = 1.0/noFrenchWords ; cerr << "initial unifrom prob = " << uniform << endl; sentPair sent ; sHandler1.rewind(); while(sHandler1.getNextSentence(sent)){ Vector<WordIndex>& es = sent.eSent; Vector<WordIndex>& fs = sent.fSent; const float so = sent.getCount(); // number of times sentence occurs in corpus //std::cerr << "\n\nNEW sentence (#" << (pair_no + 1) << ") with count = " << so << endl; l = es.size() - 1; // source length m = fs.size() - 1; // target length cross_entropy = log(1.0); Vector<WordIndex> viterbi_alignment(fs.size()); double viterbi_score = 1 ; /*mebool eindict[l + 1]; bool findict[m + 1]; bool indict[m + 1][l + 1]; if(it == 1 && useDict){ for(unsigned int dummy = 0; dummy <= l; dummy++) eindict[dummy] = false; for(unsigned int dummy = 0; dummy <= m; dummy++){ findict[dummy] = false; for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++) indict[dummy][dummy2] = false; } for(j = 0; j <= m; j++) for(i = 0; i <= l; i++) if(dict.indict(fs[j], es[i])){ eindict[i] = findict[j] = indict[j][i] = true; } }me*/ for(j=1; j <= m; j++){ //cerr << "Current french (TARGET) word = " << fs[j] << endl; // entries that map fs to all possible ei in this sentence. Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table LpPair<COUNT,PROB> **sPtrCachePtr; PROB denom = 0.0; WordIndex best_i = 0 ; // i for which fj is best maped to ei PROB word_best_score = 0 ; // score for the best mapping of fj if (it == 1 && !seedModel1){ //cerr << "Using uniform denominator\n"; denom = uniform * es.size() ; word_best_score = uniform ; } else for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){ //cerr << "current english (SOURCE) word = " << es[i] << endl; PROB e(0.0) ; srcHits_.insert(es[i]); (*sPtrCachePtr) = tTable.getPtr(es[i], fs[j]) ; if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH) e = (*((*sPtrCachePtr))).prob; else e = PROB_SMOOTH ; denom += e ; if (e > word_best_score){ word_best_score = e ; best_i = i ; } } viterbi_alignment[j] = best_i ; viterbi_score *= word_best_score ; /// denom ; if (denom == 0){ if (test) cerr << "WARNING: denom is zero (TEST)\n"; else cerr << "WARNING: denom is zero (TRAIN)\n"; } cross_entropy += log(denom) ; if (!test){ if(denom > 0){ COUNT val = COUNT(so) / (COUNT) double(denom) ; /* this if loop implements a constraint on counting: count(es[i], fs[j]) is implemented if and only if es[i] and fs[j] occur together in the dictionary, OR es[i] does not occur in the dictionary with any fs[x] and fs[j] does not occur in the dictionary with any es[y] */ /*meif(it == 1 && useDict){ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){ if(indict[j][i] || (!findict[j] && !eindict[i])){ PROB e(0.0) ; if (it == 1 && !seedModel1) e = uniform ; else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH) e = (*((*sPtrCachePtr))).prob; else e = PROB_SMOOTH ; COUNT x=e*val; if( it==1||x>MINCOUNTINCREASE ) if ((*sPtrCachePtr) != 0) (*((*sPtrCachePtr))).count += x; else tTable.incCount(es[i], fs[j], x); } } } // Old code: else{me*/ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){ //for(i=0; i <= l; i++) { PROB e(0.0) ; if (it == 1 && !seedModel1) e = uniform ; else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH) e = (*((*sPtrCachePtr))).prob; else e = PROB_SMOOTH ; //if( !(i==0) ) //cout << "COUNT(e): " << e << " " << MINCOUNTINCREASE << endl; COUNT x=e*val; // new count if( pair_no==VerboseSentence ) cout << i << "(" << evlist[es[i]].word << ")," << j << "(" << fvlist[fs[j]].word << ")=" << x << endl; if( it==1||x>MINCOUNTINCREASE ) { if(step_k != 0) tTable.stepCounts_[wordPairIds(es[i], fs[j])] += x; else if( NoEmptyWord==0 || i!=0 ) if ((*sPtrCachePtr) != 0){ // handles single sentence updates //x = getInterpolatedCount(x, (*((*sPtrCachePtr))).count); // get interpolated count here (*((*sPtrCachePtr))).count += x; } else { //x = getInterpolatedCount(x, (*((*sPtrCachePtr))).count); // get interpolated count here tTable.incCount(es[i], fs[j], x); } // increment temp table instead } } /* end of for i */ //me} // end of else } // end of if (denom > 0) }// if (!test) } // end of for all (j) target words; sHandler1.setProbOfSentence(sent,cross_entropy); //cerr << sent << "CE: " << cross_entropy << " " << so << endl; perp.addFactor(cross_entropy-m*log(l+1.0), so, l, m,1); viterbi_perp.addFactor(log(viterbi_score)-m*log(l+1.0), so, l, m,1); if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000)) printAlignToFile(es, fs, evlist, fvlist, of2, viterbi_alignment, sent.sentenceNo, viterbi_score); addAL(viterbi_alignment,sent.sentenceNo,l); pair_no++; } /* of while */ sHandler1.rewind(); perp.record("Model1"); viterbi_perp.record("Model1"); errorReportAL(cout, "IBM-1"); }
void model3::viterbi_loop(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1, bool dump_files, const char* alignfile, bool collect_counts, string model ) { WordIndex i, j, l, m ; ofstream of2 ; int pair_no; LogProb temp; if (dump_files) of2.open(alignfile); pair_no = 0 ; // sentence pair number // for each sentence pair in the corpus perp.clear() ; // clears cross_entrop & perplexity viterbiPerp.clear(); sentPair sent ; while(sHandler1.getNextSentence(sent)){ Vector<WordIndex>& es = sent.eSent; Vector<WordIndex>& fs = sent.fSent; const float count = sent.getCount(); if ((sent.sentenceNo % 1000) == 0) cerr <<sent.sentenceNo << '\n'; time_t sent_s = time(NULL) ; pair_no++ ; l = es.size() - 1 ; m = fs.size() - 1 ; if (Log){ logmsg << "Processing sentence pair:\n\t"; printSentencePair(es, fs, logmsg); for (i = 0 ; i <= l ; i++) logmsg << Elist.getVocabList()[es[i]].word << " "; logmsg << "\n\t"; for (j = 1 ; j <= m ; j++) logmsg << Flist.getVocabList()[fs[j]].word << " "; logmsg << "\n"; } LogProb align_total_count=0; // LogProb best_score; Vector<WordIndex> viterbi_alignment; LogProb viterbi_score ; alignmodel neighborhood; neighborhood.clear(); align_total_count = 0; findAlignmentsNeighborhood(/*tTable, aTable,*/ /*p1_count, p0_count,*/ es, fs, align_total_count, neighborhood) ; if (Peg){ for (i = 0 ; i <= l ; i++) for (j = 1 ; j <= m ; j++){ if ( (tTable.getProb(es[i], fs[j]) > PROB_SMOOTH) && (aTable.getValue(i, j, l, m) > PROB_SMOOTH) && (dTable.getValue(j, i, l, m) > PROB_SMOOTH)) findAlignmentsNeighborhood(/*tTable, aTable,*/ /*p1_count, p0_count, */ es, fs, align_total_count, neighborhood, i, j); } } // Now Collect counts over saved neighborhoods viterbi_score = 0 ; if (Verbose) cerr << "\nCollecting counts over found alignments, total prob: " << align_total_count << "\n"; if (Log) logmsg << "\nCollecting counts over found alignments, total prob: " << align_total_count << "\n"; hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator align ; int acount = 0 ; if (align_total_count == 0 ){ cerr << " WARNINIG: For the following sentence pair : \n"; printSentencePair(es, fs, cerr); cerr << "The collection of alignments found have 0 probability!!\n"; cerr << "No counts will be collected of it \n"; if (Log){ logmsg << "The collection of alignments found have 0 probability!!\n"; logmsg << "No counts will be collected of it \n"; } } else { if (collect_counts) { for(align = neighborhood.begin(); align != neighborhood.end(); align++){ temp = (*align).second/align_total_count ; collectCountsOverAlignement(/*tTable, aCountTable, */es, fs, /*p1_count, p0_count ,*/ ((*align).first), temp , count); acount++; if (viterbi_score < temp){ viterbi_alignment = ((*align).first); viterbi_score = temp; } } } // end of if (collect_counts) perp.addFactor(log(double(align_total_count)), count, l, m,0); viterbiPerp.addFactor(log(double(viterbi_score)), count, l, m,0); if (Verbose){ cerr << "Collected counts over "<<acount <<" (of " << pow(double(m), double(l+1)) <<") differnet alignments\n"; cerr << "Bucket count of alignments hash: "<< neighborhood.getHash().bucket_count()<< ", size " << neighborhood.getHash().size() << "\n"; } if (Log){ logmsg << "Collected counts over "<<acount <<" (of " << pow(double(m), double(l+1)) <<") differnet alignments\n"; logmsg << "Bucket count of alignments hash: "<< neighborhood.getHash().bucket_count()<< "\n"; } } // end of else // write best alignment (viterbi) for this sentence pair to alignment file if (collect_counts){ if (viterbi_score <= 0){ cerr << "Viterbi Alignment for this pair have score zero!!\n"; of2 << "\n\n"; } else { if (dump_files) printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, pair_no, viterbi_score); addAL(viterbi_alignment,sent.sentenceNo,l); } } // end of if (collect_counts) double period = difftime(time(NULL), sent_s); if (Log) logmsg << "processing this sentence pair ("<<l+1<<"x"<<m<<") : "<< (l+1)*m << " took : " << period << " seconds\n"; if (Verbose) cerr << "processing this sentence pair took : " << period << " seconds\n"; } /* of sentence pair E, F */ sHandler1.rewind(); errorReportAL(cerr,model); perp.record(model); viterbiPerp.record(model); if (dump_files) of2.close(); }