/// <summary> /// Compare 'a' and 'b' for sorting purposes. This returns 0 if 'a' and 'b' are equal, /// and returns -1 if 'a' is less than 'b', and +1 if 'a' is greater than 'b'. For sorting /// purposes, NaNs are considered to be orderable values beyond Infinity, and NaNs or /// Infinities of the same class with the same sign and coeffients are considered equal. /// (SNaNs are also considered to be beyond QNaNs.) Note also that for sorting purposes, /// -0 equals +0. /// </summary> /// <param name="a">The first value to compare.</param> /// <param name="b">The second value to compare.</param> /// <returns>+1 if a > b; -1 if a < b; and 0 if a == b.</returns> SMILE_API_FUNC Int Real128_Compare(Real128 a, Real128 b) { Byte aClass = CLASSIFY(a); Byte bClass = CLASSIFY(b); switch (_comparisonTable[aClass][bClass]) { case +1: // A is definitely a larger class than B. return +1; case -1: // A is definitely a smaller class than B. return -1; case 2: // Same non-finite positive type, so just compare coefficient bits. if (a.value[1] != b.value[1]) return a.value[1] > b.value[1] ? +1 : -1; if (a.value[0] != b.value[0]) return a.value[0] > b.value[0] ? +1 : -1; return 0; case 3: // Same non-finite negative type, so just compare coefficient bits. if (a.value[1] != b.value[1]) return a.value[1] > b.value[1] ? -1 : +1; if (a.value[0] != b.value[0]) return a.value[0] > b.value[0] ? -1 : +1; return 0; case 0: // Both are finite values, so compare for real. if (Real128_Eq(a, b)) return 0; // Equality is a slightly faster test, so it goes first. if (Real128_Lt(a, b)) return -1; return +1; default: // Shouldn't ever get here. return 0; } }
// Runs one EM pass of HMM alignment training over every sentence pair served
// by sHandler1. Unless 'test' is set, it accumulates lexical counts (tTable),
// positional counts (aCountTable), and jump/transition counts (counts) from
// the forward-backward posteriors; it always updates both perplexity
// accumulators and computes a Viterbi alignment per sentence pair.
//
// Parameters:
//   perp           - accumulates model cross-entropy over the corpus.
//   sHandler1      - sentence-pair source; rewound before and after the loop.
//   dump_alignment - when true, Viterbi alignments are written to 'alignfile'.
//   alignfile      - output path for the alignment dump (opened only if dumping).
//   viterbi_perp   - accumulates Viterbi-path cross-entropy.
//   test           - when true, skip all count collection (evaluation only).
//   doInit         - forwarded to makeHMMNetwork (first-iteration initialization).
//   (unnamed int)  - unused parameter, kept for interface compatibility.
void HMM::em_loop(Perplexity& perp, SentenceHandler& sHandler1, bool dump_alignment,
                  const char* alignfile, Perplexity& viterbi_perp, bool test,bool doInit,int)
{
  WordIndex i, j, l, m;
  double cross_entropy;
  int pair_no=0;  // sentence-pair counter; incremented but not otherwise read here
  perp.clear();
  viterbi_perp.clear();
  ofstream of2;
  // for each sentence pair in the corpus
  if (dump_alignment||FEWDUMPS)
    of2.open(alignfile);
  SentencePair sent;
  sHandler1.rewind();
  while (sHandler1.getNextSentence(sent)) {
    const Vector<WordIndex>& es = sent.get_eSent();
    const Vector<WordIndex>& fs = sent.get_fSent();
    const float so = sent.getCount();  // weight (count) of this sentence pair
    l = es.size() - 1;                 // source length; index 0 is the NULL word
    m = fs.size() - 1;                 // target length; positions are 1-based
    cross_entropy = log(1.0);
    Vector<WordIndex> viterbi_alignment(fs.size());
    // I doubles the state space: states [0,l) are real source positions,
    // states [l,2l) are their empty-word counterparts.
    unsigned int I=2*l,J=m;
    bool DependencyOfJ=(CompareAlDeps&(16|8))||(g_prediction_in_alignments==2);
    bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(g_prediction_in_alignments==0);
    HMMNetwork *net= makeHMMNetwork(es,fs,doInit);
    Array<double> gamma;                                   // state posteriors, J slices of I states
    Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1); // transition posteriors (per j, or pooled)
    double trainProb;
    trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
    if (!test) {
      // Walk gamma linearly (i2 = target position, i1 = state) and convert
      // posteriors above MINCOUNTINCREASE into weighted counts.
      double *gp=conv<double>(gamma.begin());
      for (unsigned int i2=0;i2<J;i2++)
        for (unsigned int i1=0;i1<I;++i1,++gp)
          if (*gp>MINCOUNTINCREASE) {
            COUNT add= *gp*so;
            if (i1>=l) {
              // Empty-word state: credit the NULL source word es[0].
              tTable.incCount(es[0],fs[1+i2],add);
              aCountTable.getRef(0,i2+1,l,m)+=add;
            } else {
              tTable.incCount(es[1+i1],fs[1+i2],add);
              aCountTable.getRef(1+i1,1+i2,l,m)+=add;
            }
          }
      // p0c/np0c track posterior mass flowing to empty vs. non-empty states.
      double p0c=0.0,np0c=0.0;
      for (unsigned int jj=0;jj<epsilon.size();jj++) {
        int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
        double *ep=epsilon[jj].begin();
        if (ep) {
          //for (i=0;i<I;i++)
          //  normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
          //for (i=0;i<I*I;++i)
          //  ep[i] *= I;
          //if (DependencyOfJ)
          //  if (J-1)
          //    for (i=0;i<I*I;++i)
          //      ep[i] /= (J-1);
          double mult=1.0;
          mult*=l;  // scale transition counts by source length
          //if (DependencyOfJ && J-1)
          //  mult/=(J-1);
          // ep walks epsilon[jj] row-major: (i, i_bef) -> current and previous
          // state of the transition; MASSERT below checks the layout assumption.
          for (i=0;i<I;i++) {
            for (unsigned int i_bef=0;i_bef<I;i_bef++,ep++) {
              // CLASSIFY/CLASSIFY2 are project macros that declare the
              // i_empty flag and the real-position indices ireal/i_befreal.
              CLASSIFY(i,i_empty,ireal);
              CLASSIFY2(i_bef,i_befreal);
              if (i_empty)
                p0c+=*ep * mult;
              else {
                counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
                                  frenchClass ,jj+1,*ep * mult,0.0);
                np0c+=*ep * mult;
              }
              MASSERT( &epsilon[jj](i,i_bef)== ep);
            }
          }
        }
      }
      // Initial/final state statistics: gp1 scans the first gamma slice,
      // gp2 the last one (gamma.end() - I).
      double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
      Array<double>&ai=counts.doGetAlphaInit(I);
      Array<double>&bi=counts.doGetBetaInit(I);
      int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
      for (i=0;i<I;i++,gp1++,gp2++) {
        CLASSIFY(i,i_empty,ireal);
        ai[i]+= *gp1;
        bi[i]+= *gp2;
        if (DependencyOfPrevAJ==0) {
          if (i_empty)
            p0c+=*gp1;
          else {
            // -1 encodes "no previous alignment position" for the initial jump.
            counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
            np0c+=*gp1;
          }
        }
      }
      if (g_is_verbose)
        cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
    }
    // finalMultiply carries the per-column normalizers factored out of the
    // trellis; fold it back into the sentence log-probability (floored at 1e-100).
    cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
    Array<int>vit;
    double viterbi_score=1.0;
    if ((g_hmm_training_special_flags&1))
      HMMViterbi(*net,gamma,vit);
    else
      viterbi_score=HMMRealViterbi(*net,vit);
    // Convert 0-based Viterbi states to 1-based alignment positions;
    // states beyond l (empty-word states) map to the NULL alignment 0.
    for (j=1;j<=m;j++) {
      viterbi_alignment[j]=vit[j-1]+1;
      if (viterbi_alignment[j]>l)
        viterbi_alignment[j]=0;
    }
    sHandler1.setProbOfSentence(sent,cross_entropy);
    perp.addFactor(cross_entropy, so, l, m,1);
    viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
    if (g_is_verbose) {
      cout << "Viterbi-perp: " << log(viterbi_score) << ' '
           << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' '
           << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
    }
    // TODO: Use more safe resource management like RAII.
    delete net;
    net = 0;
    if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000))
      printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2,
                       viterbi_alignment, sent.getSentenceNo(), viterbi_score);
    addAL(viterbi_alignment,sent.getSentenceNo(),l);
    pair_no++;
  } /* of while */
  sHandler1.rewind();
  perp.record("HMM");
  viterbi_perp.record("HMM");
  errorReportAL(cout,"HMM");
}
// Builds the HMM trellis (emission table, transition tables, and initial/final
// state distributions) for one sentence pair. The state space is doubled:
// states 0..l-1 are the real source positions, states l..2l-1 are their
// empty-word counterparts.
//
// Parameters:
//   es     - source sentence; es[0] is the NULL word, so l = es.size()-1.
//   fs     - target sentence, likewise 1-based; m = fs.size()-1.
//   doInit - first-iteration initialization: uniform-ish start distribution
//            and empty-word transitions taken from al[0] (IBM-1-like).
// Returns: a newly allocated HMMNetwork; caller takes ownership (em_loop
//          deletes it explicitly).
HMMNetwork* HMM::makeHMMNetwork(const Vector<WordIndex>& es, const Vector<WordIndex>&fs, bool doInit) const
{
  unsigned int i,j;
  unsigned int l = es.size() - 1;
  unsigned int m = fs.size() - 1;
  unsigned int I=2*l,J=m;
  int IJ=I*J;
  bool DependencyOfJ=(CompareAlDeps&(16|8))||(g_prediction_in_alignments==2);
  bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(g_prediction_in_alignments==0);
  HMMNetwork *net = new HMMNetwork(I,J);
  fill(net->alphainit.begin(),net->alphainit.end(),0.0);
  fill(net->betainit.begin(),net->betainit.end(),0.0);
  // Emission probabilities: n(i-1,j-1) = t(f_j | e_i); every empty-word state
  // shares the NULL word's translation probability. Each column is normalized
  // (stride J over the I entries of column j); the normalizer, floored at
  // 1e-12, is accumulated into finalMultiply so the sentence probability can
  // be reconstructed later.
  for (j=1;j<=m;j++) {
    for (i=1;i<=l;i++)
      net->n(i-1,j-1)=tTable.getProb(es[i], fs[j]);
    double emptyContribution=0;
    emptyContribution=tTable.getProb(es[0],fs[j]);
    for (i=1;i<=l;i++)
      net->n(i+l-1,j-1)=emptyContribution;
    net->finalMultiply*=max(normalize_if_possible_with_increment(&net->n(0,j-1),&net->n(0,j-1)+IJ,J),double(1e-12));
  }
  // Transition tables: one per target position when jumps depend on j,
  // otherwise a single shared table (and none at all when J <= 1).
  if (DependencyOfJ)
    net->e.resize(m-1);
  else
    net->e.resize(J>1);
  for (j=0;j<net->e.size();j++) {
    int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(j)+1)]);
    net->e[j].resize(I,I,0);
    for (unsigned int i1=0;i1<I;++i1) {
      // al holds the jump distribution over the l real target-of-jump
      // positions for source state i1.
      Array<double> al(l);
      CLASSIFY2(i1,i1real);  // macro: declares i1real, the real position behind state i1
      for (unsigned int i2=0;i2<l;i2++)
        al[i2]=probs.getAlProb(i1real,i2,l,m,ewordclasses.getClass(es[1+i1real]),frenchClass ,j+1);
      normalize_if_possible(conv<double>(al.begin()),conv<double>(al.end()));
      if (SmoothHMM&2)
        smooth_standard(conv<double>(al.begin()),conv<double>(al.end()),HMMAlignmentModelSmoothFactor);
      // Expand the l-entry jump distribution to the doubled state space.
      for (unsigned int i2=0;i2<I;i2++) {
        CLASSIFY(i2,empty_i2,i2real);  // macro: declares empty_i2 flag and real index i2real
        net->e[j](i1,i2) = al[i2real];
        if (empty_i2)
          // A jump into an empty-word state is only allowed from the state
          // sharing the same real position (i1real == i2real).
          if (i1real!=i2real) {
            net->e[j](i1,i2)=0;
          } else {
            net->e[j](i1,i2)=doInit?al[0]:(probs.getProbabilityForEmpty()); // make first HMM iteration like IBM-1
          }
      }
      normalize_if_possible(&net->e[j](i1,0),&net->e[j](i1,0)+I);
    }
  }
  // Initial (alphainit) and final (betainit) state distributions.
  if (doInit) {
    for (unsigned int i=0;i<I;++i) {
      // NOTE(review): betainit is assigned here and then immediately
      // overwritten with 1.0 below, so only alphainit keeps the
      // (i<I/2)?1:(2.0/I) shape — presumably intentional (uniform final
      // distribution on the first iteration), but worth confirming upstream.
      net->alphainit[i]=net->betainit[i]=(i<I/2)?1:(2.0/I);
      net->betainit[i]=1.0;
    }
  } else {
    if (DependencyOfPrevAJ==0) {
      // Initial jump modeled explicitly: -1 encodes "no previous position".
      for (i=0;i<I;i++) {
        CLASSIFY2(i,ireal);
        net->alphainit[i]=probs.getAlProb(-1,ireal,l,m,0,fwordclasses.getClass(fs[1+0]),0);
      }
    } else {
      if (g_uniform_entry_exit&2)probs.getBetaInit(I,net->betainit);
      if (g_uniform_entry_exit&1)probs.getAlphaInit(I,net->alphainit);
    }
  }
  MASSERT( net->alphainit.size()==I);MASSERT( net->betainit.size()==I);
  normalize_if_possible(conv<double>(net->alphainit.begin()),conv<double>(net->alphainit.end()));
  normalize_if_possible(conv<double>(net->betainit.begin()),conv<double>(net->betainit.end()));
  // Rescale every betainit entry by 2*l (= I) after normalization.
  transform(net->betainit.begin(),net->betainit.end(),net->betainit.begin(),bind1st(multiplies<double>(),2*l));
  return net;
}