//increasing efficiency: no copy of alignment (calc. everything incrementally) LogProb transpair_model5::scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue)const { if( doModel4Scoring ) return transpair_model4::scoreOfMove(a,new_i,j,thisValue); alignment b(a); b.set(j,new_i); LogProb change; const WordIndex old_i=a(j); WordIndex f0=a.fert(0); if (old_i == new_i) change=1.0; else if (old_i == 0) change=((double)p0*p0/p1) * ((f0*(m-f0+1.0)) / ((m-2*f0+1)*(m-2*f0+2.0))) * ((PROB)(1.0)) * (get_fertility(new_i, a.fert(new_i)+1) / get_fertility(new_i, a.fert(new_i)))* (t(new_i, j)/t(old_i, j))* 1.0; else if (new_i == 0) change=(double(p1) / (p0*p0)) * (double((m-2*f0)*(m-2*f0-1))/((1+f0)*(m-f0))) * (1.0) * (get_fertility(old_i, a.fert(old_i)-1) /get_fertility(old_i, a.fert(old_i)))* (t(new_i, j) /t(old_i, j)) * (1.0); else change=(1.0) * (get_fertility(old_i,a.fert(old_i)-1) / get_fertility(old_i,a.fert(old_i))) * (get_fertility(new_i,a.fert(new_i)+1) /get_fertility(new_i,a.fert(new_i))) * (t(new_i,j)/t(old_i,j)) * (1.0); LogProb a_prob=thisValue; if( a_prob<0.0 ) a_prob=prob_of_target_and_alignment_given_source(a,2); massert(a_prob==prob_of_target_and_alignment_given_source(a,2)); LogProb b_prob=prob_of_target_and_alignment_given_source(b,2); change*=b_prob/a_prob; return change; }
void transpair_model5::computeScores(const alignment&al,vector<double>&d)const { LogProb total1 = 1.0,total2=1.0,total3=1.0,total4=1.0 ; total1 *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0))); for (WordIndex i = 1 ; i <= al.fert(0) ; i++) total1 *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient! for (WordIndex i = 1 ; i <= l ; i++) total2 *= get_fertility(i, al.fert(i)); for (WordIndex j = 1 ; j <= m ; j++) total3*= get_t(al(j), j) ; PositionIndex prev_cept=0; PositionIndex vac_all=m; Vector<char> vac(m+1,0); for(WordIndex i=1;i<=l;i++) { PositionIndex cur_j=al.als_i[i]; PositionIndex prev_j=0; PositionIndex k=0; if(cur_j) { // process first word of cept k++; total4*=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k); vac_all--; assert(vac[cur_j]==0); vac[cur_j]=1; prev_j=cur_j; cur_j=al.als_j[cur_j].next; } while(cur_j) { // process following words of cept k++; int vprev=vacancies(vac,prev_j); total4*=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k); vac_all--; vac[cur_j]=1; prev_j=cur_j; cur_j=al.als_j[cur_j].next; } assert(k==al.fert(i)); if( k ) prev_cept=i; } assert(vac_all==al.fert(0)); d.push_back(total1);//13 d.push_back(total2);//14 d.push_back(total3);//15 d.push_back(total4);//16 }
LogProb transpair_model4::prob_of_target_and_alignment_given_source_1(const alignment&al,bool verb)const { LogProb total = 1.0 ; total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0))); if( verb) cerr << "IBM-4: (1-p1)^(m-2 f0)*p1^f0: " << total << endl; for (WordIndex i = 1 ; i <= al.fert(0) ; i++) total *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ; if( verb) cerr << "IBM-4: +NULL:binomial+distortion " << total << endl; for (WordIndex i = 1 ; i <= l ; i++) { total *= get_fertility(i, al.fert(i));// * (LogProb) factorial(al.fert(i)); if( verb) cerr << "IBM-4: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl; } for (WordIndex j = 1 ; j <= m ; j++) { total*= get_t(al(j), j) ; if( verb) cerr << "IBM-4: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j) << " -> " << total << endl; } return total; }
LogProb transpair_model5::prob_of_target_and_alignment_given_source(const alignment&al, short distortionType,bool verb)const { if( doModel4Scoring ) return transpair_model4::prob_of_target_and_alignment_given_source(al,distortionType); LogProb total = 1.0 ; static const LogProb almostZero = 1E-299 ; double x2; if( distortionType&1 ) { total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0))); if( verb) cerr << "IBM-5: (1-p1)^(m-2 f0)*p1^f0: " << total << endl; for (WordIndex i = 1 ; i <= al.fert(0) ; i++) total *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient! if( verb) cerr << "IBM-5: +NULL:binomial+distortion " << total << endl; for (WordIndex i = 1 ; i <= l ; i++) { total *= get_fertility(i, al.fert(i)); if( verb) cerr << "IBM-5: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl; } for (WordIndex j = 1 ; j <= m ; j++) { total*= get_t(al(j), j) ; if( verb) cerr << "IBM-5: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j) << " -> " << total << endl; } } if( distortionType&2 ) { PositionIndex prev_cept=0; PositionIndex vac_all=m; Vector<char> vac(m+1,0); for(WordIndex i=1;i<=l;i++) { PositionIndex cur_j=al.als_i[i]; PositionIndex prev_j=0; PositionIndex k=0; if(cur_j) { // process first word of cept k++; // previous position total*= (x2=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k)); vac_all--; assert(vac[cur_j]==0); vac[cur_j]=1; if( verb) cerr << "IBM-5: d=1 of " << cur_j << ": " << x2 << " -> " << total << endl; prev_j=cur_j; cur_j=al.als_j[cur_j].next; } while(cur_j) { // process following words of cept k++; // previous position int vprev=vacancies(vac,prev_j); total*= (x2=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k)); vac_all--; vac[cur_j]=1; if( verb) cerr << "IBM-5: d>1 of " << cur_j << ": " << x2 << " -> " << total << endl; prev_j=cur_j; cur_j=al.als_j[cur_j].next; } assert(k==al.fert(i)); if( k ) prev_cept=i; } assert(vac_all==al.fert(0)); } total = total?total:almostZero; return total; }