void align( const AlignMatrix& w, const SentenceValues& huLength, const SentenceValues& enLength, Trail& bestTrail, AlignMatrix& v ) { const int huBookSize = w.size(); const int enBookSize = w.otherSize(); const int thickness = w.thickness(); massert(w.size()+1 == v.size()); massert(w.otherSize()+1 == v.otherSize()); TrelliMatrix trellis( huBookSize+1,enBookSize+1,thickness, Dead ); buildDynProgMatrix( w, huLength, enLength, v, trellis ); //x std::cout << std::endl; //x dumpAlignMatrix(v); //x std::cout << std::endl; //x dumpTrelliMatrix(trellis); //x exit(-1); std::cerr << "Matrix built." << std::endl; trelliToLadder( trellis, bestTrail ); std::cerr << "Trail found." << std::endl; }
// Fills the complement of the radius of the trail with minus infties. // The return value true means success. Failure means that during the fill, // we intersected the outside of the quasidiagonal area. // In this case, the operation is not finished. bool borderDetailedAlignMatrix( AlignMatrix& alignMatrix, const Trail& trail, int radius ) { int huBookSize = alignMatrix.size(); int enBookSize = alignMatrix.otherSize(); int huPos, enPos; for ( huPos=0; huPos<huBookSize; ++huPos ) { int rowStart = alignMatrix.rowStart(huPos); int rowEnd = alignMatrix.rowEnd(huPos); for ( enPos=rowStart; enPos<rowEnd; ++enPos ) { alignMatrix.cell(huPos,enPos) = outsideOfRadiusValue; } } // We seriously use the fact that many-to-zero segments are subdivided into one-to-zero segments. // Inside setBox, an exception is thrown if we try to write outside the quasidiagonal. // If we catch such an exception, it means that the quasidiagonal is not thick enough. // In this case, we abandon the whole align, just to be sure. try { for ( int i=0; i<trail.size(); ++i ) { setBox( alignMatrix, trail[i].first, trail[i].second, radius, insideOfRadiusValue ); } } catch ( const char* errorType ) { massert( std::string(errorType) == "out of quasidiagonal" ) return false; } bool verify = true; if (verify) { int numberOfEvaluatedItems(0); for ( huPos=0; huPos<huBookSize; ++huPos ) { int rowStart = alignMatrix.rowStart(huPos); int rowEnd = alignMatrix.rowEnd(huPos); for ( enPos=rowStart; enPos<rowEnd; ++enPos ) { if (alignMatrix[huPos][enPos]==insideOfRadiusValue) { ++numberOfEvaluatedItems; } } } std::cerr << numberOfEvaluatedItems << " items inside the border." << std::endl; } return true; }
// Writes insideOfRadiusValue into every cell of m that lies in the square of side
// 2*radius+1 centred on (huPos,enPos). Positions outside [0,m.size()) x [0,m.otherSize())
// are silently skipped.
//
// Cells are written through m.cell(); it is expected to throw when the cell is inside
// the matrix bounds but outside the quasidiagonal. That exception is not caught here.
void setBox( AlignMatrix& m, int huPos, int enPos, int radius, int insideOfRadiusValue )
{
  const int rowCount    = m.size();
  const int columnCount = m.otherSize();

  // Clip the box to the matrix once, instead of testing every cell.
  const int firstRow    = ( huPos-radius > 0 )             ? huPos-radius : 0;
  const int lastRow     = ( huPos+radius < rowCount-1 )    ? huPos+radius : rowCount-1;
  const int firstColumn = ( enPos-radius > 0 )             ? enPos-radius : 0;
  const int lastColumn  = ( enPos+radius < columnCount-1 ) ? enPos+radius : columnCount-1;

  for ( int row=firstRow; row<=lastRow; ++row )
  {
    for ( int column=firstColumn; column<=lastColumn; ++column )
    {
      m.cell(row,column) = insideOfRadiusValue ;
    }
  }
}
void buildDynProgMatrix( const AlignMatrix& w, const SentenceValues& huLength, const SentenceValues& enLength, QuasiDiagonal<double>& v, TrelliMatrix& trellis ) { const int huBookSize = w.size(); const int enBookSize = w.otherSize(); int huPos,enPos; // v[huPos][enPos] gives the similarity of the [0,huPos) and [0,enPos) intervals. // The smaller value, the better similarity. (Unlike in the original similarity matrix w, where bigger is better.) double infinity = 1e6; for ( huPos=0; huPos<=huBookSize; ++huPos ) { int rowStart = v.rowStart(huPos); int rowEnd = v.rowEnd(huPos); for ( enPos=rowStart; enPos<rowEnd; ++enPos ) { double& val = v.cell(huPos,enPos); unsigned char& trail = trellis.cell(huPos,enPos); bool quasiglobal_knightsMoveAllowed = true; if (quasiglobal_knightsMoveAllowed) { double lengthFitness(0); bool quasiglobal_lengthFitnessApplied = true; // The array is indexed by the step directions. The smaller value, the better. double values[Dead]; int i; for ( i=1; i<Dead; ++i ) values[i] = infinity; if (huPos>0) { values[HuSkip] = v[huPos-1][enPos] - skipScore; } if (enPos>0) { values[EnSkip] = v[huPos][enPos-1] - skipScore; } if ((huPos>0) && (enPos>0)) { if (quasiglobal_lengthFitnessApplied) { lengthFitness = closeness(huLength[huPos-1], enLength[enPos-1]); } else { lengthFitness = 0; } values[Diag] = v[huPos-1][enPos-1] - w[huPos-1][enPos-1] - lengthFitness ; } const double dotLength = 2.0 ; if ((huPos>1) && (enPos>0)) { if (quasiglobal_lengthFitnessApplied) { lengthFitness = closeness(huLength[huPos-2]+huLength[huPos-1]+dotLength, enLength[enPos-1]); } else { lengthFitness = 0; } const double& a = w[huPos-1][enPos-1] ; const double& b = w[huPos-2][enPos-1] ; double lengthSimilarity = values[HuHuEnSkip] = v[huPos-2][enPos-1] - ( a<b ? a : b ) - skipScore - lengthFitness ; // The worse of the two crossed square. } if ((huPos>0) && (enPos>1)) { if (quasiglobal_lengthFitnessApplied) { // Attention, the two-sentence length is the first argument. 
Usually the Hungarian is the first argument, but not here. lengthFitness = closeness(enLength[enPos-2]+enLength[enPos-1]+dotLength, huLength[huPos-1]); } else { lengthFitness = 0; } const double& a = w[huPos-1][enPos-1] ; const double& b = w[huPos-1][enPos-2] ; values[HuEnEnSkip] = v[huPos-1][enPos-2] - ( a<b ? a : b ) - skipScore - lengthFitness ; // The worse of the two crossed square. } unsigned char direction = Dead; double bestValue = infinity; for ( i=1; i<Dead; ++i ) { if (values[i]<bestValue) { bestValue = values[i]; direction = i; } } trail = direction; if (direction==Dead) { val = 0; } else { val = bestValue; } } else // (!quasiglobal_knightsMoveAllowed) { int borderCase = ( (huPos==0) ? 0 : 2 ) + ( (enPos==0) ? 0 : 1 ) ; switch (borderCase) { case 0: { val = 0; trail = Dead; break; } case 1: // huPos==0 { val = v[0][enPos-1] - skipScore ; trail = EnSkip; break; } case 2: // enPos==0 { val = v[huPos-1][0] - skipScore ; trail = HuSkip; break; } case 3: { double x = v[huPos-1][enPos] - skipScore ; double y = v[huPos] [enPos-1] - skipScore ; double xy = v[huPos-1][enPos-1] - w[huPos-1][enPos-1] ; double best = xy; trail = Diag; if (x<best) { best = x; trail = HuSkip; } if (y<best) { best = y; trail = EnSkip; } val = best; break; } } } } } }