예제 #1
0
void align( const AlignMatrix& w, const SentenceValues& huLength, const SentenceValues& enLength,
            Trail& bestTrail, AlignMatrix& v )
{
  const int huBookSize = w.size();
  const int enBookSize = w.otherSize();
  const int thickness  = w.thickness();

  massert(w.size()+1 == v.size());
  massert(w.otherSize()+1 == v.otherSize());

  TrelliMatrix trellis( huBookSize+1,enBookSize+1,thickness, Dead );

  buildDynProgMatrix( w, huLength, enLength, v, trellis );

//x   std::cout << std::endl;
//x   dumpAlignMatrix(v);
//x   std::cout << std::endl;
//x   dumpTrelliMatrix(trellis);
//x   exit(-1);

  std::cerr << "Matrix built." << std::endl;

  trelliToLadder( trellis, bestTrail );

  std::cerr << "Trail found." << std::endl;
}
예제 #2
0
// Fills the complement of the radius of the trail with minus infties.
// The return value true means success. Failure means that during the fill,
// we intersected the outside of the quasidiagonal area.
// In this case, the operation is not finished.
bool borderDetailedAlignMatrix( AlignMatrix& alignMatrix, const Trail& trail, int radius )
{
  int huBookSize = alignMatrix.size();
  int enBookSize = alignMatrix.otherSize();

  int huPos, enPos;
  for ( huPos=0; huPos<huBookSize; ++huPos )
  {
    int rowStart = alignMatrix.rowStart(huPos);
    int rowEnd   = alignMatrix.rowEnd(huPos);
    for ( enPos=rowStart; enPos<rowEnd; ++enPos )
    {
      alignMatrix.cell(huPos,enPos) = outsideOfRadiusValue;
    }
  }

  // We seriously use the fact that many-to-zero segments are subdivided into one-to-zero segments.
  // Inside setBox, an exception is thrown if we try to write outside the quasidiagonal.
  // If we catch such an exception, it means that the quasidiagonal is not thick enough.
  // In this case, we abandon the whole align, just to be sure.
  try
  {
    for ( int i=0; i<trail.size(); ++i )
    {
      setBox( alignMatrix, trail[i].first, trail[i].second, radius, insideOfRadiusValue );
    }
  }
  catch ( const char* errorType )
  {
    massert( std::string(errorType) == "out of quasidiagonal" )
    return false;
  }

  bool verify = true;
  if (verify)
  {
    int numberOfEvaluatedItems(0);
    for ( huPos=0; huPos<huBookSize; ++huPos )
    {
      int rowStart = alignMatrix.rowStart(huPos);
      int rowEnd   = alignMatrix.rowEnd(huPos);
      for ( enPos=rowStart; enPos<rowEnd; ++enPos )
      {
        if (alignMatrix[huPos][enPos]==insideOfRadiusValue)
        {
          ++numberOfEvaluatedItems;
        }
      }
    }

    std::cerr << numberOfEvaluatedItems << " items inside the border." << std::endl;
  }

  return true;
}
예제 #3
0
void setBox( AlignMatrix& m, int huPos, int enPos, int radius, int insideOfRadiusValue )
{
  for ( int x=huPos-radius; x<=huPos+radius; ++x )
  {
    for ( int y=enPos-radius; y<=enPos+radius; ++y )
    {
      if ( (x>=0) && (x<m.size()) && (y>=0) && (y<m.otherSize()) )
      {
        m.cell(x,y) = insideOfRadiusValue ;
      }
    }
  }
}
예제 #4
0
void buildDynProgMatrix( const AlignMatrix& w, const SentenceValues& huLength, const SentenceValues& enLength,
                         QuasiDiagonal<double>& v, TrelliMatrix& trellis )
{
  const int huBookSize = w.size();
  const int enBookSize = w.otherSize();

  int huPos,enPos;

  // v[huPos][enPos] gives the similarity of the [0,huPos) and [0,enPos) intervals.
  // The smaller value, the better similarity. (Unlike in the original similarity matrix w, where bigger is better.)

  double infinity = 1e6;

  for ( huPos=0; huPos<=huBookSize; ++huPos )
  {
    int rowStart = v.rowStart(huPos);
    int rowEnd   = v.rowEnd(huPos);
    for ( enPos=rowStart; enPos<rowEnd; ++enPos )
    {
      double& val = v.cell(huPos,enPos);
      unsigned char& trail = trellis.cell(huPos,enPos);

      bool quasiglobal_knightsMoveAllowed = true;
      if (quasiglobal_knightsMoveAllowed)
      {
        double lengthFitness(0);

        bool quasiglobal_lengthFitnessApplied = true;

        // The array is indexed by the step directions. The smaller value, the better.
        double values[Dead];
        int i;
        for ( i=1; i<Dead; ++i )
          values[i] = infinity;

        if (huPos>0)
        {
          values[HuSkip] = v[huPos-1][enPos]   - skipScore;
        }

        if (enPos>0)
        {
          values[EnSkip] = v[huPos][enPos-1]   - skipScore;
        }

        if ((huPos>0) && (enPos>0))
        {
          if (quasiglobal_lengthFitnessApplied)
          {
            lengthFitness = closeness(huLength[huPos-1], enLength[enPos-1]);
          }
          else
          {
            lengthFitness = 0;
          }

          values[Diag] = v[huPos-1][enPos-1] - w[huPos-1][enPos-1] - lengthFitness ;
        }

        const double dotLength = 2.0 ;

        if ((huPos>1) && (enPos>0))
        {
          if (quasiglobal_lengthFitnessApplied)
          {
            lengthFitness = closeness(huLength[huPos-2]+huLength[huPos-1]+dotLength, enLength[enPos-1]);
          }
          else
          {
            lengthFitness = 0;
          }

          const double& a = w[huPos-1][enPos-1] ;
          const double& b = w[huPos-2][enPos-1] ;
          double lengthSimilarity =
          values[HuHuEnSkip] = v[huPos-2][enPos-1] - ( a<b ? a : b ) - skipScore - lengthFitness ; // The worse of the two crossed square.
        }

        if ((huPos>0) && (enPos>1))
        {
          if (quasiglobal_lengthFitnessApplied)
          {
            // Attention, the two-sentence length is the first argument. Usually the Hungarian is the first argument, but not here.
            lengthFitness = closeness(enLength[enPos-2]+enLength[enPos-1]+dotLength, huLength[huPos-1]);
          }
          else
          {
            lengthFitness = 0;
          }

          const double& a = w[huPos-1][enPos-1] ;
          const double& b = w[huPos-1][enPos-2] ;
          values[HuEnEnSkip] = v[huPos-1][enPos-2] - ( a<b ? a : b ) - skipScore - lengthFitness ; // The worse of the two crossed square.
        }

        unsigned char direction = Dead;
        double bestValue = infinity;
        for ( i=1; i<Dead; ++i )
        {
          if (values[i]<bestValue)
          {
            bestValue = values[i];
            direction = i;
          }
        }

        trail = direction;
        if (direction==Dead)
        {
          val = 0;
        }
        else
        {
          val = bestValue;
        }
      }
      else // (!quasiglobal_knightsMoveAllowed)
      {
        int borderCase = ( (huPos==0) ? 0 : 2 ) + ( (enPos==0) ? 0 : 1 ) ;

        switch (borderCase)
        {
        case 0:
          {
            val = 0;
            trail = Dead;
            break;
          }
        case 1: // huPos==0
          {
            val = v[0][enPos-1] - skipScore ;
            trail = EnSkip;
            break;
          }
        case 2: // enPos==0
          {
            val = v[huPos-1][0] - skipScore ;
            trail = HuSkip;
            break;
          }
        case 3:
          {
            double x  = v[huPos-1][enPos]   - skipScore ;
            double y  = v[huPos]  [enPos-1] - skipScore ;
            double xy = v[huPos-1][enPos-1] - w[huPos-1][enPos-1] ;

            double best = xy;
            trail = Diag;
            if (x<best)
            {
              best = x;
              trail = HuSkip;
            }
            if (y<best)
            {
              best = y;
              trail = EnSkip;
            }
            val = best;
            break;
          }
        }
      }
    }
  }
}