Пример #1
0
// Sums GetScore() over the uncovered spans of `bitmap`, treating the
// inclusive range [startPos, endPos] as if it were already covered.
//
// The scan begins at the bitmap's first gap and runs up to the later of
// bitmap.GetLastPos() and endPos; whatever lies beyond that point up to the
// end of the bitmap is added as one final span. Returns 0 when the bitmap
// reports no gap at all.
// NOTE(review): GetScore(a, b) is presumably the estimate for the inclusive
// span a..b -- confirm against its definition.
float SquareMatrix::CalcFutureScore2( Bitmap const &bitmap, size_t startPos, size_t endPos ) const
{
  // Sentinel held by gapBegin while the scan is not inside a gap.
  const size_t kNoGap = numeric_limits<size_t>::max();

  const size_t firstGap = bitmap.GetFirstGapPos();
  if (firstGap == NOT_FOUND) {
    return 0.0f; // nothing left uncovered
  }

  // By default the first gap is already open and scanning resumes right
  // behind it. If the phrase starts exactly on that gap, no gap is open and
  // scanning is postponed until just past the phrase.
  const bool phraseStartsOnFirstGap = (startPos == firstGap);
  size_t gapBegin = phraseStartsOnFirstGap ? kNoGap : firstGap;
  const size_t scanFrom = phraseStartsOnFirstGap ? endPos + 1 : firstGap + 1;

  // Last position the scan has to inspect.
  size_t scanTo = bitmap.GetLastPos();
  if (scanTo == NOT_FOUND || endPos > scanTo) {
    scanTo = endPos;
  }

  float total = 0.0f;
  for (size_t pos = scanFrom; pos <= scanTo; ++pos) {
    // A position counts as filled if the bitmap has it set or the phrase
    // range contains it.
    const bool filled = bitmap.GetValue(pos) || (startPos <= pos && pos <= endPos);

    if (gapBegin == kNoGap) {
      if (!filled) {
        gapBegin = pos; // a new gap opens here
      }
    } else if (filled) {
      // the open gap ended on the previous position
      total += GetScore(gapBegin, pos - 1);
      gapBegin = kNoGap;
    }
  }

  // Everything behind the scanned region, if any, is one trailing gap.
  const size_t lastPos = bitmap.GetSize() - 1;
  if (scanTo != lastPos) {
    total += GetScore(scanTo + 1, lastPos);
  }

  return total;
}
// Returns true when `cur` ends directly before `prev` starts, or when `cur`
// ends somewhere before `prev` starts and the position immediately left of
// `prev` is not set in the coverage bitmap `cov`.
bool
IsSwap(Range const& prev, Range const& cur, Bitmap const& cov)
{
  size_t const prevStart = prev.GetStartPos();
  size_t const curEnd = cur.GetEndPos();

  // cur is immediately adjacent on the left of prev
  if (curEnd + 1 == prevStart) {
    return true;
  }

  // cur lies further left; only counts if the slot next to prev is still open
  return curEnd < prevStart && !cov.GetValue(prevStart - 1);
}
// Returns true when `cur` starts on the position right after `prev` ends, or
// when `cur` starts later than that and the position right after `prev` is
// not set in `cov`.
//
//   prev - words range of the last source phrase
//   cur  - words range of the current source phrase
//   cov  - coverage bitmap
bool
IsMonotonicStep(Range  const& prev,
                Range  const& cur,
                Bitmap const& cov)
{
  size_t const nextAfterPrev = prev.GetEndPos() + 1;
  size_t const curStart = cur.GetStartPos();

  // cur continues exactly where prev stopped
  if (curStart == nextAfterPrev) {
    return true;
  }

  // cur starts further right; only counts if the slot after prev is open
  return curStart > nextAfterPrev && !cov.GetValue(nextAfterPrev);
}
Пример #4
0
// Sums GetScore() over every maximal run of unset positions in `bitmap`.
// Each run [first, last] contributes GetScore(first, last); a run that
// reaches the end of the bitmap is closed at GetSize() - 1.
float SquareMatrix::CalcFutureScore( Bitmap const &bitmap ) const
{
  // Sentinel held by gapBegin while the scan is not inside a gap.
  const size_t kNoGap = numeric_limits<size_t>::max();
  const size_t size = bitmap.GetSize();

  float total = 0.0f;
  size_t gapBegin = kNoGap;

  for (size_t pos = 0; pos < size; ++pos) {
    const bool covered = bitmap.GetValue(pos);
    const bool inGap = (gapBegin != kNoGap);

    if (!covered && !inGap) {
      gapBegin = pos; // a new gap opens here
    } else if (covered && inGap) {
      // the open gap ended on the previous position
      total += GetScore(gapBegin, pos - 1);
      gapBegin = kNoGap;
    }
  }

  // A gap still open after the loop extends to the last position.
  if (gapBegin != kNoGap) {
    total += GetScore(gapBegin, size - 1);
  }

  return total;
}
Пример #5
0
void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , Bitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
{

  int gFlag = 0;
  int gp = 0;
  int ans;


  if ( j < j1) { // j1 is the index of the source word we are about to generate ...
    //if(coverageVector[j]==0) // if source word at j is not generated yet ...
    if(coverageVector.GetValue(j)==0) { // if source word at j is not generated yet ...
      operations.push_back("_INS_GAP_");
      gFlag++;
      gap[j]="Unfilled";
    }
    if (j == E) {
      j = j1;
    } else {
      operations.push_back("_JMP_FWD_");
      j=E;
    }
  }

  if (j1 < j) {
    // if(j < E && coverageVector[j]==0)
    if(j < E && coverageVector.GetValue(j)==0) {
      operations.push_back("_INS_GAP_");
      gFlag++;
      gap[j]="Unfilled";
    }

    j=closestGap(gap,j1,gp);
    operations.push_back("_JMP_BCK_"+ intToString(gp));

    //cout<<"I am j "<<j<<endl;
    //cout<<"I am j1 "<<j1<<endl;

    if(j==j1)
      gap[j]="Filled";
  }

  if (j < j1) {
    operations.push_back("_INS_GAP_");
    gap[j] = "Unfilled";
    gFlag++;
    j=j1;
  }

  if(contFlag == 0) { // First words of the multi-word cept ...

    if(english == "_TRANS_SLF_") { // Unknown word ...
      operations.push_back("_TRANS_SLF_");
    } else {
      operations.push_back("_TRANS_" + english + "_TO_" + german);
    }

    //ans = firstOpenGap(coverageVector);
    ans = coverageVector.GetFirstGapPos();

    if (ans != -1)
      gapWidth += j - ans;

  } else if (contFlag == 2) {

    operations.push_back("_INS_" + german);
    ans = coverageVector.GetFirstGapPos();

    if (ans != -1)
      gapWidth += j - ans;
    deletionCount++;
  } else {
    operations.push_back("_CONT_CEPT_");
  }

  //coverageVector[j]=1;
  coverageVector.SetValue(j,1);
  j+=1;

  if(E<j)
    E=j;

  if (gFlag > 0)
    gapCount++;

  openGapCount += getOpenGaps();

  //if (coverageVector[j] == 0 && targetNullWords.find(j) != targetNullWords.end())
  if (j < coverageVector.GetSize()) {
    if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end()) {
      j1 = j;
      german = currF[j1-startIndex];
      english = "_INS_";
      generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
    }
  }

}