Пример #1
0
void ExtractTask::saveAllHieroPhrases( int startT, int endT, int startS, int endS, HoleCollection &holeColl, int countS)
{
  LabelIndex labelIndex,labelCount;

  // number of target head labels
  int numLabels = m_options.targetSyntax ? m_sentence.targetTree.GetNodes(startT,endT).size() : 1;
  if (m_options.targetSyntacticPreferences && !numLabels) {
    numLabels++;
  }
  labelCount.push_back(numLabels);
  labelIndex.push_back(0);

  // number of source head labels
  numLabels =  m_options.sourceSyntax ? m_sentence.sourceTree.GetNodes(startS,endS).size() : 1;
  labelCount.push_back(numLabels);
  labelIndex.push_back(0);

  // number of target hole labels
  for( HoleList::const_iterator hole = holeColl.GetHoles().begin();
       hole != holeColl.GetHoles().end(); hole++ ) {
    int numLabels = m_options.targetSyntax ? m_sentence.targetTree.GetNodes(hole->GetStart(1),hole->GetEnd(1)).size() : 1 ;
    if (m_options.targetSyntacticPreferences && !numLabels) {
      numLabels++;
    }
    labelCount.push_back(numLabels);
    labelIndex.push_back(0);
  }

  // number of source hole labels
  holeColl.SortSourceHoles();
  for( vector<Hole*>::iterator i = holeColl.GetSortedSourceHoles().begin();
       i != holeColl.GetSortedSourceHoles().end(); i++ ) {
    const Hole &hole = **i;
    int numLabels =  m_options.sourceSyntax ? m_sentence.sourceTree.GetNodes(hole.GetStart(0),hole.GetEnd(0)).size() : 1 ;
    labelCount.push_back(numLabels);
    labelIndex.push_back(0);
  }

  // loop through the holes
  bool done = false;
  while(!done) {
    saveHieroPhrase( startT, endT, startS, endS, holeColl, labelIndex, countS );
    for(unsigned int i=0; i<labelIndex.size(); i++) {
      labelIndex[i]++;
      if(labelIndex[i] == labelCount[i]) {
        labelIndex[i] = 0;
        if (i == labelIndex.size()-1)
          done = true;
      } else {
        break;
      }
    }
  }
}
Пример #2
0
/**
 * Print one hierarchical phrase for every combination of syntactic labels
 * available for the rule head and for each hole.
 *
 * labelCount / labelIndex hold one slot per labelled position, in this order:
 * target head, source head, target side of each hole (in the order of
 * holeColl.GetHoles()), source side of each hole (in sorted-source order).
 * The slots are advanced like an odometer and printHieroPhrase() is called
 * once per setting.
 *
 * @param sentence     aligned sentence pair whose syntax trees supply the labels
 * @param startT,endT  target span of the rule
 * @param startS,endS  source span of the rule
 * @param holeColl     holes of the rule; its source holes are (re)sorted here
 *
 * NOTE(review): the enumeration only terminates if every slot has at least
 * one label; a slot with a count of 0 never matches after being incremented.
 * Presumably callers only pass spans that have labels - confirm.
 */
void printAllHieroPhrases( SentenceAlignmentWithSyntax &sentence
                           , int startT, int endT, int startS, int endS
                           , HoleCollection &holeColl)
{
  LabelIndex labelIndex,labelCount;

  // number of target head labels
  int numLabels = options.targetSyntax ? sentence.targetTree.GetNodes(startT,endT).size() : 1;
  labelCount.push_back(numLabels);
  labelIndex.push_back(0);

  // number of source head labels
  numLabels =  options.sourceSyntax ? sentence.sourceTree.GetNodes(startS,endS).size() : 1;
  labelCount.push_back(numLabels);
  labelIndex.push_back(0);

  // number of target hole labels
  for( HoleList::const_iterator hole = holeColl.GetHoles().begin();
       hole != holeColl.GetHoles().end(); ++hole ) {
    const int numHoleLabels = options.targetSyntax ? sentence.targetTree.GetNodes(hole->GetStart(1),hole->GetEnd(1)).size() : 1 ;
    labelCount.push_back(numHoleLabels);
    labelIndex.push_back(0);
  }

  // number of source hole labels
  holeColl.SortSourceHoles();
  for( vector<Hole*>::iterator i = holeColl.GetSortedSourceHoles().begin();
       i != holeColl.GetSortedSourceHoles().end(); ++i ) {
    const Hole &hole = **i;
    const int numHoleLabels = options.sourceSyntax ? sentence.sourceTree.GetNodes(hole.GetStart(0),hole.GetEnd(0)).size() : 1 ;
    labelCount.push_back(numHoleLabels);
    labelIndex.push_back(0);
  }

  // loop through all label combinations
  bool done = false;
  while(!done) {
    printHieroPhrase( sentence, startT, endT, startS, endS, holeColl, labelIndex );
    // unsigned index: avoids comparing a signed int against size()
    for(size_t i=0; i<labelIndex.size(); i++) {
      labelIndex[i]++;
      if(labelIndex[i] == labelCount[i]) {
        // this slot wrapped around: reset it and carry into the next slot
        labelIndex[i] = 0;
        if (i + 1 == labelIndex.size())
          done = true; // the last slot wrapped: every combination was printed
      } else {
        break;
      }
    }
  }
}
Пример #3
0
/**
 * Build the source side of a hierarchical rule as a space-separated string.
 *
 * Walks source positions startS..endS. A position where the next sorted
 * source hole starts is rendered as a bracketed non-terminal and the whole
 * hole span is skipped; every other position contributes its source word.
 * Each hole is also told its position in the output sequence via
 * SetPos(outPos, 0).
 *
 * Non-terminal format: "[source]" when only target syntax is in use
 * (string-to-tree), otherwise "[source][target]".
 *
 * @param sentence     sentence pair supplying the source words
 * @param startT,endT  unused here
 * @param startS,endS  source span to render (inclusive)
 * @param holeColl     holes of the rule; asserted to contain at least one
 *                     sorted source hole, each with a non-empty target label
 * @param labelIndex   unused here
 * @return the rendered source side without a trailing space
 *
 * NOTE(review): presumably holeColl.SortSourceHoles() and the label
 * assignment have already run before this is called - confirm at call sites.
 */
string printSourceHieroPhrase( SentenceAlignmentWithSyntax &sentence
                               , int startT, int endT, int startS, int endS
                               , HoleCollection &holeColl, const LabelIndex &labelIndex)
{
  vector<Hole*>::iterator iterHoleList = holeColl.GetSortedSourceHoles().begin();
  assert(iterHoleList != holeColl.GetSortedSourceHoles().end());

  const bool stringToTree = !options.sourceSyntax && options.targetSyntax;

  string out;
  int outPos = 0;
  for(int currPos = startS; currPos <= endS; currPos++) {
    bool isHole = false;
    if (iterHoleList != holeColl.GetSortedSourceHoles().end()) {
      const Hole &hole = **iterHoleList;
      isHole = hole.GetStart(0) == currPos;
    }

    if (isHole) {
      Hole &hole = **iterHoleList;

      const string &targetLabel = hole.GetLabel(1);
      assert(targetLabel != "");

      const string &sourceLabel =  hole.GetLabel(0);

      if (stringToTree) {
        out += "[" + sourceLabel + "] ";
      } else {
        out += "[" + sourceLabel + "][" + targetLabel + "] ";
      }

      // jump to the last word of the hole; the loop increment steps past it
      currPos = hole.GetEnd(0);
      hole.SetPos(outPos, 0);
      ++iterHoleList;
    } else {
      out += sentence.source[currPos] + " ";
    }

    outPos++;
  }

  assert(iterHoleList == holeColl.GetSortedSourceHoles().end());

  // drop the trailing separator; guard the empty case, where size()-1 would
  // wrap around and make erase() throw
  if (!out.empty()) {
    out.erase(out.size()-1);
  }
  return out;
}
/**
 * Build the source side of a hierarchical rule as a space-separated string.
 *
 * Walks source positions startS..endS of m_sentence. A position where the
 * next sorted source hole starts is rendered as a bracketed non-terminal and
 * the whole hole span is skipped; every other position contributes its
 * source word. Each hole is also told its position in the output sequence
 * via SetPos(outPos, 0).
 *
 * Non-terminal format: "[source]" when m_options.unpairedExtractFormat is
 * set, otherwise "[source][target]".
 *
 * @param startT,endT  unused here
 * @param startS,endS  source span to render (inclusive)
 * @param holeColl     holes of the rule; asserted to contain at least one
 *                     sorted source hole, each with a non-empty target label
 * @param labelIndex   unused here
 * @return the rendered source side without a trailing space
 *
 * NOTE(review): presumably holeColl.SortSourceHoles() and the label
 * assignment have already run before this is called - confirm at call sites.
 */
string ExtractTask::saveSourceHieroPhrase( int startT, int endT, int startS, int endS
    , HoleCollection &holeColl, const LabelIndex &labelIndex)
{
  vector<Hole*>::iterator iterHoleList = holeColl.GetSortedSourceHoles().begin();
  assert(iterHoleList != holeColl.GetSortedSourceHoles().end());

  string out;
  int outPos = 0;
  for(int currPos = startS; currPos <= endS; currPos++) {
    bool isHole = false;
    if (iterHoleList != holeColl.GetSortedSourceHoles().end()) {
      const Hole &hole = **iterHoleList;
      isHole = hole.GetStart(0) == currPos;
    }

    if (isHole) {
      Hole &hole = **iterHoleList;

      const string &targetLabel = hole.GetLabel(1);
      assert(targetLabel != "");

      const string &sourceLabel =  hole.GetLabel(0);
      if (m_options.unpairedExtractFormat) {
        out += "[" + sourceLabel + "] ";
      } else {
        out += "[" + sourceLabel + "][" + targetLabel + "] ";
      }

      // jump to the last word of the hole; the loop increment steps past it
      currPos = hole.GetEnd(0);
      hole.SetPos(outPos, 0);
      ++iterHoleList;
    } else {
      out += m_sentence.source[currPos] + " ";
    }

    outPos++;
  }

  assert(iterHoleList == holeColl.GetSortedSourceHoles().end());

  // drop the trailing separator; guard the empty case, where size()-1 would
  // wrap around and make erase() throw
  if (!out.empty()) {
    out.erase(out.size()-1);
  }
  return out;
}
Пример #5
0
/**
 * Prepare the source side of a hierarchical rule before it is printed.
 *
 * Walks source positions startS..endS. For every position that is the start
 * of the next sorted source hole, the hole receives its source label and its
 * position in the output sequence, and the rest of the hole span is skipped.
 * For every other position, indexS[position] is set to the output position
 * of that word.
 *
 * The source label of the k-th sorted hole is chosen by
 * labelIndex[2 + k + number of holes]; without source syntax it is "X".
 *
 * @param sentence     sentence pair whose source tree supplies the labels
 * @param startT,endT  unused here
 * @param startS,endS  source span to process (inclusive)
 * @param indexS       filled with source position -> output position for words
 * @param holeColl     holes of the rule; asserted to contain at least one
 *                     sorted source hole
 * @param labelIndex   selected label per slot
 *
 * NOTE(review): presumably holeColl.SortSourceHoles() has already been
 * called - confirm at call sites.
 */
void preprocessSourceHieroPhrase( SentenceAlignmentWithSyntax &sentence
                                  , int startT, int endT, int startS, int endS
                                  , WordIndex &indexS, HoleCollection &holeColl, const LabelIndex &labelIndex)
{
  vector<Hole*>::iterator nextHole = holeColl.GetSortedSourceHoles().begin();
  const vector<Hole*>::iterator noMoreHoles = holeColl.GetSortedSourceHoles().end();
  assert(nextHole != noMoreHoles);

  int holeTotal = holeColl.GetHoles().size();
  int holesSeen = 0;
  int outPos = 0;
  int currPos = startS;

  while (currPos <= endS) {
    const bool atHoleStart = nextHole != noMoreHoles
                             && (*nextHole)->GetStart(0) == currPos;

    if (!atHoleStart) {
      // plain word: remember where it lands in the output
      indexS[currPos] = outPos;
      ++currPos;
    } else {
      Hole &hole = **nextHole;

      // source hole slots follow the two head slots and the target hole slots
      int labelI = labelIndex[ 2+holesSeen+holeTotal ];
      string label("X");
      if (options.sourceSyntax) {
        label = sentence.sourceTree.GetNodes(currPos,hole.GetEnd(0))[ labelI ]->GetLabel();
      }
      hole.SetLabel(label, 0);

      // continue right after the last word covered by the hole
      currPos = hole.GetEnd(0) + 1;
      hole.SetPos(outPos, 0);
      ++nextHole;
      ++holesSeen;
    }

    // a word and a whole hole each occupy exactly one output position
    ++outPos;
  }

  assert(nextHole == noMoreHoles);
}