Exemplo n.º 1
0
/**
 * Enumerates the phrase pairs of `sentence` that are compatible with its word
 * alignment and passes each one to addPhrase() with an orientation string.
 *
 * Pass 1 walks every target span [startE, endE], computes the tightest source
 * span [minF, maxF] covering the source words aligned to it, discards the
 * span when one of those source words is also aligned to a target word
 * outside [startE, endE], and then widens the source span over neighbouring
 * unaligned source words.
 *   - When neither phraseModel nor hierModel is set, each phrase is emitted
 *     right away (with a word-based orientation string if wordModel is set,
 *     otherwise with an empty one).
 *   - Otherwise the phrase corners are only recorded: phrases whose target
 *     and source extents are both below maxPhraseLength go to
 *     inboundPhrases and the in* vertex sets, all others go to the out*
 *     vertex sets.
 *
 * Pass 2 runs only when phraseModel or hierModel is set. It computes the
 * enabled orientations for every inbound phrase and emits them in the form
 * "<word> | <phrase> | <hier>" (a disabled model contributes an empty field).
 *
 * Reads these names defined outside this function: hierModel, phraseModel,
 * wordModel, maxPhraseLength, wordType, phraseType, hierType.
 *
 * @param sentence aligned sentence pair; its target, source, alignedToT and
 *                 alignedCountS members are read here.
 */
void extract(SentenceAlignment &sentence) {
  int countE = sentence.target.size();
  int countF = sentence.source.size();

  HPhraseVector inboundPhrases;

  HSentenceVertices inTopLeft;
  HSentenceVertices inTopRight;
  HSentenceVertices inBottomLeft;
  HSentenceVertices inBottomRight;

  HSentenceVertices outTopLeft;
  HSentenceVertices outTopRight;
  HSentenceVertices outBottomLeft;
  HSentenceVertices outBottomRight;

  // With the hierarchical model the length limits are not applied while
  // enumerating; over-long phrases are still needed for the out* vertex sets.
  bool relaxLimit = hierModel;
  bool buildExtraStructure = phraseModel || hierModel;

  // check alignments for target phrase startE...endE
  // loop over extracted phrases which are compatible with the word-alignments
  for(int startE=0; startE<countE; ++startE) {
    for(int endE=startE; (endE<countE && (relaxLimit || endE<startE+maxPhraseLength)); ++endE) {

      int minF = 9999; // sentinel: larger than any source position expected here
      int maxF = -1;   // stays -1 when the target span has no alignment at all

      // Working copy of the per-source-word counts; one is subtracted for
      // every alignment link that falls inside the target span.
      vector<int> usedF = sentence.alignedCountS;
      for(int ei=startE; ei<=endE; ++ei) {
        for(size_t i=0; i<sentence.alignedToT[ei].size(); ++i) {
          int fi = sentence.alignedToT[ei][i];
          if (fi<minF) { minF = fi; }
          if (fi>maxF) { maxF = fi; }
          --usedF[fi];
        }
      }

      if (maxF >= 0 && // aligned to any source words at all
          (relaxLimit || maxF-minF < maxPhraseLength)) { // source phrase within limits

        // check if source words are aligned to out of bound target words
        bool out_of_bounds = false;
        for(int fi=minF; fi<=maxF && !out_of_bounds; ++fi) {
          out_of_bounds = usedF[fi] > 0;
        }

        if (!out_of_bounds) {
          // start point of source phrase may retreat over unaligned
          for(int startF=minF;
              (startF>=0 &&
               (relaxLimit || startF>maxF-maxPhraseLength) && // within length limit
               (startF==minF || sentence.alignedCountS[startF]==0)); // unaligned
              --startF) {
            // end point of source phrase may advance over unaligned
            for(int endF=maxF;
                (endF<countF &&
                 (relaxLimit || endF<startF+maxPhraseLength) && // within length limit
                 (endF==maxF || sentence.alignedCountS[endF]==0)); // unaligned
                ++endF) { // at this point we have extracted a phrase
              if(buildExtraStructure) { // phrase || hier
                if(endE-startE < maxPhraseLength && endF-startF < maxPhraseLength) { // within limit
                  inboundPhrases.push_back(HPhrase(HPhraseVertex(startF,startE),
                                                   HPhraseVertex(endF,endE)));
                  insertPhraseVertices(inTopLeft, inTopRight, inBottomLeft, inBottomRight,
                                       startF, startE, endF, endE);
                } else {
                  insertPhraseVertices(outTopLeft, outTopRight, outBottomLeft, outBottomRight,
                                       startF, startE, endF, endE);
                }
              } else {
                string orientationInfo = "";
                if(wordModel) {
                  REO_POS wordPrevOrient, wordNextOrient;
                  // Alignment points at the four diagonal neighbours of the phrase box.
                  bool connectedLeftTopP  = isAligned( sentence, startF-1, startE-1 );
                  bool connectedRightTopP = isAligned( sentence, endF+1,   startE-1 );
                  bool connectedLeftTopN  = isAligned( sentence, endF+1,   endE+1 );
                  bool connectedRightTopN = isAligned( sentence, startF-1, endE+1 );
                  wordPrevOrient = getOrientWordModel(sentence, wordType, connectedLeftTopP, connectedRightTopP, startF, endF, startE, endE, countF, 0, 1, &ge, &lt);
                  wordNextOrient = getOrientWordModel(sentence, wordType, connectedLeftTopN, connectedRightTopN, endF, startF, endE, startE, 0, countF, -1, &lt, &ge);
                  orientationInfo += getOrientString(wordPrevOrient, wordType) + " " + getOrientString(wordNextOrient, wordType);
                  // Disabled in the original code:
                  //   if(allModelsOutputFlag)
                  //     " | | ";
                }
                addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
              }
            }
          }
        }
      }
    }
  }

  if(buildExtraStructure) { // phrase || hier
    for(size_t i = 0; i < inboundPhrases.size(); ++i) {
      int startF = inboundPhrases[i].first.first;
      int startE = inboundPhrases[i].first.second;
      int endF = inboundPhrases[i].second.first;
      int endE = inboundPhrases[i].second.second;

      bool connectedLeftTopP  = isAligned(sentence, startF-1, startE-1);
      bool connectedRightTopP = isAligned(sentence, endF+1,   startE-1);
      bool connectedLeftTopN  = isAligned(sentence, endF+1,   endE+1);
      bool connectedRightTopN = isAligned(sentence, startF-1, endE+1);

      // Initialised so that a disabled model never leaves an indeterminate
      // value behind; each value is only read when its model is enabled
      // (the phrase values are additionally passed to the hier model).
      REO_POS wordPrevOrient = UNKNOWN;
      REO_POS wordNextOrient = UNKNOWN;
      REO_POS phrasePrevOrient = UNKNOWN;
      REO_POS phraseNextOrient = UNKNOWN;
      REO_POS hierPrevOrient = UNKNOWN;
      REO_POS hierNextOrient = UNKNOWN;

      if(wordModel) {
        wordPrevOrient = getOrientWordModel(sentence, wordType,
                                            connectedLeftTopP, connectedRightTopP,
                                            startF, endF, startE, endE, countF, 0, 1,
                                            &ge, &lt);
        wordNextOrient = getOrientWordModel(sentence, wordType,
                                            connectedLeftTopN, connectedRightTopN,
                                            endF, startF, endE, startE, 0, countF, -1,
                                            &lt, &ge);
      }

      if(phraseModel) {
        phrasePrevOrient = getOrientPhraseModel(sentence, phraseType,
                                                connectedLeftTopP, connectedRightTopP,
                                                startF, endF, startE, endE, countF-1, 0, 1, &ge, &lt, inBottomRight, inBottomLeft);
        phraseNextOrient = getOrientPhraseModel(sentence, phraseType,
                                                connectedLeftTopN, connectedRightTopN,
                                                endF, startF, endE, startE, 0, countF-1, -1, &lt, &ge, inBottomLeft, inBottomRight);
      } else {
        phrasePrevOrient = phraseNextOrient = UNKNOWN;
      }

      if(hierModel) {
        hierPrevOrient = getOrientHierModel(sentence, hierType,
                                            connectedLeftTopP, connectedRightTopP,
                                            startF, endF, startE, endE, countF-1, 0, 1, &ge, &lt, inBottomRight, inBottomLeft, outBottomRight, outBottomLeft, phrasePrevOrient);
        hierNextOrient = getOrientHierModel(sentence, hierType,
                                            connectedLeftTopN, connectedRightTopN,
                                            endF, startF, endE, startE, 0, countF-1, -1, &lt, &ge, inBottomLeft, inBottomRight, outBottomLeft, outBottomRight, phraseNextOrient);
      }

      // Three fields separated by " | "; a disabled model leaves its field empty.
      string orientationInfo = ((wordModel)? getOrientString(wordPrevOrient, wordType) + " " + getOrientString(wordNextOrient, wordType) : "") + " | " +
                               ((phraseModel)? getOrientString(phrasePrevOrient, phraseType) + " " + getOrientString(phraseNextOrient, phraseType) : "") + " | " +
                               ((hierModel)? getOrientString(hierPrevOrient, hierType) + " " + getOrientString(hierNextOrient, hierType) : "");

      addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
    }
  }
}
/**
 * Enumerates the phrase pairs of `sentence` that are compatible with its word
 * alignment.
 *
 * For every target span [startE, endE] the tightest source span [minF, maxF]
 * covering the aligned source words is computed. The span is discarded when
 * one of those source words is also aligned to a target word outside
 * [startE, endE]; otherwise the source span is widened over neighbouring
 * unaligned source words and every resulting phrase is handled as follows:
 *   - phrase or hier model enabled: the phrase corners are recorded in
 *     inboundPhrases / the in* vertex sets when both extents are below
 *     m_options.maxPhraseLength, and in the out* vertex sets otherwise;
 *   - otherwise the phrase is passed to addPhrase() immediately, with a
 *     word-based orientation string if the word model is enabled and an
 *     empty string if not.
 *
 * NOTE(review): in the code visible here the collected inboundPhrases and
 * vertex sets are not consumed after the loops, so no addPhrase() call is
 * made when the phrase or hier model is enabled. Confirm this is intended.
 *
 * @param sentence aligned sentence pair; its target, source, alignedToT and
 *                 alignedCountS members are read here.
 */
void ExtractTask::extract(SentenceAlignment &sentence)
{
  int countE = sentence.target.size();
  int countF = sentence.source.size();

  HPhraseVector inboundPhrases;

  HSentenceVertices inTopLeft;
  HSentenceVertices inTopRight;
  HSentenceVertices inBottomLeft;
  HSentenceVertices inBottomRight;

  HSentenceVertices outTopLeft;
  HSentenceVertices outTopRight;
  HSentenceVertices outBottomLeft;
  HSentenceVertices outBottomRight;

  // With the hierarchical model the maximum-length limits are not applied
  // while enumerating spans.
  bool relaxLimit = m_options.isHierModel();
  bool buildExtraStructure = m_options.isPhraseModel() || m_options.isHierModel();

  // check alignments for target phrase startE...endE
  // loop over extracted phrases which are compatible with the word-alignments
  for(int startE=0; startE<countE; startE++) {
    for(int endE=startE;
        (endE<countE && (relaxLimit || endE<startE+m_options.maxPhraseLength));
        endE++) {

      int minF = 9999; // sentinel: larger than any source position expected here
      int maxF = -1;   // stays -1 when the target span has no alignment at all

      // Working copy of the per-source-word counts; one is subtracted for
      // every alignment link that falls inside the target span.
      vector< int > usedF = sentence.alignedCountS;
      for(int ei=startE; ei<=endE; ei++) {
        for(size_t i=0; i<sentence.alignedToT[ei].size(); i++) {
          int fi = sentence.alignedToT[ei][i];
          if (fi<minF) {
            minF = fi;
          }
          if (fi>maxF) {
            maxF = fi;
          }
          usedF[ fi ]--;
        }
      }

      if (maxF >= 0 && // aligned to any source words at all
          (relaxLimit || maxF-minF < m_options.maxPhraseLength)) { // source phrase within limits

        // check if source words are aligned to out of bound target words
        bool out_of_bounds = false;
        for(int fi=minF; fi<=maxF && !out_of_bounds; fi++) {
          if (usedF[fi]>0) {
            out_of_bounds = true;
          }
        }

        if (!out_of_bounds) {
          // start point of source phrase may retreat over unaligned
          for(int startF=minF;
              (startF>=0 &&
               (relaxLimit || startF>maxF-m_options.maxPhraseLength) && // within length limit
               (startF==minF || sentence.alignedCountS[startF]==0)); // unaligned
              startF--) {
            // end point of source phrase may advance over unaligned
            // NOTE(review): the minPhraseLength test is part of the loop
            // condition, so a source span that fails it at endF==maxF ends
            // this loop without trying any longer span. Confirm this is
            // intended.
            for(int endF=maxF;
                (endF<countF &&
                 (relaxLimit || endF<startF+m_options.maxPhraseLength) && // within length limit
                 (endF - startF + 1 > m_options.minPhraseLength) && // longer than the minimum
                 (endF==maxF || sentence.alignedCountS[endF]==0)); // unaligned
                endF++) { // at this point we have extracted a phrase
              if(buildExtraStructure) { // phrase || hier
                if(endE-startE < m_options.maxPhraseLength && endF-startF < m_options.maxPhraseLength) { // within limit
                  inboundPhrases.push_back(HPhrase(HPhraseVertex(startF,startE),
                                                   HPhraseVertex(endF,endE)));
                  insertPhraseVertices(inTopLeft, inTopRight, inBottomLeft, inBottomRight,
                                       startF, startE, endF, endE);
                } else {
                  insertPhraseVertices(outTopLeft, outTopRight, outBottomLeft, outBottomRight,
                                       startF, startE, endF, endE);
                }
              } else {
                string orientationInfo = "";
                if(m_options.isWordModel()) {
                  REO_POS wordPrevOrient, wordNextOrient;
                  // Alignment points at the four diagonal neighbours of the phrase box.
                  bool connectedLeftTopP  = isAligned( sentence, startF-1, startE-1 );
                  bool connectedRightTopP = isAligned( sentence, endF+1,   startE-1 );
                  bool connectedLeftTopN  = isAligned( sentence, endF+1,   endE+1 );
                  bool connectedRightTopN = isAligned( sentence, startF-1, endE+1 );
                  wordPrevOrient = getOrientWordModel(sentence, m_options.isWordType(), connectedLeftTopP, connectedRightTopP, startF, endF, startE, endE, countF, 0, 1, &ge, &lt);
                  wordNextOrient = getOrientWordModel(sentence, m_options.isWordType(), connectedLeftTopN, connectedRightTopN, endF, startF, endE, startE, 0, countF, -1, &lt, &ge);
                  orientationInfo += getOrientString(wordPrevOrient, m_options.isWordType()) + " " + getOrientString(wordNextOrient, m_options.isWordType());
                  // The previous code had `if(m_options.isAllModelsOutputFlag()) " | | ";`
                  // here: a bare string literal used as a statement, which has no
                  // effect. It was removed without changing the emitted string.
                  // NOTE(review): if a " | | " suffix was meant to be appended in
                  // all-models mode, that needs an explicit `orientationInfo +=`.
                }
                addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
              }
            }
          }
        }
      }
    }
  }
}