void ExtractTask::saveAllHieroPhrases( int startT, int endT, int startS, int endS, HoleCollection &holeColl, int countS) { LabelIndex labelIndex,labelCount; // number of target head labels int numLabels = m_options.targetSyntax ? m_sentence.targetTree.GetNodes(startT,endT).size() : 1; if (m_options.targetSyntacticPreferences && !numLabels) { numLabels++; } labelCount.push_back(numLabels); labelIndex.push_back(0); // number of source head labels numLabels = m_options.sourceSyntax ? m_sentence.sourceTree.GetNodes(startS,endS).size() : 1; labelCount.push_back(numLabels); labelIndex.push_back(0); // number of target hole labels for( HoleList::const_iterator hole = holeColl.GetHoles().begin(); hole != holeColl.GetHoles().end(); hole++ ) { int numLabels = m_options.targetSyntax ? m_sentence.targetTree.GetNodes(hole->GetStart(1),hole->GetEnd(1)).size() : 1 ; if (m_options.targetSyntacticPreferences && !numLabels) { numLabels++; } labelCount.push_back(numLabels); labelIndex.push_back(0); } // number of source hole labels holeColl.SortSourceHoles(); for( vector<Hole*>::iterator i = holeColl.GetSortedSourceHoles().begin(); i != holeColl.GetSortedSourceHoles().end(); i++ ) { const Hole &hole = **i; int numLabels = m_options.sourceSyntax ? m_sentence.sourceTree.GetNodes(hole.GetStart(0),hole.GetEnd(0)).size() : 1 ; labelCount.push_back(numLabels); labelIndex.push_back(0); } // loop through the holes bool done = false; while(!done) { saveHieroPhrase( startT, endT, startS, endS, holeColl, labelIndex, countS ); for(unsigned int i=0; i<labelIndex.size(); i++) { labelIndex[i]++; if(labelIndex[i] == labelCount[i]) { labelIndex[i] = 0; if (i == labelIndex.size()-1) done = true; } else { break; } } } }
void printAllHieroPhrases( SentenceAlignmentWithSyntax &sentence , int startT, int endT, int startS, int endS , HoleCollection &holeColl) { LabelIndex labelIndex,labelCount; // number of target head labels int numLabels = options.targetSyntax ? sentence.targetTree.GetNodes(startT,endT).size() : 1; labelCount.push_back(numLabels); labelIndex.push_back(0); // number of source head labels numLabels = options.sourceSyntax ? sentence.sourceTree.GetNodes(startS,endS).size() : 1; labelCount.push_back(numLabels); labelIndex.push_back(0); // number of target hole labels for( HoleList::const_iterator hole = holeColl.GetHoles().begin(); hole != holeColl.GetHoles().end(); hole++ ) { int numLabels = options.targetSyntax ? sentence.targetTree.GetNodes(hole->GetStart(1),hole->GetEnd(1)).size() : 1 ; labelCount.push_back(numLabels); labelIndex.push_back(0); } // number of source hole labels holeColl.SortSourceHoles(); for( vector<Hole*>::iterator i = holeColl.GetSortedSourceHoles().begin(); i != holeColl.GetSortedSourceHoles().end(); i++ ) { const Hole &hole = **i; int numLabels = options.sourceSyntax ? sentence.sourceTree.GetNodes(hole.GetStart(0),hole.GetEnd(0)).size() : 1 ; labelCount.push_back(numLabels); labelIndex.push_back(0); } // loop through the holes bool done = false; while(!done) { printHieroPhrase( sentence, startT, endT, startS, endS, holeColl, labelIndex ); for(int i=0; i<labelIndex.size(); i++) { labelIndex[i]++; if(labelIndex[i] == labelCount[i]) { labelIndex[i] = 0; if (i == labelIndex.size()-1) done = true; } else { break; } } } }
// Builds the source side of a hierarchical rule as a space-separated string.
//
// Walks source positions startS..endS of `sentence`. A position where the
// next hole (in sorted source order) starts is written as a non-terminal and
// the walk jumps to the end of that hole; every other position is written as
// the source word. Non-terminals are written as "[source][target]", or as
// "[source]" alone when the source has no syntax but the target does.
//
// Side effect: each hole is handed its index in the output token sequence via
// SetPos(pos, 0).
//
// Preconditions (asserted): holeColl has at least one sorted source hole and
// every such hole starts inside the walked span. startT, endT and labelIndex
// are not used here.
//
// Returns the rule string without a trailing space; an empty string if
// nothing was emitted.
string printSourceHieroPhrase( SentenceAlignmentWithSyntax &sentence
                               , int startT, int endT, int startS, int endS
                               , HoleCollection &holeColl, const LabelIndex &labelIndex)
{
  vector<Hole*>::iterator iterHoleList = holeColl.GetSortedSourceHoles().begin();
  assert(iterHoleList != holeColl.GetSortedSourceHoles().end());

  const bool stringToTree = !options.sourceSyntax && options.targetSyntax;

  string out;
  int outPos = 0;
  for(int currPos = startS; currPos <= endS; currPos++) {
    bool isHole = false;
    if (iterHoleList != holeColl.GetSortedSourceHoles().end()) {
      const Hole &hole = **iterHoleList;
      isHole = hole.GetStart(0) == currPos;
    }

    if (isHole) {
      Hole &hole = **iterHoleList;

      const string &targetLabel = hole.GetLabel(1);
      assert(targetLabel != "");
      const string &sourceLabel = hole.GetLabel(0);

      // append piecewise so no temporary strings are built per token
      out += "[";
      out += sourceLabel;
      if (!stringToTree) {
        out += "][";
        out += targetLabel;
      }
      out += "] ";

      currPos = hole.GetEnd(0); // skip the words covered by the hole
      hole.SetPos(outPos, 0);
      ++iterHoleList;
    } else {
      out += sentence.source[currPos];
      out += " ";
    }

    outPos++;
  }

  assert(iterHoleList == holeColl.GetSortedSourceHoles().end());

  // drop the trailing separator; guarded because erase() at size()-1 on an
  // empty string would throw std::out_of_range
  if (!out.empty()) {
    out.erase(out.size()-1);
  }
  return out;
}
// Builds the source side of a hierarchical rule as a space-separated string.
//
// Walks source positions startS..endS of m_sentence. A position where the
// next hole (in sorted source order) starts is written as a non-terminal and
// the walk jumps to the end of that hole; every other position is written as
// the source word. Non-terminals are written as "[source][target]", or as
// "[source]" alone when m_options.unpairedExtractFormat is set.
//
// Side effect: each hole is handed its index in the output token sequence via
// SetPos(pos, 0).
//
// Preconditions (asserted): holeColl has at least one sorted source hole and
// every such hole starts inside the walked span. startT, endT and labelIndex
// are not used here.
//
// Returns the rule string without a trailing space; an empty string if
// nothing was emitted.
string ExtractTask::saveSourceHieroPhrase( int startT, int endT, int startS, int endS
    , HoleCollection &holeColl, const LabelIndex &labelIndex)
{
  vector<Hole*>::iterator iterHoleList = holeColl.GetSortedSourceHoles().begin();
  assert(iterHoleList != holeColl.GetSortedSourceHoles().end());

  string out;
  int outPos = 0;
  for(int currPos = startS; currPos <= endS; currPos++) {
    bool isHole = false;
    if (iterHoleList != holeColl.GetSortedSourceHoles().end()) {
      const Hole &hole = **iterHoleList;
      isHole = hole.GetStart(0) == currPos;
    }

    if (isHole) {
      Hole &hole = **iterHoleList;

      const string &targetLabel = hole.GetLabel(1);
      assert(targetLabel != "");
      const string &sourceLabel = hole.GetLabel(0);

      // append piecewise so no temporary strings are built per token
      out += "[";
      out += sourceLabel;
      if (!m_options.unpairedExtractFormat) {
        out += "][";
        out += targetLabel;
      }
      out += "] ";

      currPos = hole.GetEnd(0); // skip the words covered by the hole
      hole.SetPos(outPos, 0);
      ++iterHoleList;
    } else {
      out += m_sentence.source[currPos];
      out += " ";
    }

    outPos++;
  }

  assert(iterHoleList == holeColl.GetSortedSourceHoles().end());

  // drop the trailing separator; guarded because erase() at size()-1 on an
  // empty string would throw std::out_of_range
  if (!out.empty()) {
    out.erase(out.size()-1);
  }
  return out;
}
void preprocessSourceHieroPhrase( SentenceAlignmentWithSyntax &sentence , int startT, int endT, int startS, int endS , WordIndex &indexS, HoleCollection &holeColl, const LabelIndex &labelIndex) { vector<Hole*>::iterator iterHoleList = holeColl.GetSortedSourceHoles().begin(); assert(iterHoleList != holeColl.GetSortedSourceHoles().end()); int outPos = 0; int holeCount = 0; int holeTotal = holeColl.GetHoles().size(); for(int currPos = startS; currPos <= endS; currPos++) { bool isHole = false; if (iterHoleList != holeColl.GetSortedSourceHoles().end()) { const Hole &hole = **iterHoleList; isHole = hole.GetStart(0) == currPos; } if (isHole) { Hole &hole = **iterHoleList; int labelI = labelIndex[ 2+holeCount+holeTotal ]; string label = options.sourceSyntax ? sentence.sourceTree.GetNodes(currPos,hole.GetEnd(0))[ labelI ]->GetLabel() : "X"; hole.SetLabel(label, 0); currPos = hole.GetEnd(0); hole.SetPos(outPos, 0); ++iterHoleList; ++holeCount; } else { indexS[currPos] = outPos; } outPos++; } assert(iterHoleList == holeColl.GetSortedSourceHoles().end()); }