// Left-binarizes every parent that has more than three split points.
//
// For such a parent, the node covering the whole span
// [point[0], point[last]-1] is looked up and its label is prefixed with "^".
// A node with that label is then added for each prefix span
// [point[0], point[i]-1], for i = 2 .. last-1.
//
// tree    : syntax tree that is queried and extended in place.
// parents : list of split-point vectors, one per parent
//           (presumably the child boundaries of that parent -- confirm
//           against the code that builds ParentNodes).
void LeftBinarize( SyntaxTree &tree, ParentNodes &parents )
{
  for (ParentNodes::const_iterator parent = parents.begin();
       parent != parents.end(); parent++) {
    const SplitPoints &splits = *parent;
    const size_t numSplits = splits.size();

    // nothing to binarize unless there are more than three split points
    if (numSplits <= 3) {
      continue;
    }

    // label of the first node that spans the whole parent
    const vector< SyntaxNode* >& spanning =
      tree.GetNodes( splits[0], splits[numSplits-1]-1 );
    string parentLabel = spanning[0]->GetLabel();
    const string virtualLabel = "^" + parentLabel;

    // one virtual node per proper prefix of two or more children
    for (size_t i = 2; i + 1 < numSplits; i++) {
      tree.AddNode( splits[0], splits[i]-1, virtualLabel );
    }
  }
}
// Right-binarizes every parent that has more than three split points.
//
// For such a parent, the node covering the whole span
// [point[0], point[last]-1] is looked up and its label is prefixed with "^".
// A node with that label is then added for each suffix span
// [point[i], point[last]-1], for i = 1 .. last-2.
//
// tree    : node collection that is queried and extended in place.
// parents : list of split-point vectors, one per parent
//           (presumably the child boundaries of that parent -- confirm
//           against the code that builds ParentNodes).
void RightBinarize( SyntaxNodeCollection &tree, ParentNodes &parents )
{
  for (ParentNodes::const_iterator parent = parents.begin();
       parent != parents.end(); parent++) {
    const SplitPoints &splits = *parent;
    const size_t numSplits = splits.size();

    // nothing to binarize unless there are more than three split points
    if (numSplits <= 3) {
      continue;
    }

    // last position covered by the parent span
    int lastPos = splits[numSplits-1]-1;

    // label of the first node that spans the whole parent
    const vector< SyntaxNode* >& spanning = tree.GetNodes( splits[0], lastPos );
    string parentLabel = spanning[0]->label;
    const string virtualLabel = "^" + parentLabel;

    // one virtual node per proper suffix of two or more children
    for (size_t i = 1; i + 2 < numSplits; i++) {
      tree.AddNode( splits[i], lastPos, virtualLabel );
    }
  }
}
void SAMT( SyntaxTree &tree, ParentNodes &parents ) { int numWords = tree.GetNumWords(); SyntaxTree newTree; // to store new nodes // look through parents to combine children for(ParentNodes::const_iterator p = parents.begin(); p != parents.end(); p++) { const SplitPoints &point = *p; // neighboring childen: DET+ADJ if (point.size() >= 3) { // cerr << "complex parent: "; // for(int i=0;i<point.size();i++) cerr << point[i] << " "; // cerr << endl; for(int i = 0; i+2 < point.size(); i++) { // cerr << "\tadding " << point[i] << ";" << point[i+1] << ";" << (point[i+2]-1) << ": " << tree.GetNodes(point[i ],point[i+1]-1)[0]->GetLabel() << "+" << tree.GetNodes(point[i+1],point[i+2]-1)[0]->GetLabel() << endl; newTree.AddNode( point[i],point[i+2]-1, tree.GetNodes(point[i ],point[i+1]-1)[0]->GetLabel() + "+" + tree.GetNodes(point[i+1],point[i+2]-1)[0]->GetLabel() ); } } if (point.size() >= 4) { int ps = point.size(); string topLabel = tree.GetNodes(point[0],point[ps-1]-1)[0]->GetLabel(); // cerr << "\tadding " << topLabel + "\\" + tree.GetNodes(point[0],point[1]-1)[0]->GetLabel() << endl; newTree.AddNode( point[1],point[ps-1]-1, topLabel + "\\" + tree.GetNodes(point[0],point[1]-1)[0]->GetLabel() ); // cerr << "\tadding " << topLabel + "/" + tree.GetNodes(point[ps-2],point[ps-1]-1)[0]->GetLabel() << endl; newTree.AddNode( point[0],point[ps-2]-1, topLabel + "/" + tree.GetNodes(point[ps-2],point[ps-1]-1)[0]->GetLabel() ); } } // rules for any bordering constituents... 
for(int size = 2; size < numWords; size++) { for(int start = 0; start < numWords-size+1; start++) { int end = start+size-1; bool done = false; if (tree.HasNode( start,end ) || newTree.HasNode( start,end ) || SAMTLevel <= 1) { continue; } // if matching two adjacent parse constituents: use ++ for(int mid=start+1; mid<=end && !done; mid++) { if (tree.HasNode(start,mid-1) && tree.HasNode(mid,end)) { // cerr << "\tadding " << tree.GetNodes(start,mid-1)[0]->GetLabel() << "++" << tree.GetNodes(mid, end )[0]->GetLabel() << endl; newTree.AddNode( start, end, tree.GetNodes(start,mid-1)[0]->GetLabel() + "++" + tree.GetNodes(mid, end )[0]->GetLabel() ); done = true; } } if (done) continue; // if matching a constituent A right-minus const. B: use A//B for(int postEnd=end+1; postEnd<numWords && !done; postEnd++) { if (tree.HasNode(start,postEnd) && tree.HasNode(end+1,postEnd)) { newTree.AddNode( start, end, tree.GetNodes(start,postEnd)[0]->GetLabel() + "//" + tree.GetNodes(end+1,postEnd)[0]->GetLabel() ); done = true; } } if (done) continue; // if matching a constituent A left-minus constituent B: use A\\B for(int preStart=start-1; preStart>=0; preStart--) { if (tree.HasNode(preStart,end) && tree.HasNode(preStart,start-1)) { // cerr << "\tadding " << tree.GetNodes(preStart,end )[0]->GetLabel() << "\\\\" <<tree.GetNodes(preStart,start-1)[0]->GetLabel() << endl; newTree.AddNode( start, end, tree.GetNodes(preStart,end )[0]->GetLabel() + "\\\\" + tree.GetNodes(preStart,start-1)[0]->GetLabel() ); done = true; } } if (done) continue; // if matching three consecutive constituents, use double-plus // SAMT Level 3, not yet implemented // else: assign default category _FAIL if (SAMTLevel>=4) { newTree.AddNode( start, end, "_FAIL" ); } } } // adding all new nodes vector< SyntaxNode* > nodes = newTree.GetAllNodes(); for( int i=0; i<nodes.size(); i++ ) { tree.AddNode( nodes[i]->GetStart(), nodes[i]->GetEnd(), nodes[i]->GetLabel()); } }