Esempio n. 1
0
void store( SyntaxTree &tree, vector< string > &words )
{
  // output words
  for( size_t i=0; i<words.size(); i++ ) {
    if (i>0) {
      cout << " ";
    }
    cout << words[i];
  }

  // output tree nodes
  vector< SyntaxNode* > nodes = tree.GetAllNodes();
  for( size_t i=0; i<nodes.size(); i++ ) {
    cout << " <tree span=\"" << nodes[i]->GetStart()
         << "-" << nodes[i]->GetEnd()
         << "\" label=\"" << nodes[i]->GetLabel()
         << "\"/>";
  }
  cout << endl;
}
void SAMT( SyntaxTree &tree, ParentNodes &parents )
{
	int numWords = tree.GetNumWords();

	SyntaxTree newTree; // to store new nodes

	// look through parents to combine children
	for(ParentNodes::const_iterator p = parents.begin(); p != parents.end(); p++)
	{
		const SplitPoints &point = *p;
		
		// neighboring childen: DET+ADJ
		if (point.size() >= 3) {
			// cerr << "complex parent: ";
			// for(int i=0;i<point.size();i++) cerr << point[i] << " ";
			// cerr << endl;

			for(int i = 0; i+2 < point.size(); i++)
			{
				// cerr << "\tadding " << point[i] << ";" << point[i+1] << ";" << (point[i+2]-1) << ": " << tree.GetNodes(point[i  ],point[i+1]-1)[0]->GetLabel() << "+" << tree.GetNodes(point[i+1],point[i+2]-1)[0]->GetLabel() << endl;
				
				newTree.AddNode( point[i],point[i+2]-1,
												 tree.GetNodes(point[i  ],point[i+1]-1)[0]->GetLabel() 
												 + "+" + 
												 tree.GetNodes(point[i+1],point[i+2]-1)[0]->GetLabel() );
			} 
		}
		if (point.size() >= 4) {
			int ps = point.size();
			string topLabel = tree.GetNodes(point[0],point[ps-1]-1)[0]->GetLabel();

			// cerr << "\tadding " << topLabel + "\\" + tree.GetNodes(point[0],point[1]-1)[0]->GetLabel() << endl;
			newTree.AddNode( point[1],point[ps-1]-1,
											 topLabel 
											 + "\\" +
											 tree.GetNodes(point[0],point[1]-1)[0]->GetLabel() );

			// cerr << "\tadding " << topLabel + "/" + tree.GetNodes(point[ps-2],point[ps-1]-1)[0]->GetLabel() << endl;
			newTree.AddNode( point[0],point[ps-2]-1,
											 topLabel 
											 + "/" +
											 tree.GetNodes(point[ps-2],point[ps-1]-1)[0]->GetLabel() );
		}
	}

	// rules for any bordering constituents...
	for(int size = 2; size < numWords; size++)
	{
		for(int start = 0; start < numWords-size+1; start++)
		{
			int end = start+size-1;
			bool done = false;

			if (tree.HasNode( start,end ) || newTree.HasNode( start,end )
					|| SAMTLevel <= 1)
			{
				continue;
			}

			// if matching two adjacent parse constituents: use ++
	
			for(int mid=start+1; mid<=end && !done; mid++)
			{
				if (tree.HasNode(start,mid-1) && tree.HasNode(mid,end)) {
					// cerr << "\tadding " << tree.GetNodes(start,mid-1)[0]->GetLabel() << "++" << tree.GetNodes(mid,  end  )[0]->GetLabel() << endl;

					newTree.AddNode( start, end, 
													 tree.GetNodes(start,mid-1)[0]->GetLabel() 
													 + "++" + 
													 tree.GetNodes(mid,  end  )[0]->GetLabel() );
					done = true;
				}
			}
			if (done) continue;

			// if matching a constituent A right-minus const. B: use A//B
			for(int postEnd=end+1; postEnd<numWords && !done; postEnd++)
			{
				if (tree.HasNode(start,postEnd) && tree.HasNode(end+1,postEnd))
				{
					newTree.AddNode( start, end, 
													 tree.GetNodes(start,postEnd)[0]->GetLabel() 
													 + "//" + 
													 tree.GetNodes(end+1,postEnd)[0]->GetLabel() );
					done = true;
				}
			}
			if (done) continue;

			// if matching a constituent A left-minus constituent B: use A\\B
			for(int preStart=start-1; preStart>=0; preStart--)
			{
				if (tree.HasNode(preStart,end) && tree.HasNode(preStart,start-1))
				{
					// cerr << "\tadding " << tree.GetNodes(preStart,end    )[0]->GetLabel() << "\\\\" <<tree.GetNodes(preStart,start-1)[0]->GetLabel() << endl;
					newTree.AddNode( start, end, 
													 tree.GetNodes(preStart,end    )[0]->GetLabel() 
													 + "\\\\" + 
													 tree.GetNodes(preStart,start-1)[0]->GetLabel() );
					done = true;
				}
			}
			if (done) continue;

      // if matching three consecutive constituents, use double-plus
			// SAMT Level 3, not yet implemented

			// else: assign default category _FAIL
			if (SAMTLevel>=4)
			{
				newTree.AddNode( start, end, "_FAIL" );
			}
		}
	}

	// adding all new nodes
	vector< SyntaxNode* > nodes = newTree.GetAllNodes();
	for( int i=0; i<nodes.size(); i++ )
	{
		tree.AddNode( nodes[i]->GetStart(), nodes[i]->GetEnd(), nodes[i]->GetLabel());
	}
}