예제 #1
0
// Adds left-branching intermediate nodes for every entry of `parents` that
// holds more than three split points.
//
// For such an entry, the label of the first node covering the whole span
// [point[0], point[last]-1] is looked up, and one node labelled
// "^" + <that label> is added for each span [point[0], point[i]-1] with
// i = 2 .. last-1.
//
//   tree    - queried for the top label; receives the new nodes
//   parents - list of split-point sequences, one per parent span
void LeftBinarize( SyntaxTree &tree, ParentNodes &parents )
{
  for (ParentNodes::const_iterator it = parents.begin(); it != parents.end(); ++it) {
    const SplitPoints &splits = *it;

    // Entries with three or fewer split points need no intermediate nodes.
    if (splits.size() <= 3) {
      continue;
    }

    const size_t lastIdx = splits.size() - 1;

    // Copy the label up front: the tree is modified below.
    const string parentLabel
      = tree.GetNodes( splits[0], splits[lastIdx]-1 )[0]->GetLabel();
    const string binarizedLabel = "^" + parentLabel;

    // Grow the prefix one split point at a time, always anchored at the
    // start of the parent span.
    for (size_t k = 2; k < lastIdx; ++k) {
      tree.AddNode( splits[0], splits[k]-1, binarizedLabel );
    }
  }
}
예제 #2
0
// Adds right-branching intermediate nodes for every entry of `parents` that
// holds more than three split points.
//
// For such an entry, the label of the first node covering the whole span
// [point[0], point[last]-1] is looked up, and one node labelled
// "^" + <that label> is added for each span [point[i], point[last]-1] with
// i = 1 .. last-2.
//
//   tree    - queried for the top label; receives the new nodes
//   parents - list of split-point sequences, one per parent span
void RightBinarize( SyntaxNodeCollection &tree, ParentNodes &parents )
{
    for (ParentNodes::const_iterator it = parents.begin(); it != parents.end(); ++it) {
        const SplitPoints &splits = *it;

        // Entries with three or fewer split points need no intermediate nodes.
        if (splits.size() <= 3) {
            continue;
        }

        const size_t lastIdx = splits.size() - 1;
        const int spanEnd = splits[lastIdx] - 1;

        // Copy the label up front: the collection is modified below.
        const string parentLabel = tree.GetNodes( splits[0], spanEnd )[0]->label;
        const string binarizedLabel = "^" + parentLabel;

        // Every new node ends where the parent ends; only its start moves
        // from one split point to the next.
        for (size_t k = 1; k < lastIdx - 1; ++k) {
            tree.AddNode( splits[k], spanEnd, binarizedLabel );
        }
    }
}
예제 #3
0
// Builds one split-point sequence for every span of two or more words that
// has a node in `nodeColl`.
//
// Each sequence starts with the span's first position, followed by the
// position just past each child that was found. Children are chosen greedily
// from left to right: at each step the longest sub-span that has a node and
// starts at the first uncovered position is taken. The first step excludes
// the full span itself. If no child can be found at some position, the
// sequence is stored as far as it got.
ParentNodes determineSplitPoints(const SyntaxNodeCollection &nodeColl)
{
    ParentNodes result;

    const std::size_t wordCount = nodeColl.GetNumWords();

    // Visit spans from shortest to longest, left to right.
    for (int spanLen = 2; spanLen <= wordCount; ++spanLen) {
        for (int begin = 0; begin <= wordCount - spanLen; ++begin) {
            if (!nodeColl.HasNode( begin, begin + spanLen - 1 )) {
                continue;
            }

            SplitPoints boundaries;
            boundaries.push_back( begin );

            bool noChildYet = true;   // until the first child is found, skip the full span
            bool progressed = true;   // false once a pass over the candidates finds nothing
            int covered = 0;          // number of positions covered by children so far

            while (covered < spanLen && progressed) {
                progressed = false;

                // Try the longest candidate first.
                const int longest = noChildYet ? spanLen - 1 : spanLen;
                for (int candidate = longest; candidate > covered; --candidate) {
                    if (nodeColl.HasNode( begin + covered, begin + candidate - 1 )) {
                        covered = candidate;
                        boundaries.push_back( begin + covered );
                        noChildYet = false;
                        progressed = true;
                        break; // no shorter candidate can exceed the new `covered`
                    }
                }
            }

            result.push_back( boundaries );
        }
    }
    return result;
}
// Builds one split-point sequence for every span of length >= 2 that has a
// node in this tree. The number of positions is taken from m_index.size().
//
// Each sequence starts with the span's first position, followed by the
// position just past each child that was found. Children are chosen greedily
// from left to right: at each step the longest sub-span that has a node and
// starts at the first uncovered position is taken. The first step excludes
// the full span itself.
//
// If no child can be found at some position, the search for that span stops
// and the sequence collected so far is stored. Without this check the loop
// would never terminate on such a span.
ParentNodes SyntaxTree::Parse()
{
  ParentNodes parents;

  const int size = static_cast<int>(m_index.size());

  // looping through all spans of size >= 2
  for( int length=2; length<=size; length++ ) {
    for( int startPos = 0; startPos <= size-length; startPos++ ) {
      if (!HasNode( startPos, startPos+length-1 )) {
        continue;
      }

      // processing one (parent) span
      SplitPoints splitPoints;
      splitPoints.push_back( startPos );

      int first = 1;          // 1 until the first child is found: skips the parent span itself
      int covered = 0;        // number of positions already covered by children
      bool foundChild = true; // cleared when a full pass finds no child
      while( covered < length && foundChild ) {
        // find largest covering subspan (child)
        // starting at last covered position
        foundChild = false;
        for( int midPos=length-first; midPos>covered; midPos-- ) {
          if( HasNode( startPos+covered, startPos+midPos-1 ) ) {
            // Setting covered = midPos also ends this for loop: after the
            // decrement, midPos > covered no longer holds.
            covered = midPos;
            splitPoints.push_back( startPos+covered );
            first = 0;
            foundChild = true;
          }
        }
      }
      parents.push_back( splitPoints );
    }
  }
  return parents;
}
예제 #5
0
// Adds SAMT-style composite labels to `tree`.
//
// Phase 1 (always): for every entry of `parents`
//   - each pair of neighboring children X, Y yields a node "X+Y" over both;
//   - with four or more split points, the span without the first child X is
//     labelled <top> + "\" + X, and the span without the last child Y is
//     labelled <top> + "/" + Y, where <top> is the label of the whole span.
//
// Phase 2 (only if SAMTLevel > 1): every span of 2 .. numWords-1 words that
// has no node yet gets at most one label, trying in this order
//   - "X++Y"  two adjacent nodes X and Y that exactly tile the span;
//   - "A//B"  a node A starting at the span, minus a node B directly right of it;
//   - "A" + two backslashes + "B"  a node A ending at the span, minus a node B
//             directly left of it;
//   - "_FAIL" if nothing matched and SAMTLevel >= 4.
//
// New nodes are collected in a separate tree and copied into `tree` at the
// end, so all lookups in `tree` see only the nodes present on entry.
//
//   tree    - queried for existing nodes; receives the new nodes
//   parents - list of split-point sequences, one per parent span
void SAMT( SyntaxTree &tree, ParentNodes &parents )
{
	const int numWords = tree.GetNumWords();

	SyntaxTree newTree; // to store new nodes

	// look through parents to combine children
	for(ParentNodes::const_iterator p = parents.begin(); p != parents.end(); ++p)
	{
		const SplitPoints &point = *p;
		const size_t ps = point.size();

		// neighboring children: DET+ADJ
		// (the loop body is only reached when ps >= 3)
		for(size_t i = 0; i+2 < ps; i++)
		{
			newTree.AddNode( point[i], point[i+2]-1,
			                 tree.GetNodes(point[i  ],point[i+1]-1)[0]->GetLabel()
			                 + "+" +
			                 tree.GetNodes(point[i+1],point[i+2]-1)[0]->GetLabel() );
		}

		if (ps >= 4) {
			const string topLabel = tree.GetNodes(point[0],point[ps-1]-1)[0]->GetLabel();

			// parent minus its first child
			newTree.AddNode( point[1], point[ps-1]-1,
			                 topLabel
			                 + "\\" +
			                 tree.GetNodes(point[0],point[1]-1)[0]->GetLabel() );

			// parent minus its last child
			newTree.AddNode( point[0], point[ps-2]-1,
			                 topLabel
			                 + "/" +
			                 tree.GetNodes(point[ps-2],point[ps-1]-1)[0]->GetLabel() );
		}
	}

	// rules for any bordering constituents...
	// SAMTLevel does not change inside the loops, so it is tested once here.
	if (SAMTLevel > 1)
	{
		for(int size = 2; size < numWords; size++)
		{
			for(int start = 0; start < numWords-size+1; start++)
			{
				const int end = start+size-1;

				// spans that already carry a label are left alone
				if (tree.HasNode( start,end ) || newTree.HasNode( start,end ))
				{
					continue;
				}

				bool done = false; // set once this span received a label

				// if matching two adjacent parse constituents: use ++
				for(int mid=start+1; mid<=end && !done; mid++)
				{
					if (tree.HasNode(start,mid-1) && tree.HasNode(mid,end))
					{
						newTree.AddNode( start, end,
						                 tree.GetNodes(start,mid-1)[0]->GetLabel()
						                 + "++" +
						                 tree.GetNodes(mid,  end  )[0]->GetLabel() );
						done = true;
					}
				}

				// if matching a constituent A right-minus const. B: use A//B
				for(int postEnd=end+1; postEnd<numWords && !done; postEnd++)
				{
					if (tree.HasNode(start,postEnd) && tree.HasNode(end+1,postEnd))
					{
						newTree.AddNode( start, end,
						                 tree.GetNodes(start,postEnd)[0]->GetLabel()
						                 + "//" +
						                 tree.GetNodes(end+1,postEnd)[0]->GetLabel() );
						done = true;
					}
				}

				// if matching a constituent A left-minus constituent B:
				// label is A, two backslashes, B.
				// Stops at the first match, like the two loops above, so a
				// span never receives more than one label.
				for(int preStart=start-1; preStart>=0 && !done; preStart--)
				{
					if (tree.HasNode(preStart,end) && tree.HasNode(preStart,start-1))
					{
						newTree.AddNode( start, end,
						                 tree.GetNodes(preStart,end    )[0]->GetLabel()
						                 + "\\\\" +
						                 tree.GetNodes(preStart,start-1)[0]->GetLabel() );
						done = true;
					}
				}

				// if matching three consecutive constituents, use double-plus
				// SAMT Level 3, not yet implemented

				// else: assign default category _FAIL
				if (!done && SAMTLevel>=4)
				{
					newTree.AddNode( start, end, "_FAIL" );
				}
			}
		}
	}

	// adding all new nodes
	const vector< SyntaxNode* > &nodes = newTree.GetAllNodes();
	for(size_t i = 0; i < nodes.size(); i++)
	{
		tree.AddNode( nodes[i]->GetStart(), nodes[i]->GetEnd(), nodes[i]->GetLabel() );
	}
}