Ejemplo n.º 1
0
int main(int argc, char ** argv) {
  GOOGLE_PROTOBUF_VERIFY_VERSION;
  google::ParseCommandLineFlags(&argc, &argv, true);

  int sent = 0;
  int last_nonterm_node = -1;
  string name = FLAGS_hypergraph_prefix;
  Hypergraph * h;
  vector <Hypergraph_Node *> nodes(10);
  int cur_edge_id = 0;
  int cur_word_node_id = 0;
  ifstream in(FLAGS_joshua_out_file.c_str(), ios::in | ios::binary);
  Hypergraph_Node * node;
  Hypergraph_Edge * edge;
  int sent_num, length, num_nodes,num_edges;
  //CodedOutputStream::SetTotalBytesLimit(5000000000, 5000000000);
  
  while (in) {
    string blank;
    string t1;
    in >> t1;
    //t = l.strip().split();
    if (t1 == "#SENT:") {
      // flush last sent
      if (sent!= 0) {

        // need to add <s> and </s>
        int subroot = last_nonterm_node;
        int newroot;

        {
          node = h->add_node();
          node->set_label("NEW ROOT");
          edge = node->add_edge();
          Hypergraph_Node * wnode;
          for (int start=0; start < 2; start++) {
            wnode = h->add_node();
            wnode->set_id(cur_word_node_id);
            wnode->set_label("Front");
            wnode->SetExtension(is_word, true);
            wnode->SetExtension(word, "<s>");
            edge->add_tail_node_ids(cur_word_node_id);
            cur_word_node_id++;
          }
          edge->add_tail_node_ids(subroot);
          for (int end=0; end < 2; end++) {
            wnode = h->add_node();
            wnode->set_id(cur_word_node_id);
            wnode->set_label("Back");
            wnode->SetExtension(is_word, true);
            wnode->SetExtension(word, "</s>");
            edge->add_tail_node_ids(cur_word_node_id);
            cur_word_node_id++;
          }
          node->set_id(cur_word_node_id);
          edge->set_id(cur_edge_id);
          cur_edge_id++;
          h->set_root(cur_word_node_id);
          cur_word_node_id++;
        }

        stringstream file_name;
        file_name << name << sent;
        fstream output(file_name.str().c_str(), ios::out | ios::binary);
        h->SerializeToOstream(&output);
        output.close();
      }

      // prep new sent
      h = new Hypergraph();
      cur_edge_id = 0 ;
      sent +=1;
      nodes.clear();

      string buf;
      in >> sent_num >> length >> num_nodes >> num_edges; 
      // need to add nodes for each word
      cur_word_node_id = num_nodes;
      getline(in, buf);

    } else if (t1 == "#I") {
      node = h->add_node();
      int id, left_span, right_span;
      string sym;
      string ig1, ig2, ig3;
      in >> id >> left_span >> right_span >> sym >> ig1 >> ig2 >> ig3;

      stringstream label;
      label << id -1 << " ["<< left_span << ", "<<right_span << "] " <<sym;
      node->set_id(id-1);
      node->set_label(label.str());
      if (nodes.size()<=node->id()) { 
        nodes.resize(node->id()+1);
      }
      nodes[node->id()] = node;
      last_nonterm_node = node->id();      
      cout << "Node id " << node->id() << endl;
    } else {
Ejemplo n.º 2
0
void Manager::SerializeSearchGraphPB(
    long translationId,
    std::ostream& outputStream) const {
	using namespace hgmert;
  std::map < int, bool > connected;
  std::map < int, int > i2hgnode;
  std::vector< const Hypothesis *> connectedList;
	GetConnectedGraph(&connected, &connectedList);
  connected[ 0 ] = true;
  Hypergraph hg;
	hg.set_is_sorted(false);
	int num_feats = (*m_search->GetHypothesisStacks().back()->begin())->GetScoreBreakdown().size();
	hg.set_num_features(num_feats);
	StaticData::Instance().GetScoreIndexManager().SerializeFeatureNamesToPB(&hg);
	Hypergraph_Node* goal = hg.add_nodes();  // idx=0 goal node must have idx 0
	Hypergraph_Node* source = hg.add_nodes();  // idx=1
	i2hgnode[-1] = 1; // source node
  const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
  const HypothesisStack &finalStack = *hypoStackColl.back();
  for (std::vector < HypothesisStack* >::const_iterator iterStack = hypoStackColl.begin();
	  iterStack != hypoStackColl.end() ; ++iterStack)
  {
    const HypothesisStack &stack = **iterStack;
    HypothesisStack::const_iterator iterHypo;
		
    for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
    {
      const Hypothesis *hypo = *iterHypo;
			bool is_goal = hypo->GetWordsBitmap().IsComplete();
      if (connected.find( hypo->GetId() ) != connected.end())
      {
				int headNodeIdx;
				Hypergraph_Node* headNode = GetHGNode(hypo, &i2hgnode, &hg, &headNodeIdx);
				if (is_goal) {
					Hypergraph_Edge* ge = hg.add_edges();
					ge->set_head_node(0);  // goal
					ge->add_tail_nodes(headNodeIdx);
				  ge->mutable_rule()->add_trg_words("[X,1]");
				}
				Hypergraph_Edge* edge = hg.add_edges();
				SerializeEdgeInfo(hypo, edge);
				edge->set_head_node(headNodeIdx);
				const Hypothesis* prev = hypo->GetPrevHypo();
				int tailNodeIdx = 1; // source
				if (prev)
				  tailNodeIdx = i2hgnode.find(prev->GetId())->second;
				edge->add_tail_nodes(tailNodeIdx);

        const ArcList *arcList = hypo->GetArcList();
        if (arcList != NULL)
        {
          ArcList::const_iterator iterArcList;
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
          {
            const Hypothesis *loserHypo = *iterArcList;
						assert(connected[loserHypo->GetId()]);
						Hypergraph_Edge* edge = hg.add_edges();
						SerializeEdgeInfo(loserHypo, edge);
						edge->set_head_node(headNodeIdx);
						tailNodeIdx = i2hgnode.find(loserHypo->GetPrevHypo()->GetId())->second;
						edge->add_tail_nodes(tailNodeIdx);
          }
        } // end if arcList empty
      } // end if connected
    } // end for iterHypo
  } // end for iterStack
	hg.SerializeToOstream(&outputStream);
}