// Entry point of a converter that reads the text file named by
// FLAGS_joshua_out_file and builds one Hypergraph message per sentence.
//
// After GOOGLE_PROTOBUF_VERIFY_VERSION and google::ParseCommandLineFlags, the
// input is consumed token by token with operator>>; the first token of each
// record selects the branch:
//
//   "#SENT:"  If a sentence has already been started (sent != 0), the current
//             hypergraph is finished first: a node labelled "NEW ROOT" is added
//             with a single edge whose tail ids are, in order, two "Front" word
//             nodes (word "<s>"), the last nonterminal node seen
//             (last_nonterm_node), and two "Back" word nodes (word "</s>").
//             The new root takes the next free id from cur_word_node_id and is
//             set as the hypergraph root. The hypergraph is then serialized to
//             a binary file whose name is FLAGS_hypergraph_prefix followed by
//             the sentence counter.
//             Afterwards a fresh Hypergraph is allocated, cur_edge_id is reset,
//             the sentence counter is incremented, `nodes` is cleared, and
//             sent_num, length, num_nodes and num_edges are read from the
//             record. Word-node ids for the new sentence start at num_nodes.
//
//   "#I"      Adds a node to the current hypergraph. Reads id, left_span,
//             right_span, a symbol and three further tokens that are not used
//             afterwards. The node gets id (id - 1) and the label
//             "<id-1> [<left_span>, <right_span>] <sym>", is stored in
//             nodes[id - 1] (growing the vector if needed), becomes
//             last_nonterm_node, and its id is printed to cout.
//
// NOTE(review): the Hypergraph allocated with `new` is not deleted anywhere in
// the visible code before `h` is reassigned -- confirm whether this is released
// later. `newroot` and `blank` are declared but not used in the visible part.
// NOTE(review): the definition continues beyond the final `else {`; the
// remaining branch is not covered by this description.
int main(int argc, char ** argv) { GOOGLE_PROTOBUF_VERIFY_VERSION; google::ParseCommandLineFlags(&argc, &argv, true); int sent = 0; int last_nonterm_node = -1; string name = FLAGS_hypergraph_prefix; Hypergraph * h; vector <Hypergraph_Node *> nodes(10); int cur_edge_id = 0; int cur_word_node_id = 0; ifstream in(FLAGS_joshua_out_file.c_str(), ios::in | ios::binary); Hypergraph_Node * node; Hypergraph_Edge * edge; int sent_num, length, num_nodes,num_edges; //CodedOutputStream::SetTotalBytesLimit(5000000000, 5000000000); while (in) { string blank; string t1; in >> t1; //t = l.strip().split(); if (t1 == "#SENT:") { // flush last sent if (sent!= 0) { // need to add <s> and </s> int subroot = last_nonterm_node; int newroot; { node = h->add_node(); node->set_label("NEW ROOT"); edge = node->add_edge(); Hypergraph_Node * wnode; for (int start=0; start < 2; start++) { wnode = h->add_node(); wnode->set_id(cur_word_node_id); wnode->set_label("Front"); wnode->SetExtension(is_word, true); wnode->SetExtension(word, "<s>"); edge->add_tail_node_ids(cur_word_node_id); cur_word_node_id++; } edge->add_tail_node_ids(subroot); for (int end=0; end < 2; end++) { wnode = h->add_node(); wnode->set_id(cur_word_node_id); wnode->set_label("Back"); wnode->SetExtension(is_word, true); wnode->SetExtension(word, "</s>"); edge->add_tail_node_ids(cur_word_node_id); cur_word_node_id++; } node->set_id(cur_word_node_id); edge->set_id(cur_edge_id); cur_edge_id++; h->set_root(cur_word_node_id); cur_word_node_id++; } stringstream file_name; file_name << name << sent; fstream output(file_name.str().c_str(), ios::out | ios::binary); h->SerializeToOstream(&output); output.close(); } // prep new sent h = new Hypergraph(); cur_edge_id = 0 ; sent +=1; nodes.clear(); string buf; in >> sent_num >> length >> num_nodes >> num_edges; // need to add nodes for each word cur_word_node_id = num_nodes; getline(in, buf); } else if (t1 == "#I") { node = h->add_node(); int id, left_span, right_span; string sym; string ig1, 
ig2, ig3; in >> id >> left_span >> right_span >> sym >> ig1 >> ig2 >> ig3; stringstream label; label << id -1 << " ["<< left_span << ", "<<right_span << "] " <<sym; node->set_id(id-1); node->set_label(label.str()); if (nodes.size()<=node->id()) { nodes.resize(node->id()+1); } nodes[node->id()] = node; last_nonterm_node = node->id(); cout << "Node id " << node->id() << endl; } else {
void Manager::SerializeSearchGraphPB( long translationId, std::ostream& outputStream) const { using namespace hgmert; std::map < int, bool > connected; std::map < int, int > i2hgnode; std::vector< const Hypothesis *> connectedList; GetConnectedGraph(&connected, &connectedList); connected[ 0 ] = true; Hypergraph hg; hg.set_is_sorted(false); int num_feats = (*m_search->GetHypothesisStacks().back()->begin())->GetScoreBreakdown().size(); hg.set_num_features(num_feats); StaticData::Instance().GetScoreIndexManager().SerializeFeatureNamesToPB(&hg); Hypergraph_Node* goal = hg.add_nodes(); // idx=0 goal node must have idx 0 Hypergraph_Node* source = hg.add_nodes(); // idx=1 i2hgnode[-1] = 1; // source node const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks(); const HypothesisStack &finalStack = *hypoStackColl.back(); for (std::vector < HypothesisStack* >::const_iterator iterStack = hypoStackColl.begin(); iterStack != hypoStackColl.end() ; ++iterStack) { const HypothesisStack &stack = **iterStack; HypothesisStack::const_iterator iterHypo; for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) { const Hypothesis *hypo = *iterHypo; bool is_goal = hypo->GetWordsBitmap().IsComplete(); if (connected.find( hypo->GetId() ) != connected.end()) { int headNodeIdx; Hypergraph_Node* headNode = GetHGNode(hypo, &i2hgnode, &hg, &headNodeIdx); if (is_goal) { Hypergraph_Edge* ge = hg.add_edges(); ge->set_head_node(0); // goal ge->add_tail_nodes(headNodeIdx); ge->mutable_rule()->add_trg_words("[X,1]"); } Hypergraph_Edge* edge = hg.add_edges(); SerializeEdgeInfo(hypo, edge); edge->set_head_node(headNodeIdx); const Hypothesis* prev = hypo->GetPrevHypo(); int tailNodeIdx = 1; // source if (prev) tailNodeIdx = i2hgnode.find(prev->GetId())->second; edge->add_tail_nodes(tailNodeIdx); const ArcList *arcList = hypo->GetArcList(); if (arcList != NULL) { ArcList::const_iterator iterArcList; for (iterArcList = arcList->begin() ; iterArcList != 
arcList->end() ; ++iterArcList) { const Hypothesis *loserHypo = *iterArcList; assert(connected[loserHypo->GetId()]); Hypergraph_Edge* edge = hg.add_edges(); SerializeEdgeInfo(loserHypo, edge); edge->set_head_node(headNodeIdx); tailNodeIdx = i2hgnode.find(loserHypo->GetPrevHypo()->GetId())->second; edge->add_tail_nodes(tailNodeIdx); } } // end if arcList empty } // end if connected } // end for iterHypo } // end for iterStack hg.SerializeToOstream(&outputStream); }