void PhraseNode::AddTargetPhrase(size_t pos, const SourcePhrase &sourcePhrase
                                 , TargetPhrase *targetPhrase, OnDiskWrapper &onDiskWrapper
                                 , size_t tableLimit, const std::vector<float> &counts, OnDiskPt::PhrasePtr spShort)
{	
  size_t phraseSize = sourcePhrase.GetSize();
  if (pos < phraseSize) {
    const Word &word = sourcePhrase.GetWord(pos);

    PhraseNode &node = m_children[word];
    if (m_currChild != &node) {
      // new node
      node.SetPos(pos);

      if (m_currChild) {
        m_currChild->Save(onDiskWrapper, pos, tableLimit);
      }

      m_currChild = &node;
    }

    // keep searching for target phrase node.. 
    node.AddTargetPhrase(pos + 1, sourcePhrase, targetPhrase, onDiskWrapper, tableLimit, counts, spShort);
  } else {
    // drilled down to the right node
    m_counts = counts;
    targetPhrase->SetSourcePhrase(spShort);
    m_targetPhraseColl.AddTargetPhrase(targetPhrase);
  }
}
// Example no. 2
// 0
/**
 * Writes the phrase table, extended with new scores, through a gzip
 * compressor to the file named by XenOption::getOutName().
 *
 * One line is written per entry of ptrScore, in the form
 *   source ||| target ||| scores newScore [localScore] ||| alignment ||| counts
 * where the optional localScore is emitted only when XenOption::getLocal()
 * is set; it is taken from the per-source-phrase XE scores, gathered in
 * source-phrase order before writing starts.
 *
 * @param ptrPT    phrase table supplying source, target, scores, alignment and counts
 * @param ptrScore scores appended to each line; its size drives the number of lines
 * @throws XenCommon::XenCEption if the output file cannot be opened or the
 *         output stream reports a failure while writing
 */
void XenIO::writeNewPT(boost::shared_ptr<PhraseTable> ptrPT, boost::shared_ptr<Score> ptrScore) {
    XenOption* opt = XenOption::getInstance();
    Score sc2;

    if (opt->getLocal()) {
        std::vector<SourcePhrase> srcPh = ptrPT->getSrcPhrases();

        for (unsigned int i = 0; i < srcPh.size(); i++) {
            // Bind by reference: the phrase is only read, so copying it is wasted work.
            SourcePhrase &sP = srcPh[i];

            for (unsigned int j = 0; j < sP.getScoresXE()->getSize(); j++) {
                sc2.addScore(sP.getScoresXE()->getScore(j));
            }
        }
    }

    std::cout << "Writing new phrase-table to " + opt->getOutName() << std::endl;

    std::string oF = opt->getOutName();

    // Open the sink on its own first: the filtering stream's state does not
    // tell us whether the underlying file could actually be opened.
    boost::iostreams::file_sink sink(oF.c_str(), std::ios_base::out | std::ios_base::binary);
    if (!sink.is_open())
        throw XenCommon::XenCEption("Can't write to " + opt->getOutName() + ".gz");

    boost::iostreams::filtering_ostream out;
    out.push(boost::iostreams::gzip_compressor());
    out.push(sink);

    if (!out.good())
        throw XenCommon::XenCEption("Can't write to " + opt->getOutName() + ".gz");

    for (unsigned int i = 0; i < ptrScore->getSize(); i++) {
        out << ptrPT->getSource(i) << " ||| " << ptrPT->getTarget(i) << " ||| " << ptrPT->getScores(i) << " " << XenCommon::toString(ptrScore->getScore(i));
        if (opt->getLocal())
            out << " " << XenCommon::toString(sc2.getScore(i));
        out << " ||| " << ptrPT->getAlignment(i) << " ||| " << ptrPT->getCounts(i) << std::endl;

        if (out.bad())
            throw XenCommon::XenCEption("Something went wrong in output stream...");
    }

    // Push any buffered data through the compressor, then tear the chain down
    // so the file is finalized before returning.
    out.flush();
    out.reset();
}
// Command-line lookup tool for an on-disk phrase table.
//
// Options:
//   -t <path>     phrase table to load (required)
//   -tlimit <n>   limit passed to GetTargetPhraseCollection (default 20)
//
// After loading, each line read from stdin is split on spaces and turned into
// a source phrase; the source trie is walked word by word and the target
// phrases found at the final node are printed to stderr. "Found <n>" or
// "Not found" goes to stdout, followed by a blank line.
//
// NOTE(review): usage() is called on every bad-argument path and execution
// continues after it; presumably it terminates the program — confirm.
// NOTE(review): the pointers returned by GetChild() and
// GetTargetPhraseCollection() are never released here; check who owns them.
int main(int argc, char **argv)
{
  int tableLimit = 20;
  std::string ttable = "";
  bool useAlignments = false; // never read in this function

  // ---- argument parsing ----
  for (int i = 1; i < argc; ++i) {
    const bool isLimitFlag = (strcmp(argv[i], "-tlimit") == 0);
    const bool isTableFlag = (strcmp(argv[i], "-t") == 0);

    if (!isLimitFlag && !isTableFlag) {
      usage();
      continue;
    }

    // Both recognised flags take exactly one value.
    if (i + 1 == argc)
      usage();
    ++i;

    if (isLimitFlag) {
      tableLimit = atoi(argv[i]);
    } else {
      ttable = argv[i];
    }
  }

  if (ttable.empty())
    usage();

  // ---- load the table ----
  OnDiskWrapper onDiskWrapper;
  bool retDb = onDiskWrapper.BeginLoad(ttable);
  CHECK(retDb);

  cerr << "Ready..." << endl;

  // ---- one query per input line ----
  std::string line;
  while (getline(std::cin, line)) {
    std::vector<std::string> tokens;
    tokens = Moses::Tokenize(line, " ");

    cerr << "line: " << line << endl;

    // Build the source phrase. The last token is tokenized with `false` as
    // the third argument (the LHS non-terminal), all earlier ones with `true`.
    SourcePhrase sourcePhrase;
    const size_t numTokens = tokens.size();
    for (size_t pos = 0; pos < numTokens; ++pos) {
      const string &tok = tokens[pos];
      const bool isLast = (pos == numTokens - 1);
      Tokenize(sourcePhrase, tok, !isLast, true, onDiskWrapper);
    }

    // Walk the source trie from the root, one word at a time, stopping as
    // soon as a child is missing.
    const PhraseNode *node = &onDiskWrapper.GetRootSourceNode();
    cerr << "node=" << node << endl;
    assert(node);

    for (size_t pos = 0; node != NULL && pos < sourcePhrase.GetSize(); ++pos) {
      const Word &word = sourcePhrase.GetWord(pos);
      cerr << word << " ";
      node = node->GetChild(word, onDiskWrapper);
      cerr << "node=" << node << endl;
    }

    if (node == NULL) {
      cout << "Not found" << endl;
    } else {
      // The source phrase points to a bunch of rules.
      const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
      string str = coll->GetDebugStr(); // result is not used further
      cout << "Found " << coll->GetSize() << endl;

      const size_t numPhrases = coll->GetSize();
      for (size_t ind = 0; ind < numPhrases; ++ind) {
        const TargetPhrase &targetPhrase = coll->GetTargetPhrase(ind);
        cerr << "  ";
        targetPhrase.DebugPrint(cerr, onDiskWrapper.GetVocab());
        cerr << endl;
      }
    }

    // Blank line after each query's result, flushed right away.
    std::cout << '\n';
    std::cout.flush();
  }

  cerr << "Finished." << endl;
}