Ejemplo n.º 1
0
/**
 * Converts a text phrase table into an on-disk phrase table.
 *
 * Expected command line (exactly seven arguments after the program name):
 *   numSourceFactors numTargetFactors numScores tableLimit sortScoreIndex inputPath outputPath
 *
 * Every line of inputPath is parsed with Tokenize() and the resulting target
 * phrase is added below the root source node; the tree is then saved through
 * the OnDiskWrapper opened on outputPath.
 *
 * @return 0 on success; 1 on wrong usage, an out-of-range sortScoreIndex,
 *         failure to open the output for saving, or when the input could not
 *         be read through to end of file.
 */
int main (int argc, char * const argv[])
{
  Moses::ResetUserTime();
  Moses::PrintUserTime("Starting");

  if (argc != 8) {
    std::cerr << "Usage: " << argv[0] << " numSourceFactors numTargetFactors numScores tableLimit sortScoreIndex inputPath outputPath" << std::endl;
    return 1;
  }

  int numSourceFactors = Moses::Scan<int>(argv[1]);
  int numTargetFactors = Moses::Scan<int>(argv[2]);
  int numScores = Moses::Scan<int>(argv[3]);
  int tableLimit = Moses::Scan<int>(argv[4]);
  TargetPhraseCollection::s_sortScoreInd = Moses::Scan<int>(argv[5]);

  // This is user input, so validate it at run time: an assert would be
  // compiled out under NDEBUG and let a bad index through.
  if (!(TargetPhraseCollection::s_sortScoreInd < numScores)) {
    std::cerr << "Error: sortScoreIndex (" << argv[5] << ") must be less than numScores ("
              << numScores << ")" << std::endl;
    return 1;
  }

  const std::string filePath = argv[6];
  const std::string destPath = argv[7];

  Moses::InputFileStream inStream(filePath);

  OnDiskWrapper onDiskWrapper;
  const bool retDb = onDiskWrapper.BeginSave(destPath, numSourceFactors, numTargetFactors, numScores);
  if (!retDb) {
    std::cerr << "Error: unable to begin saving on-disk phrase table to " << destPath << std::endl;
    return 1;
  }

  PhraseNode &rootNode = onDiskWrapper.GetRootSourceNode();
  size_t lineNum = 0;

  // Fixed-size, mutable line buffer handed to Tokenize().
  const size_t kMaxLineLength = 100000;
  char line[kMaxLineLength];

  while (inStream.getline(line, kMaxLineLength)) {
    lineNum++;
    // Progress indicator on stderr.
    if (lineNum%1000 == 0) std::cerr << "." << std::flush;
    if (lineNum%10000 == 0) std::cerr << ":" << std::flush;
    if (lineNum%100000 == 0) std::cerr << lineNum << std::flush;

    std::vector<float> misc(1);
    SourcePhrase sourcePhrase;
    // NOTE(review): the raw pointer is handed to AddTargetPhrase(); presumably
    // the node takes ownership - confirm against PhraseNode.
    TargetPhrase *targetPhrase = new TargetPhrase(numScores);
    OnDiskPt::PhrasePtr spShort = Tokenize(sourcePhrase, *targetPhrase, line, onDiskWrapper, numScores, misc);
    assert(misc.size() == onDiskWrapper.GetNumCounts());

    rootNode.AddTargetPhrase(sourcePhrase, targetPhrase, onDiskWrapper, tableLimit, misc, spShort);
  }

  // getline() sets failbit without eofbit when a line does not fit in the
  // buffer. Without this check the loop would stop silently and a truncated
  // table would be written as if everything had succeeded.
  if (!inStream.eof()) {
    std::cerr << std::endl
              << "Error: stopped reading " << filePath << " after line " << lineNum
              << " before end of file (line longer than " << (kMaxLineLength - 1)
              << " characters, or read error)" << std::endl;
    return 1;
  }

  rootNode.Save(onDiskWrapper, 0, tableLimit);
  onDiskWrapper.EndSave();

  Moses::PrintUserTime("Finished");

  return 0;

} // main()
int main(int argc, char **argv)
{
  int tableLimit = 20;
  std::string ttable = "";
  bool useAlignments = false;

  for(int i = 1; i < argc; i++) {
    if(!strcmp(argv[i], "-tlimit")) {
      if(i + 1 == argc)
        usage();
      tableLimit = atoi(argv[++i]);
    } else if(!strcmp(argv[i], "-t")) {
      if(i + 1 == argc)
        usage();
      ttable = argv[++i];
    }
    else
      usage();
  }

  if(ttable == "")
    usage();

	OnDiskWrapper onDiskWrapper;
  bool retDb = onDiskWrapper.BeginLoad(ttable);
	CHECK(retDb);
	
	cerr << "Ready..." << endl;
	
  std::string line;
  while(getline(std::cin, line)) {
    std::vector<std::string> tokens;
    tokens = Moses::Tokenize(line, " ");

		cerr << "line: " << line << endl;
		
		// create source phrase
    SourcePhrase sourcePhrase;

		for (size_t pos = 0; pos < tokens.size(); ++pos)
		{
		  const string &tok = tokens[pos];
		  
		  if (pos == tokens.size() - 1) 
		  { // last position. LHS non-term
			  Tokenize(sourcePhrase, tok, false, true, onDiskWrapper);
			}
			else
			{
			  Tokenize(sourcePhrase, tok, true, true, onDiskWrapper);
			}
		}
		
    const PhraseNode *node = &onDiskWrapper.GetRootSourceNode();
		cerr << "node=" << node << endl;
    assert(node);
    
    for (size_t pos = 0; pos < sourcePhrase.GetSize(); ++pos)
		{
		  const Word &word = sourcePhrase.GetWord(pos);
		  cerr << word << " ";
		  node = node->GetChild(word, onDiskWrapper);
  		cerr << "node=" << node << endl;
		  
		  if (node == NULL)
		  {
		    break;
		  }
		}
    
    if (node)
    { // source phrase points to a bunch of rules
      const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
      string str = coll->GetDebugStr();
      cout << "Found " << coll->GetSize() << endl;
      
      for (size_t ind = 0; ind < coll->GetSize(); ++ind)
      {
        const TargetPhrase &targetPhrase = coll->GetTargetPhrase(ind);
        cerr << "  ";
        targetPhrase.DebugPrint(cerr, onDiskWrapper.GetVocab());
        cerr << endl;
        

      }
    }
    else
    {
      cout << "Not found" << endl;
    }
    
    std::cout << '\n';
    std::cout.flush();
  }
  
  cerr << "Finished." << endl;
	
}