Пример #1
0
/**
 * Command-line entry point that dispatches to tag(), parse() or depparse()
 * according to the "o" (output format) option.
 *
 * Positional arguments: feature_path, then an optional input_file and an
 * optional output_file. A file name that is not supplied is passed on as an
 * empty string.
 *
 * @return 0 on success; 1 on a usage error, on an output format this program
 *         has no handler for, or when a std::string is thrown.
 */
int main(int argc, char* argv[]) {
   try {
      COptions options(argc, argv);
      CConfigurations configurations;
      configurations.defineConfiguration("o", "{s|t[d]|d|c}", "output format; 's' segmented format, 't' pos-tagged format in sentences, 'td' pos-tagged format in documents without sentence boundary delimitation, 'd' refers to dependency parse tree format, and 'c' refers to constituent parse tree format", "c");

      // options.args[1] is the feature path; [2] and [3] are the optional files.
      if (options.args.size() < 2 || options.args.size() > 4) {
         std::cout << "\nUsage: " << argv[0] << " feature_path [input_file [output_file]]" << std::endl;
         std::cout << configurations.message() << std::endl;
         return 1;
      }
      std::string warning = configurations.loadConfigurations(options.opts);
      if (!warning.empty()) {
         std::cout << "Warning: " << warning << std::endl;
      }

      std::string sInputFile = options.args.size() > 2 ? options.args[2] : "";
      std::string sToFile = options.args.size() > 3 ? options.args[3] : "";
      std::string sOutFormat = configurations.getConfiguration("o");

      // 'td' is the document variant of the tagged output.
      bool bOutDoc = (sOutFormat == "td");

      if (sOutFormat == "t" || sOutFormat == "td") {
         tag(sInputFile, sToFile, options.args[1], bOutDoc);
      }
      else if (sOutFormat == "c") {
         parse(sInputFile, sToFile, options.args[1]);
      }
      else if (sOutFormat == "d") {
         depparse(sInputFile, sToFile, options.args[1]);
      }
      else {
         // Previously an unhandled format fell through and reported success
         // without producing any output.
         std::cerr << "Error: output format '" << sOutFormat << "' is not supported by this program." << std::endl;
         return 1;
      }
      return 0;
   } catch (const std::string &e) {
      std::cerr << "Error: " << e << std::endl;
      return 1;
   }
}
Пример #2
0
/**
 * Command-line entry point for training: calls auto_train() once per
 * requested iteration and reports the elapsed processor time.
 *
 * Positional arguments: training_data, model, num_iterations.
 * The "c" option turns on CoNLL-format processing.
 *
 * @return 0 on success; 1 on a usage error, a non-integer iteration count,
 *         or when a std::string is thrown.
 */
int main(int argc, char* argv[]) {

   try {
      COptions options(argc, argv);
      CConfigurations configurations;
      configurations.defineConfiguration("c", "", "process CoNLL format", "");
      if (options.args.size() != 4) {
         std::cout << "\nUsage: " << argv[0] << " training_data model num_iterations" << std::endl ;
         std::cout << configurations.message() << std::endl;
         return 1;
      }
      // Surface option-loading warnings instead of discarding them.
      std::string warning = configurations.loadConfigurations(options.opts);
      if (!warning.empty()) {
         std::cout << "Warning: " << warning << std::endl;
      }

      unsigned long training_rounds;
      if (!fromString(training_rounds, options.args[3])) {
         std::cerr << "Error: the number of training iterations must be an integer." << std::endl;
         return 1;
      }

      // The flag option is considered set when its value is non-empty.
      bool bCoNLL = !configurations.getConfiguration("c").empty();

      std::cout << "Training started" << std::endl;
      // Keep the clock() result in its own type rather than narrowing to int.
      const clock_t time_start = clock();
      // The index has the same type as the bound: an int index could overflow
      // before reaching a large unsigned long iteration count.
      for (unsigned long i = 0; i < training_rounds; ++i)
         auto_train(options.args[1], options.args[2], bCoNLL);
      std::cout << "Training has finished successfully. Total time taken is: " << double(clock()-time_start)/CLOCKS_PER_SEC << std::endl;

      return 0;
   } catch (const std::string &e) {
      std::cerr << "Error: " << e << std::endl;
      return 1;
   }

}
Пример #3
0
/**
 * Command-line entry point that forwards an input file, an output file and
 * a maximum sentence size to process().
 *
 * Positional arguments: optional input_file, then optional output_file; a
 * file name that is not supplied is passed on as an empty string.
 * The "m" option gives the maximum sentence size (default "512").
 *
 * @return 0 on success; 1 on a usage error, a non-integer "m" value, or
 *         when a std::string is thrown.
 */
int main(int argc, char* argv[]) {
   try {
      COptions options(argc, argv);
      CConfigurations configurations;
      configurations.defineConfiguration("m", "M", "the maximum sentence size in character count", "512");

      // check arguments: both file names are optional
      if (options.args.size() > 3) {
         std::cout << "Usage: " << argv[0] << " [input_file [output_file]]" << std::endl;
         std::cout << configurations.message();
         return 1;
      }
      configurations.loadConfigurations(options.opts);

      unsigned long nMaxSentSize;
      std::string s = configurations.getConfiguration("m");
      if (!fromString(nMaxSentSize, s)) {
         std::cerr << "Error: the maximum sentence size must be an integer." << std::endl;
         // return instead of exit(): exit() would skip the destructors of the
         // locals that are alive here.
         return 1;
      }
      std::string sInputFile = options.args.size()>1 ? options.args[1] : "";
      std::string sOutputFile = options.args.size()>2 ? options.args[2] : "";
      // main
      process(sInputFile, sOutputFile, nMaxSentSize);

      // return normal
      return 0;
   }
   catch(const std::string &e) {
      std::cerr << "Error: " << e << " Stop." << std::endl;
      return 1;
   }
}
Пример #4
0
/**
 * Command-line entry point that forwards three file names, an n-best size
 * and two optional resource paths to process().
 *
 * Positional arguments (all required): input_file, output_file, feature_file.
 * Options: "d" dictionary path, "k" knowledge path, "n" n-best size
 * (default "1").
 *
 * @return 0 on success; 1 on a usage error, a non-integer "n" value, or
 *         when a std::string is thrown.
 */
int main(int argc, char* argv[]) {
   try {
      COptions options(argc, argv);
      CConfigurations configurations;
      configurations.defineConfiguration("d", "Path", "use a dictionary", "");
      configurations.defineConfiguration("k", "Path", "use special knowledge", "");
      configurations.defineConfiguration("n", "N", "n-best output", "1");

      // All three file names are consumed below, so exactly three positional
      // arguments are required. The old 2..4 range let the call read argv
      // entries that were missing or were option tokens.
      if (options.args.size() != 4) {
         std::cout << "\nUsage: " << argv[0] << " input_file output_file feature_file" << std::endl;
         std::cout << configurations.message() << std::endl;
         return 1;
      }
      std::string warning = configurations.loadConfigurations(options.opts);
      if (!warning.empty()) {
         std::cout << "Warning: " << warning << std::endl;
      }

      std::string sTagDict = configurations.getConfiguration("d");
      std::string sKnowledge = configurations.getConfiguration("k");
      int nBest;
      if (!fromString(nBest, configurations.getConfiguration("n"))) {
         std::cerr << "Error: the n-best list output size is not integer." << std::endl;
         return 1;
      }

      // Take the file names from the parsed positional list, not raw argv:
      // raw argv indices shift when options precede the file names.
      // c_str() keeps the argument type C-string-like, as argv[i] was.
      process(options.args[1].c_str(), options.args[2].c_str(), options.args[3].c_str(), nBest, sTagDict, sKnowledge);
      return 0;
   } catch (const std::string &e) {
      std::cerr << "Error: " << e << std::endl;
      return 1;
   }
}
Пример #5
0
/**
 * Command-line entry point that forwards three file names, an N-best size
 * and a ranking flag to process().
 *
 * Positional arguments (all required): input_file, output_file, model_file.
 * Options: "n" N-best size (default "10"); "s" flag, treated as set when its
 * value is non-empty.
 *
 * @return 0 on success; 1 on a usage error, a non-integer "n" value, or
 *         when a std::string is thrown.
 */
int main(int argc, char* argv[]) {
   try {
      COptions options(argc, argv);
      CConfigurations configurations;
      configurations.defineConfiguration("n", "N", "N best list rerank", "10");
      configurations.defineConfiguration("s", "", "Use segmentor scores in ranking", "");
      // check arguments
      if (options.args.size() != 4) {
         std::cout << "Usage: " << argv[0] << " input_file output_file model_file" << std::endl;
         std::cout << configurations.message() << std::endl;
         return 1;
      }
      // Surface option-loading warnings instead of discarding them.
      std::string warning = configurations.loadConfigurations(options.opts);
      if (!warning.empty()) {
         std::cout << "Warning: " << warning << std::endl;
      }

      int nBest;
      if (!fromString(nBest, configurations.getConfiguration("n"))) {
         std::cerr << "Error: N must be integer." << std::endl;
         return 1;
      }
      bool bRankingIncludeSeg = !configurations.getConfiguration("s").empty();

      // Take the file names from the parsed positional list, not raw argv:
      // raw argv indices shift when options precede the file names.
      // c_str() keeps the argument type C-string-like, as argv[i] was.
      process(options.args[1].c_str(), options.args[2].c_str(), options.args[3].c_str(), nBest, bRankingIncludeSeg);
      return 0;
   } catch (const std::string &e) {
      std::cerr << "Error: " << e << std::endl;
      return 1;
   }
}
Пример #6
0
/**
 * Training driver: parses the command line, then invokes auto_train() the
 * requested number of times and prints the elapsed processor time.
 *
 * Expected positional arguments: training_data, model, num_iterations.
 * When NO_NEG_FEATURE is defined and the model file does not exist yet,
 * extract_features() is run once before training.
 *
 * @return 0 on success, 1 on a usage error or a non-integer iteration count;
 *         a thrown std::string terminates the program with status 1.
 */
int main(int argc, char* argv[]) {
   try {
      COptions options(argc, argv);
      CConfigurations configurations;

      const bool bWrongArgCount = (options.args.size() != 4);
      if (bWrongArgCount) {
         std::cout << "\nUsage: " << argv[0] << " training_data model num_iterations" << std::endl;
         std::cout << configurations.message() << std::endl;
         return 1;
      }

      // The iteration count is validated before the options are loaded.
      int nRounds;
      const bool bParsed = fromString(nRounds, options.args[3]);
      if (!bParsed) {
         std::cerr << "Error: the number of training iterations must be an integer." << std::endl;
         return 1;
      }

      std::string sWarning = configurations.loadConfigurations(options.opts);
      if (!sWarning.empty())
         std::cout << "Warning: " << sWarning << std::endl;

      std::cout << "Training started." << std::endl;
      int nClockAtStart = clock();

#ifdef NO_NEG_FEATURE
      // Build the feature file first if it is not there yet.
      if (!FileExists(options.args[2])) {
         extract_features(options.args[1], options.args[2]);
      }
#endif

      for (int nRound = 0; nRound < nRounds; ++nRound)
         auto_train(options.args[1], options.args[2]); // set update tag dict false now

      std::cout << "Training has finished successfully. Total time taken is: "
                << double(clock() - nClockAtStart) / CLOCKS_PER_SEC
                << std::endl;
   }
   catch (const std::string &sError) {
      std::cerr << "Error: " << sError << std::endl;
      exit(1);
   }

   return 0;
}
Пример #7
0
/**
 * Decoder driver: reads the command line and hands everything to process().
 *
 * Positional arguments: model_file, then an optional input_file and an
 * optional output_file (a missing name is passed as an empty string).
 * Options: "n" N-best size (default "1"), "d" path for scores, "s" path
 * with stacking features.
 *
 * @return 0 on success; 1 on a usage error, a non-integer "n" value, or
 *         when a std::string is thrown.
 */
int main(int argc, char* argv[]) {
   try {
      COptions options(argc, argv);
      CConfigurations configurations;
      configurations.defineConfiguration("n", "N", "N best list outout", "1");
      configurations.defineConfiguration("d", "Path", "save scores to Path", "");
      configurations.defineConfiguration("s", "Path", "privide stacking features in Path", "");

      // Between one and three positional arguments are accepted.
      const bool bArgCountOk = (options.args.size() >= 2 && options.args.size() <= 4);
      if (!bArgCountOk) {
         std::cout << "Usage: " << argv[0] << " model_file [input_file [outout_file]]" << std::endl;
         std::cout << configurations.message() << std::endl;
         return 1;
      }

      std::string sWarning = configurations.loadConfigurations(options.opts);
      if (!sWarning.empty())
         std::cout << "Warning: " << sWarning << std::endl;

      // Option values.
      int nBest;
      const bool bNBestParsed = fromString(nBest, configurations.getConfiguration("n"));
      if (!bNBestParsed) {
         std::cout << "The N best specification must be an integer." << std::endl;
         return 1;
      }
      std::string sOutputScores = configurations.getConfiguration("d");
      std::string sStackFile = configurations.getConfiguration("s");

      // Optional file names default to the empty string.
      std::string sInputFile;
      if (options.args.size() > 2)
         sInputFile = options.args[2];
      std::string sOutputFile;
      if (options.args.size() > 3)
         sOutputFile = options.args[3];

      process(sInputFile, sOutputFile, options.args[1], nBest, sOutputScores, sStackFile);
      return 0;
   }
   catch (const std::string &sError) {
      std::cerr << "Error: " << sError << " Stop." << std::endl;
      return 1;
   }
}
Пример #8
0
/**
 * Tagger driver: reads the command line and hands everything to process().
 *
 * Positional arguments: feature_file, then an optional input_file and an
 * optional output_file (a missing name is passed as an empty string).
 * Options: "m" maximum sentence size (default tagger::MAX_SENTENCE_SIZE),
 * "n" N-best size (default "1"), "s" flag for score output.
 * The segmented-input flag is fixed at compile time by the SEGMENTED macro.
 *
 * @return 0 on success; 1 on a usage error, a non-integer option value, or
 *         when a std::string is thrown.
 */
int main(int argc, char* argv[]) {
   try {
      COptions options(argc, argv);
      CConfigurations configurations;

      // The default for "m" is the compile-time limit rendered as text.
      std::stringstream ssDefaultMax;
      ssDefaultMax << tagger::MAX_SENTENCE_SIZE;
      configurations.defineConfiguration("m", "M", "maximum sentence size", ssDefaultMax.str());
      configurations.defineConfiguration("n", "N", "N best list outout", "1");
      configurations.defineConfiguration("s", "", "outout scores", "");

      const bool bArgCountOk = (options.args.size() >= 2 && options.args.size() <= 4);
      if (!bArgCountOk) {
         std::cout << "\nUsage: " << argv[0] << " feature_file [input_file [outout_file]]" << std::endl;
         std::cout << configurations.message() << std::endl;
         return 1;
      }

      std::string sWarning = configurations.loadConfigurations(options.opts);
      if (!sWarning.empty())
         std::cout << "Warning: " << sWarning << std::endl;

      unsigned long nMaxSentSize;
      if (!fromString(nMaxSentSize, configurations.getConfiguration("m"))) {
         std::cerr << "Error: the size of largest sentence is not integer." << std::endl;
         return 1;
      }
      unsigned long nBest;
      if (!fromString(nBest, configurations.getConfiguration("n"))) {
         std::cerr << "Error: the number of N best is not integer." << std::endl;
         return 1;
      }

      // A flag option counts as set when its value is non-empty.
      bool bScores = !configurations.getConfiguration("s").empty();

      bool bSegmented = false;
#ifdef SEGMENTED
      bSegmented = true;
#endif

      // Optional file names default to the empty string.
      std::string sInputFile;
      if (options.args.size() > 2)
         sInputFile = options.args[2];
      std::string sToFile;
      if (options.args.size() > 3)
         sToFile = options.args[3];

      process(sInputFile, sToFile, options.args[1], nBest, nMaxSentSize, bSegmented, bScores);
      return 0;
   }
   catch (const std::string &sError) {
      std::cerr << "Error: " << sError << std::endl;
      return 1;
   }
}
Пример #9
0
/**
 * Command-line entry point for training: calls auto_train() once per
 * requested iteration and reports the elapsed processor time.
 *
 * Positional arguments (all required): training_data, model, num_iterations.
 * Options: "d" dictionary path, "k" knowledge path.
 *
 * @return 0 on success; 1 on a usage error, a non-integer iteration count,
 *         or when a std::string is thrown.
 */
int main(int argc, char* argv[]) {
   try {
      COptions options(argc, argv);
      CConfigurations configurations;
      configurations.defineConfiguration("d", "Path", "use dictionary from the given path", "");
      configurations.defineConfiguration("k", "Path", "use special knowledge from the given path", "");

      if (options.args.size() != 4) {
         std::cout << "\nUsage: " << argv[0] << " training_data model num_iterations" << std::endl ;
         std::cout << configurations.message() << std::endl;
         return 1;
      }
      std::string warning = configurations.loadConfigurations(options.opts);
      if (!warning.empty()) {
         std::cout << "Warning: " << warning << std::endl;
      }

      std::string sTagDict = configurations.getConfiguration("d");
      std::string sKnowledge = configurations.getConfiguration("k");

      unsigned training_rounds;
      if (!fromString(training_rounds, options.args[3])) {
         std::cerr << "Error: the number of training iterations must be an integer." << std::endl;
         return 1;
      }
      std::cout << "Training started" << std::endl;
      // Keep the clock() result in its own type rather than narrowing to int.
      const clock_t time_start = clock();
      // Take the file names from the parsed positional list, not raw argv:
      // raw argv indices shift when options precede the file names.
      // c_str() keeps the argument type C-string-like, as argv[i] was.
      for (unsigned i = 0; i < training_rounds; ++i)
         auto_train(options.args[1].c_str(), options.args[2].c_str(), sTagDict, sKnowledge);
      std::cout << "Training has finished successfully. Total time taken is: " << double(clock()-time_start)/CLOCKS_PER_SEC << std::endl;
      return 0;
   } catch (const std::string &e) {
      std::cerr << "Error: " << e << std::endl;
      return 1;
   }
}
Пример #10
0
/**
 * Training driver: parses the command line, then invokes auto_train() the
 * requested number of times and prints the elapsed processor time.
 *
 * Expected positional arguments: training_data, model, num_iterations.
 * Options: "c" CoNLL flag, "p" supertag path, "r" rules flag; "f" and "t"
 * exist only when SUPPORT_FEATURE_EXTRACTION / SUPPORT_META_FEATURE_DEFINITION
 * are defined.
 *
 * @return 0 on success; 1 on a usage error, a non-integer iteration count,
 *         or when a std::string is thrown.
 */
int main(int argc, char* argv[]) {
   try {
      COptions options(argc, argv);
      CConfigurations configurations;
      configurations.defineConfiguration("c", "", "process CoNLL format", "");
      configurations.defineConfiguration("p", "path", "supertags", "");
      configurations.defineConfiguration("r", "", "use rules", "");
#ifdef SUPPORT_FEATURE_EXTRACTION
      configurations.defineConfiguration("f", "", "extract features only: weights will be counts", "");
#endif
#ifdef SUPPORT_META_FEATURE_DEFINITION
      configurations.defineConfiguration("t", "path", "meta feature types", "");
#endif

      const bool bWrongArgCount = (options.args.size() != 4);
      if (bWrongArgCount) {
         std::cout << "\nUsage: " << argv[0] << " training_data model num_iterations" << std::endl;
         std::cout << configurations.message() << std::endl;
         return 1;
      }
      configurations.loadConfigurations(options.opts);

      int nRounds;
      const bool bParsed = fromString(nRounds, options.args[3]);
      if (!bParsed) {
         std::cerr << "Error: the number of training iterations must be an integer." << std::endl;
         return 1;
      }

      // A flag option counts as set when its value is non-empty.
      bool bCoNLL = !configurations.getConfiguration("c").empty();
      std::string sSuperPath = configurations.getConfiguration("p");
      bool bRules = !configurations.getConfiguration("r").empty();

      // These two keep their defaults unless the matching feature is compiled in.
      bool bExtract = false;
      std::string sMetaPath;
#ifdef SUPPORT_FEATURE_EXTRACTION
      bExtract = !configurations.getConfiguration("f").empty();
#endif
#ifdef SUPPORT_META_FEATURE_DEFINITION
      sMetaPath = configurations.getConfiguration("t");
#endif

      std::cout << "Training started" << std::endl;
      int nClockAtStart = clock();
      for (int nRound = 0; nRound < nRounds; ++nRound) {
         auto_train(options.args[1], options.args[2], bRules, sSuperPath, bCoNLL, bExtract, sMetaPath);
      }
      std::cout << "Training has finished successfully. Total time taken is: "
                << double(clock() - nClockAtStart) / CLOCKS_PER_SEC
                << std::endl;

      return 0;
   }
   catch (const std::string &sError) {
      std::cerr << std::endl << "Error: " << sError << std::endl;
      return 1;
   }
}
Пример #11
0
/**
 * Training driver: parses the command line, then invokes auto_train() the
 * requested number of times and prints the elapsed processor time.
 *
 * When JOINT_MORPH is defined, two lexicons are loaded from fixed paths
 * before anything else and the outcome of each load is printed to stderr.
 *
 * Expected positional arguments: training_data, model, num_iterations.
 * Options: "c" CoNLL flag, "p" supertag path, "r" rules flag; "f" and "t"
 * exist only when SUPPORT_FEATURE_EXTRACTION / SUPPORT_META_FEATURE_DEFINITION
 * are defined.
 *
 * @return 0 on success; 1 on a usage error, a non-integer iteration count,
 *         or when a std::string is thrown.
 */
int main(int argc, char* argv[]) {

//TODO This is temporary! Allow user to enter their own lexicons.
#ifdef JOINT_MORPH
   std::cerr << "Loading lexicons...\n";
   bool bLoaded = TARGET_LANGUAGE::initLexicon("/home/cgomezr/en/train.conll",true);
   std::cerr << "Successfully loaded primary lexicon? " << bLoaded << "\n";
   bLoaded = english::initLemmaLexicon("/home/cgomezr/multext-lexicons/en/wordform-improved.txt");
   std::cerr << "Successfully loaded the lemma lexicon? " << bLoaded << "\n";
#endif

   try {
      COptions options(argc, argv);
      CConfigurations configurations;
      configurations.defineConfiguration("c", "", "process CoNLL format", "");
      configurations.defineConfiguration("p", "path", "supertags", "");
      configurations.defineConfiguration("r", "", "use rules", "");
#ifdef SUPPORT_FEATURE_EXTRACTION
      configurations.defineConfiguration("f", "", "extract features only: weights will be counts", "");
#endif
#ifdef SUPPORT_META_FEATURE_DEFINITION
      configurations.defineConfiguration("t", "path", "meta feature types", "");
#endif

      const bool bWrongArgCount = (options.args.size() != 4);
      if (bWrongArgCount) {
         std::cout << "\nUsage: " << argv[0] << " training_data model num_iterations" << std::endl;
         std::cout << configurations.message() << std::endl;
         return 1;
      }
      configurations.loadConfigurations(options.opts);

      int nRounds;
      const bool bParsed = fromString(nRounds, options.args[3]);
      if (!bParsed) {
         std::cout << "Error: the number of training iterations must be an integer." << std::endl;
         return 1;
      }

      // A flag option counts as set when its value is non-empty.
      bool bCoNLL = !configurations.getConfiguration("c").empty();
      std::string sSuperPath = configurations.getConfiguration("p");
      bool bRules = !configurations.getConfiguration("r").empty();

      // These two keep their defaults unless the matching feature is compiled in.
      bool bExtract = false;
      std::string sMetaPath;
#ifdef SUPPORT_FEATURE_EXTRACTION
      bExtract = !configurations.getConfiguration("f").empty();
#endif
#ifdef SUPPORT_META_FEATURE_DEFINITION
      sMetaPath = configurations.getConfiguration("t");
#endif

      std::cout << "Training started" << std::endl;
      int nClockAtStart = clock();
      for (int nRound = 0; nRound < nRounds; ++nRound) {
         auto_train(options.args[1], options.args[2], bRules, sSuperPath, bCoNLL, bExtract, sMetaPath);
      }
      std::cout << "Training has finished successfully. Total time taken is: "
                << double(clock() - nClockAtStart) / CLOCKS_PER_SEC
                << std::endl;

      return 0;
   }
   catch (const std::string &sError) {
      std::cerr << std::endl << "Error: " << sError << std::endl;
      return 1;
   }

}
Пример #12
0
/**
 * Reads CJointTree records from an input file one after another and writes
 * each to an output file in the representation chosen by the "f" option:
 * 0 -> str(), 1 -> str_unbinarizedall(), 2 -> str_unbinarized().
 * Reading stops at the first record for which empty() is true.
 *
 * Positional arguments (both required): input_file, output_file.
 *
 * @return 0 on success; 1 on a usage error, an invalid "f" value, a file
 *         that cannot be opened, or when a std::string is thrown.
 */
int test_main(int argc, char* argv[]) {
	try {
		COptions options(argc, argv);
		CConfigurations configurations;
		configurations.defineConfiguration("f", "Format", "output format: 0, binary; 1, unbinary, all; 2, unbinary, constituent", "0");
		// check arguments
		if (options.args.size() != 3) {
			std::cout << "Usage: " << argv[0] << " input_file output_file" << std::endl;
			std::cout << configurations.message() << std::endl;
			return 1;
		}
		configurations.loadConfigurations(options.opts);

		unsigned long nformat = 0;
		if (!fromString(nformat, configurations.getConfiguration("f"))) {
			std::cout << "The output format specification must be an integer." << std::endl;
			return 1;
		}

		if (nformat != 0 && nformat != 1 && nformat != 2) {
			std::cout << "The output format specification must be 0,1 or 2." << std::endl;
			return 1;
		}

		// Open the input first so that a bad input path does not create or
		// truncate the output file.
		std::ifstream is(options.args[1].c_str());
		if (!is.is_open()) {
			std::cerr << "Error: cannot open input file " << options.args[1] << std::endl;
			return 1;
		}
		std::ofstream os(options.args[2].c_str());
		if (!os.is_open()) {
			std::cerr << "Error: cannot open output file " << options.args[2] << std::endl;
			return 1;
		}

		static CJointTree ref_sent;
		int nCount = 0;

		is >> ref_sent;
		while (!ref_sent.empty()) {
			TRACE_WORD("Sentence " << nCount << " ... ");
			nCount ++;

			TRACE("done.");
			if (nformat == 0) {
				os << ref_sent.str() << std::endl;
			}
			else if (nformat == 1) {
				std::string outstr = ref_sent.str_unbinarizedall();
				os << outstr << std::endl;
			}
			else if (nformat == 2) {
				std::string outstr = ref_sent.str_unbinarized();
				os << outstr << std::endl;
			}
			else {
				// nformat was validated above, so this cannot be reached.
				assert(false);
			}

			is >> ref_sent;
		}

		is.close();
		os.close();
	}
	catch (const std::string &e) {
		std::cerr << "Error: " << e << std::endl;
		return 1;
	}

	return 0;
}