Esempio n. 1
0
// Trains a CConParser on the reference sentences read from sOutputFile.
// NOTE(review): this excerpt is cut off inside the training loop (the `else`
// branch and everything after it are not visible), so only the visible part
// is described here.
//
// sOutputFile     - path opened for READING as the source of reference
//                   sentences (despite the name, nothing is written to it in
//                   the visible code).
// sFeatureFile    - passed to the CConParser constructor together with `true`
//                   (presumably a training-mode flag -- confirm against
//                   CConParser).
// sBinaryRulePath - if non-empty, passed to parser.LoadBinaryRules().
// sUnaryRulePath  - if non-empty, passed to parser.LoadUnaryRules().
// sConInputPath   - if non-empty, opened as a second input stream from which
//                   one CSentenceMultiCon<CConstituent> is read per reference
//                   sentence.
void auto_train(const std::string &sOutputFile, const std::string &sFeatureFile, const std::string &sBinaryRulePath, const std::string &sUnaryRulePath, const std::string &sConInputPath) {

   std::cout << "Training iteration is started... " << std::endl ; std::cout.flush();

   CConParser parser(sFeatureFile, true);
   // Rule files are optional: each one is loaded only when a path was supplied.
   if (!sBinaryRulePath.empty()) 
      parser.LoadBinaryRules(sBinaryRulePath);
   if (!sUnaryRulePath.empty())
      parser.LoadUnaryRules(sUnaryRulePath);

   std::ifstream is(sOutputFile.c_str());
   ASSERT(is.is_open(), "The training file is unaccessible.");

   // Optional second input stream; stays null when sConInputPath is empty.
   // It is heap-allocated here; the matching delete is not visible in this
   // excerpt -- TODO confirm it is released after the loop.
   std::ifstream *cis=0;
   if (!sConInputPath.empty()) cis=new std::ifstream(sConInputPath.c_str());

   // Function-local statics: constructed once and reused across calls.
   static CSentenceMultiCon<CConstituent> con_input;
   static CSentenceParsed ref_sent; 

   int nCount=0;
   
   // Read the first sentence up front; the loop runs until a read leaves
   // ref_sent empty.
   is >> ref_sent;
   while( ! ref_sent.empty() ) {
      // The counter is printed before it is incremented, so the progress
      // message is 0-based while the value handed to parser.train() is 1-based.
      std::cout << "Sentence " << nCount << " ... ";
      nCount ++;
      if (!sConInputPath.empty()) {
         // One con_input record must be available for every reference sentence.
         ASSERT((*cis) >> con_input, "No input provided for the sentence, though the input data is provided.");
         parser.train( con_input, ref_sent, nCount );
      }
      else {
Esempio n. 2
0
// Reads parsed sentences from sInputFile one at a time, passes each to
// CConParser::convert() together with a CCoNLLOutput object, and streams that
// object to sOutputFile.
//
// sInputFile  - path of the file the sentences are read from.
// sOutputFile - path of the file the converted output is written to.
//
// Both files must open successfully; each is checked with ASSERT (the macro's
// failure behaviour is defined elsewhere in the project). Progress messages go
// to std::cerr. Processing stops at the first read that leaves the sentence
// empty.
void process(const std::string &sInputFile, const std::string &sOutputFile){
	std::cerr<<"Converting start ..." << std::endl;

	std::ifstream is(sInputFile.c_str());
	ASSERT(is.is_open(), "The training file is unaccessible.");
	std::ofstream os(sOutputFile.c_str());
	// Without this check an unwritable output path would silently discard every
	// converted sentence while the function still reports success.
	ASSERT(os.is_open(), "The output file is unaccessible.");

	// Function-local static: constructed once and reused across calls.
	static CSentenceParsed sent;

	int nCount=0;

	CConParser parser;
	CCoNLLOutput o_conll;
	is >> sent;
	while( ! sent.empty() ) {
		// Printed before the increment, so the progress numbering is 0-based.
		std::cerr << "Sentence " << nCount << " ... ";
		nCount ++;
		parser.convert( sent, &o_conll );
		os<<o_conll;
		// Flush after every sentence so completed output reaches the file
		// even if a later sentence fails.
		os.flush();
		std::cerr << "done." << std::endl;
		is >> sent;
	}
	is.close();
	os.close();
	std::cerr << "Done. " << std::endl;
}
Esempio n. 3
0
void extract_features(const std::string &sOutputFile, const std::string &sFeatureFile) {

   std::cout << "Extracting feature... "; std::cout.flush();

   CConParser parser(sFeatureFile,  conparser::MAX_SENTENCE_SIZE, true);

   std::ifstream is(sOutputFile.c_str());
   ASSERT(is.is_open(), "The training file is unaccessible.");

   static CSentenceParsed ref_sent; 

   int nCount=0;
   
   is >> ref_sent;
   while( ! ref_sent.empty() ) {
   	nCount ++;
      parser.getPositiveFeatures( ref_sent );
      is >> ref_sent;
   }

   parser.finishtraining();

   is.close();

   std::cout << "done. " << std::endl;

}
Esempio n. 4
0
void auto_train(const std::string &sOutputFile, const std::string &sFeatureFile) {

   std::cout << "Training iteration is started... " << std::endl ; std::cout.flush();

   CConParser parser(sFeatureFile, conparser::MAX_SENTENCE_SIZE, true);

   std::ifstream is(sOutputFile.c_str());
   ASSERT(is.is_open(), "The training file is unaccessible.");


   static CSentenceParsed ref_sent; 

   int nCount=0;
   
   is >> ref_sent;
   while( ! ref_sent.empty() ) {
//      TRACE_WORD("Sentence " << nCount << " ... ");
      nCount ++;
      parser.train( ref_sent, nCount );
      if(nCount%1000==0)
      {
      	std::cout << nCount << " ";
      	std::cout.flush();
      }
//      TRACE("done.");
      is >> ref_sent;
   }

   std::cout << std::endl;

   parser.finishtraining();

   is.close();

   TRACE("Done. ");

}
Esempio n. 5
0
void train(std::string sInputFile, std::string sReferenceFile, std::string sFeatureDB) {
   TRACE("Training started");
   int time_start = clock();
   CReranker reranker(sFeatureDB, true);
   std::ifstream input_file(sInputFile.c_str());
   std::ifstream reference_file(sReferenceFile.c_str());
   std::ifstream tagging_score_file(std::string(sInputFile+".scores.tagging").c_str());
   std::ifstream parsing_score_file(std::string(sInputFile+".scores.parsing").c_str());

   int nTagAll, nParseAll;
   int nTagNeeded, nParseNeeded;

   CSentenceParsed *nbest;
   CSentenceParsed correct;

   double *prior_scores;
   
   input_file >> nTagAll >> nParseAll;
   input_file >> nTagNeeded >> nParseNeeded;
   std::string line;
   getline(input_file, line);
   TRACE("Reranking "<<nTagNeeded<<"/"<<nParseNeeded<<" from "<<nTagAll<<"/"<<nParseAll);

   int nCount;

   CSentenceParsed tmp_sent;
   double tmp_priors[2];

   int nBest = nTagNeeded*nParseNeeded;
   
   nbest = new CSentenceParsed[nBest];
   prior_scores = new double[nBest*2];

   nCount = 0;
   reference_file >> correct;
   while( !(correct.empty()) ) {
      std::cout << "Sentence " << ++nCount << std::endl;
      int index=0;
      for (int i=0; i<nTagAll; ++i) {
         for (int j=0; j<nParseAll; j++) {
            input_file >> tmp_sent;
            tagging_score_file >> tmp_priors[0];
            parsing_score_file >> tmp_priors[1];
            if (i<nTagNeeded && j<nParseNeeded) {
               nbest[index] = tmp_sent;
               prior_scores[index*2] = tmp_priors[0] / (8003*9) / 1000;
               prior_scores[index*2+1] = tmp_priors[1] / (8003*15) / 1000;
               //std::cout << prior_scores[index*2] << "\t" << prior_scores[index*2+1] << std::endl;
               index++;
            }
         }
      }
      reranker.train(nbest, &correct, nBest, prior_scores);
      reference_file >> correct;
   }
   reranker.finishTraining();
   delete[] nbest;
   delete[] prior_scores;
   input_file.close();
   tagging_score_file.close();
   parsing_score_file.close();
   TRACE("Training has finished successfully. Total time taken is: " << double(clock()-time_start)/CLOCKS_PER_SEC);
}