void auto_train(const std::string &sOutputFile, const std::string &sFeatureFile, const std::string &sBinaryRulePath, const std::string &sUnaryRulePath, const std::string &sConInputPath) { std::cout << "Training iteration is started... " << std::endl ; std::cout.flush(); CConParser parser(sFeatureFile, true); if (!sBinaryRulePath.empty()) parser.LoadBinaryRules(sBinaryRulePath); if (!sUnaryRulePath.empty()) parser.LoadUnaryRules(sUnaryRulePath); std::ifstream is(sOutputFile.c_str()); ASSERT(is.is_open(), "The training file is unaccessible."); std::ifstream *cis=0; if (!sConInputPath.empty()) cis=new std::ifstream(sConInputPath.c_str()); static CSentenceMultiCon<CConstituent> con_input; static CSentenceParsed ref_sent; int nCount=0; is >> ref_sent; while( ! ref_sent.empty() ) { std::cout << "Sentence " << nCount << " ... "; nCount ++; if (!sConInputPath.empty()) { ASSERT((*cis) >> con_input, "No input provided for the sentence, though the input data is provided."); parser.train( con_input, ref_sent, nCount ); } else {
void process(const std::string &sInputFile, const std::string &sOutputFile){ std::cerr<<"Converting start ..." << std::endl; std::ifstream is(sInputFile.c_str()); ASSERT(is.is_open(), "The training file is unaccessible."); std::ofstream os(sOutputFile.c_str()); static CSentenceParsed sent; int nCount=0; CConParser parser; CCoNLLOutput o_conll; is >> sent; while( ! sent.empty() ) { std::cerr << "Sentence " << nCount << " ... "; nCount ++; parser.convert( sent, &o_conll ); os<<o_conll; os.flush(); std::cerr << "done." << std::endl; is >> sent; } is.close(); os.close(); std::cerr << "Done. " << std::endl; }
/**
 * Feed every reference sentence to the parser's positive-feature
 * collection, then finalise the parser.
 *
 * Sentences are read from sOutputFile one at a time until an empty
 * sentence is read. Each is passed to CConParser::getPositiveFeatures.
 * CConParser::finishtraining is called once after the last sentence.
 *
 * @param sOutputFile   path of the file the reference sentences are READ
 *                      from (despite the name, it is opened for input)
 * @param sFeatureFile  feature file path handed to the CConParser constructor
 */
void extract_features(const std::string &sOutputFile, const std::string &sFeatureFile) {
   std::cout << "Extracting feature... ";
   std::cout.flush();

   // NOTE(review): the trailing `true` presumably selects training mode -- confirm
   // against the CConParser constructor.
   CConParser parser(sFeatureFile, conparser::MAX_SENTENCE_SIZE, true);

   std::ifstream is(sOutputFile.c_str());
   ASSERT(is.is_open(), "The training file is unaccessible.");

   static CSentenceParsed ref_sent;

   // An empty sentence marks the end of the input. The sentence counter the
   // previous version kept here was never read, so it has been removed.
   for (is >> ref_sent; !ref_sent.empty(); is >> ref_sent)
      parser.getPositiveFeatures(ref_sent);

   parser.finishtraining();
   is.close();
   std::cout << "done. " << std::endl;
}
void auto_train(const std::string &sOutputFile, const std::string &sFeatureFile) { std::cout << "Training iteration is started... " << std::endl ; std::cout.flush(); CConParser parser(sFeatureFile, conparser::MAX_SENTENCE_SIZE, true); std::ifstream is(sOutputFile.c_str()); ASSERT(is.is_open(), "The training file is unaccessible."); static CSentenceParsed ref_sent; int nCount=0; is >> ref_sent; while( ! ref_sent.empty() ) { // TRACE_WORD("Sentence " << nCount << " ... "); nCount ++; parser.train( ref_sent, nCount ); if(nCount%1000==0) { std::cout << nCount << " "; std::cout.flush(); } // TRACE("done."); is >> ref_sent; } std::cout << std::endl; parser.finishtraining(); is.close(); TRACE("Done. "); }
void train(std::string sInputFile, std::string sReferenceFile, std::string sFeatureDB) { TRACE("Training started"); int time_start = clock(); CReranker reranker(sFeatureDB, true); std::ifstream input_file(sInputFile.c_str()); std::ifstream reference_file(sReferenceFile.c_str()); std::ifstream tagging_score_file(std::string(sInputFile+".scores.tagging").c_str()); std::ifstream parsing_score_file(std::string(sInputFile+".scores.parsing").c_str()); int nTagAll, nParseAll; int nTagNeeded, nParseNeeded; CSentenceParsed *nbest; CSentenceParsed correct; double *prior_scores; input_file >> nTagAll >> nParseAll; input_file >> nTagNeeded >> nParseNeeded; std::string line; getline(input_file, line); TRACE("Reranking "<<nTagNeeded<<"/"<<nParseNeeded<<" from "<<nTagAll<<"/"<<nParseAll); int nCount; CSentenceParsed tmp_sent; double tmp_priors[2]; int nBest = nTagNeeded*nParseNeeded; nbest = new CSentenceParsed[nBest]; prior_scores = new double[nBest*2]; nCount = 0; reference_file >> correct; while( !(correct.empty()) ) { std::cout << "Sentence " << ++nCount << std::endl; int index=0; for (int i=0; i<nTagAll; ++i) { for (int j=0; j<nParseAll; j++) { input_file >> tmp_sent; tagging_score_file >> tmp_priors[0]; parsing_score_file >> tmp_priors[1]; if (i<nTagNeeded && j<nParseNeeded) { nbest[index] = tmp_sent; prior_scores[index*2] = tmp_priors[0] / (8003*9) / 1000; prior_scores[index*2+1] = tmp_priors[1] / (8003*15) / 1000; //std::cout << prior_scores[index*2] << "\t" << prior_scores[index*2+1] << std::endl; index++; } } } reranker.train(nbest, &correct, nBest, prior_scores); reference_file >> correct; } reranker.finishTraining(); delete[] nbest; delete[] prior_scores; input_file.close(); tagging_score_file.close(); parsing_score_file.close(); TRACE("Training has finished successfully. Total time taken is: " << double(clock()-time_start)/CLOCKS_PER_SEC); }