int EludeCaller::Run() { pair<int, double> best_model(-1, -1.0); if (!train_file_.empty() && !load_model_file_.empty() && VERB >= 4 && !linear_calibration_) { cerr << "Warning: a model can be either trained or loaded from a file. " << "The two options should not be used together, unless linear calibration " << "should be carried out. In such a case please use the -j option. " << "The model will be trained using the peptides in " << train_file_ << endl; } // train a retention model if (!train_file_.empty()) { ProcessTrainData(); // initialize the feature table train_features_table_ = DataManager::InitFeatureTable( RetentionFeatures::kMaxNumberFeatures, train_psms_); if (automatic_model_sel_) { best_model = AutomaticModelSelection(); } else if (only_hydrophobicity_index_) { map<string, double> custom_hydrophobicity_index = TrainRetentionIndex(); if (!index_file_.empty()) { SaveRetentionIndexToFile(index_file_, custom_hydrophobicity_index); } else { PrintHydrophobicityIndex(custom_hydrophobicity_index); } cerr << "Now I saved the index" << endl; return 0; } else if (load_model_file_.empty()) { TrainRetentionModel(); } } else if (automatic_model_sel_) { if (!test_file_.empty()) { ProcessTestData(); processed_test_ = true; } best_model = AutomaticModelSelection(); } // load a model from a file if (!load_model_file_.empty() && !automatic_model_sel_) { rt_model_ = new RetentionModel(the_normalizer_); rt_model_->LoadModelFromFile(load_model_file_); } // save the model if (!save_model_file_.empty()) { if (rt_model_ != NULL && !rt_model_->IsModelNull()) { rt_model_->SaveModelToFile(save_model_file_); } else if (VERB >= 2) { cerr << "Warning: No trained model available. Nothing to save to " << save_model_file_ << endl; } } // append a file to the library if (append_model_) { if (automatic_model_sel_) { if (VERB >= 3) { cerr << "Warning: The model should already be in the library if " << "the automatic model selection option is employed. No model " << "will be appended to the library"<< endl; } } else if (rt_model_ == NULL) { if (VERB >= 3) { cerr << "Warning: No model available, nothing to append to the library." << endl; } } else { AddModelLibrary(); } } // save the retention index to a file if (!index_file_.empty()) { SaveIndexToFile(best_model.first); } // test a model if (!test_file_.empty()) { // process the test data if (!processed_test_) { ProcessTestData(); } if (test_psms_.size() <= 0) { if (VERB >= 3) { cerr << "Warning: no test psms available, nothing to do. " << endl; return 0; } } // initialize the feature table test_features_table_ = DataManager::InitFeatureTable( RetentionFeatures::kMaxNumberFeatures, test_psms_); int ret = 1; if (automatic_model_sel_) { int index = best_model.first; if (index < 0) { if (VERB >= 2) { cerr << "Error: No model available to predict rt. Execution aborted." << endl; } return 0; } rt_models_[index]->PredictRT(test_aa_alphabet_, ignore_ptms_, "test psms", test_psms_); if (linear_calibration_ && train_psms_.size() > 1) { rt_models_[index]->PredictRT(train_aa_alphabet_, ignore_ptms_, "calibration psms", train_psms_); } } else { int ret = rt_model_->PredictRT(test_aa_alphabet_, ignore_ptms_, "test psms", test_psms_); if (ret != 0) { if (VERB >= 2) { cerr << "Error: the amino acids alphabet in the test data does not match " <<"the ones used to train the model. Please use the -p option to ignore the ptms " <<"in the test data data are were not present in the training set " << endl; } return 0; } if (linear_calibration_ && train_psms_.size() > 1) { ret = rt_model_->PredictRT(train_aa_alphabet_, ignore_ptms_, "training psms", train_psms_); if (ret != 0) { if (VERB >= 2) { cerr << "Error: the amino acids alphabet in training data does not match " <<"the one used to train the model. Please use the -p option to ignore the ptms " <<"that were not present in the set used to train the model " << endl; } return 0; } } } // linear calibration is performed only for automatic model selection or when // loading a model from a file if (linear_calibration_ && (automatic_model_sel_ || (!load_model_file_.empty() && train_psms_.size() >= 2))) { if (train_psms_.size() <= 1 && !automatic_model_sel_) { if (VERB >= 3) { cerr << "Warning: at least 2 training psms are needed to calibrate the model. " << "No calibration performed. " << endl; } } else { // get the a and b coefficients if (linear_calibration_ && train_psms_.size() < 2) { if (VERB >= 4) { cerr << "Warning: No (enough) calibration peptides. Linear calibration " << "cannot be performed " << endl; } } else { pair<vector<double> , vector<double> > rts = GetRTs(train_psms_); lts = new LTSRegression(); lts->setData(rts.first, rts.second); lts->runLTS(); AdjustLinearly(test_psms_); } } } // compute performance measures if (test_includes_rt_) { double rank_correl = ComputeRankCorrelation(test_psms_); double pearson_correl = ComputePearsonCorrelation(test_psms_); double win = ComputeWindow(test_psms_); if (VERB >= 3) { cerr << "Performance measures for the test data: " << endl; cerr << " Pearson's correlation r = " << pearson_correl << endl; cerr << " Spearman's rank correlation rho = " << rank_correl << endl; cerr << " Delta_t 95% = " << win << endl; } } // write the predictions to file if (!output_file_.empty()) { DataManager::WriteOutFile(output_file_, test_psms_, test_includes_rt_); } else { if (VERB >= 2 && !supress_print_) { PrintPredictions(test_psms_); } } } return 0; }
int PredictPtron(int argc, char **argv, ostream& errMsg){ errMsg << endl; errMsg << "Perceptron prediction using backpropagation with gradient descent:\n"; errMsg << "[mode]: continue training(-1) training(0), testing(1), both(2), Anomaly Detection(3), AD (4), ADAM (5), clean AD (6), clean ADAM(7)\n"; errMsg << "[norm]: unnormalized(0)/normalized(1) data\n"; errMsg << endl; if ( argc < 4 ) return(0); string test_filename; string train_filename; // string prefix = argv[2]; cout << "# I/O prefix: " << prefix << endl; // int mode = atoi(argv[3]); cout << "# testing/training mode is: " << mode << endl; if( -1 > mode || mode > 8 ){ cerr << "Assert: Invalid parameter [mode]\n\n"; exit(-1); } if( mode == 4 || mode==5) return( Ptron_edam( argc, argv, errMsg ) ); if( mode == 6 || mode==7) return( Ptron_clean( argc, argv, errMsg ) ); errMsg << "[lrate]: learning rate. \n"; errMsg << "[eta]: momentum factor.\n"; errMsg << "[stopErr]: Stopping criterion: MSE < stopErr.\n"; errMsg << "[stopIter]: Stopping criterion: maximum number of iterations.\n"; errMsg << "<numIn>: Size of input vector.\n"; errMsg << "<nOut> number of nodes in output layer\n"; errMsg << "[fname]: name of file with error data\n"; if ( argc < 8 )return(0); // int norm = atoi(argv[4]); cout << "# using normalized data: " << norm << endl; // train_filename = prefix + "-train.dat"; test_filename = prefix + "-test.dat"; string npfname = prefix + "-norm_param.dat"; // double lrate = atof(argv[5]); cout << "# lrate is: " << lrate << endl; double eta = atof( argv[6] ); cout << "# eta is: " << eta << endl; float stopErr = atof( argv[7] ); int stopIter = atoi( argv[8] ); cout << "# Training will terminate either when minimum MSE change is: " << stopErr << endl; cout << "# or when " << stopIter << " training iterations have been performed.\n"; // int num_inputs; int num_outputs; if( mode == -1 || mode == 0 || mode == 2 ){ if( argc != 11 ){ cerr << "ERROR: PredictPtron(int argc, char **argv) -- need architecture information.\n"; return( 0 ); } cout << "# Neural Network Architecture is: "; num_inputs = atoi( argv[9] ); cout << num_inputs << " "; num_outputs = atoi( argv[10] ); cout << num_outputs << endl; } else if( mode == 3 || mode == 4){ if( argc != 9 ) { cerr << "Assert: Need to input error datafile\n"; exit(-1); } //mode = 2; // testing only test_filename = argv[9]; } // //////////////////////////////////////////////////////////// // Begin Test/Train //////////////////////////////////////////////////////////// adet_ptron model; // // if training only or both training and testing // train naive predictor n1 if( mode == -1 || mode == 0 || mode == 2 ) { // // Read in training data vector< double > jdate_train; vector< double > jdate_test; vector< vector< float > > TrainExamples; vector< vector< float > > TestExamples; vector< vector< float > > normParam; //ReadTSData( train_filename, norm, jdate_train, TrainExamples, normParam); GetTTExamples( npfname, train_filename, jdate_train, TrainExamples, normParam ); GetTTExamples( npfname, test_filename, jdate_test, TestExamples, normParam ); if( norm == 1 ){ NormalizeExamples( 1, TrainExamples, normParam ); } // // if( mode == -1 ){ string ifile_name = prefix + "-ptron_predictor.out"; ifstream ifile( ifile_name.c_str() ); if( !ifile ) { cerr << "Assert: could not open file " << ifile_name << endl; exit(-1); } model = adet_ptron( ifile ); ifile.close(); model.ResetStopCrit( double(stopErr), stopIter ); } else{ model = adet_ptron( num_inputs, num_outputs, stopErr, stopIter ); } // // Train network //model.TrainXV( TrainExamples, TestExamples, lrate, eta, 1., 1. ); model.k_FoldXV( 10, TrainExamples, lrate, eta, 1., 1. ); string ofile_name = prefix + "-ptron_predictor.out"; ofstream ofile( ofile_name.c_str() ); if( !ofile ) { cerr << "Assert: could not open file " << ofile_name << endl; exit(-1); } model.Print( ofile ); ofile.close(); // // Evaluate predictor performance on training set vector< vector< float > > Results_Train; Results_Train = model.Test( TrainExamples ); // // if using normalized values, unnormalize the results if( norm == 1 ){ //cout << "Attempting to UnNormalize the results " << endl; UnnormalizeResults( Results_Train, normParam ); } // // Print out training error cout << "# Anomaly Detection Results:\n"; PrintError( Results_Train ); //PrintPredictions( Results_Train, jdate_train ); } // // Testing only, so initialize ann predictor from file else { string ifile_name = prefix + "-ptron_predictor.out"; ifstream ifile( ifile_name.c_str() ); if( !ifile ) { cerr << "Assert: could not open file " << ifile_name << endl; exit(-1); } model = adet_ptron( ifile ); ifile.close(); // model.Print(cout); // ofstream ofile( "test_percep.out" ); // if( !ofile ) // { // cerr << "Assert: could not open file test_percep.out" << endl; // exit(-1); // } // p1.Output( ofile ); // ofile.close(); } // // Testing only, both Train/Test, and Anomaly Detection // Evaluate performance of predictor on Testing set if ( mode == 1 || mode == 2 || mode == 3) { // // Read in testing data vector< double > jdate_test; vector< vector< float > > TestExamples; vector< vector< float > > normParam; //ReadTSData( test_filename, norm, jdate_test, TestExamples, normParam); 01/06 GetTTExamples( npfname, test_filename, jdate_test, TestExamples, normParam ); if( norm == 1 ){ NormalizeExamples( 1, TestExamples, normParam ); } // // Evaluate predictor performance on testing set vector< vector< float > > Results_Test; Results_Test = model.Test( TestExamples ); // // if using normalized values, unnormalize the results if( norm == 1 ) { UnnormalizeResults( Results_Test, normParam ); } // if( mode == 3 ) { // // Print out anomalies found FindAnomalies( Results_Test, jdate_test ); PrintPredictions( Results_Test, jdate_test ); } else { // // Print out testing error cout << "# Anomaly Detection Results:\n"; PrintError( Results_Test ); PrintPredictions( Results_Test, jdate_test ); } } return( 1 ); }