//---------------------------------------- // Method: TestUtil::TestUtil() // Purpose: Constructor // Usage: To be called at the start of each test method // Inputs: sourceClassInput = the name of the source class being tested // sourceMethodInput = the name of the source method being tested // testFileInput = the name of file containing the test code, e.g., __FILE__ // testLineInput = the line number in the file where testing is done, e.g. __LINE__ // verbosityInput = the level of verbosity in the print output, default=1, but set to 0 will supress fail messages // Outputs: none //---------------------------------------- TestUtil( const std::string& sourceClassInput = "Unknown", const std::string& sourceMethodInput = "Unknown", const std::string& testFileInput = "Unknown", const int& testLineInput = 0, const int& verbosityInput = 1 ): outputKeyword( "GpstkTest" ), sourceClass( sourceClassInput ), sourceMethod( sourceMethodInput ), testFileName( testFileInput ), testFileLine( "0" ), tolerance( 0 ), testMessage( "Developer is a lazy slacker" ), failBit( 0 ), verbosity( verbosityInput ), testCount( 0 ), subtestID( 1 ), failCount( 0 ) { // convert int to string setTestLine( testLineInput ); // strip off the path from the full-path filename // so that "/home/user/test.txt" becomes "test.txt" std::string file_sep = gpstk::getFileSep(); testFileName = testFileName.substr( testFileName.find_last_of( file_sep ) + 1 ); }
//----------------------------------------
// Method:  assert()
// Purpose: Record the outcome of one test expression
// Inputs:  testExpression = the boolean result of the check; true calls pass(), false calls fail()
//          test_message   = forwarded to setTestMessage()
//          line_number    = forwarded to setTestLine()
// Outputs: none; after recording the outcome, print() and next() are called
//----------------------------------------
void assert( bool testExpression,
             const std::string& test_message,
             const std::string& line_number )
{
    setTestMessage( test_message );
    setTestLine( line_number );

    if( testExpression )
    {
        pass();
    }
    else
    {
        fail();
    }

    print();
    next();
}
void ClassificationValidation::testInputData(bool transform) { int lines = model_->data->descriptor_matrix_[0].size(); test_substances_.resize(lines); test_Y_.resize(lines, model_->data->Y_.size()); class_results_.resize(clas_model->labels_.size()); class_results_.setZero(); bool back_transform = 0; if (transform && model_->data->descriptor_transformations_.size() > 0) { // if test data is to be transformed according to centering of training data, BUT has already been centered itself back_transform = 1; } for (int i = 0; i < lines; i++) { setTestLine(i, i, back_transform); } testAllSubstances(transform); quality_input_test_ = quality_; }
void fail( const std::string& fail_message, const std::string& line_number ) { setTestMessage( fail_message ); setTestLine( line_number ); fail(); }
void setTestMessage( const std::string& test_message, const std::string& line_number ) { setTestMessage( test_message ); setTestLine( line_number ); }
void ClassificationValidation::crossValidation(int k, bool restore) { if (model_->data->descriptor_matrix_.size() == 0 || model_->data->Y_.size() == 0) { throw Exception::InconsistentUsage(__FILE__, __LINE__, "Data must be fetched from input-files by QSARData before cross-validation can be done!"); } Eigen::MatrixXd desc_backup; //Eigen::MatrixXd res_backup; Eigen::MatrixXd y_backup; if (restore) { desc_backup = model_->descriptor_matrix_; // save matrices in order in restore them after cross-validation //res_backup = clas_model->training_result_; y_backup = model_->Y_; } int lines = model_->data->descriptor_matrix_[0].size(); int col = model_->data->descriptor_matrix_.size(); if (!model_->descriptor_IDs_.empty()) { col = model_->descriptor_IDs_.size(); } double average_accuracy = 0; class_results_.resize(clas_model->labels_.size()); class_results_.setZero(); // test k times for (int i = 0; i < k; i++) { int test_size = (lines+i)/k; int training_size = lines-test_size; model_->Y_.resize(training_size, model_->data->Y_.size()); model_->descriptor_matrix_.resize(training_size, col); test_substances_.resize(test_size); test_Y_.resize(test_size, model_->data->Y_.size()); int train_line = 0; // no of line in descriptor_matrix_ of model_ int test_line = 0; //copy data to training and test data set for (int line = 0; line < lines; line++) { if ((line+1+i)%k == 0) { setTestLine(test_line, line); test_line++; } else { setTrainingLine(train_line, line); train_line++; } } // test Model with model_->predict() for each line of test-data model_->train(); testAllSubstances(0); // do not transform cross-validation test-data again... average_accuracy += quality_; } quality_cv_ = average_accuracy/k; class_results_ = class_results_/k; if (restore) { model_->descriptor_matrix_ = desc_backup; // prevent confusion of cross-validation coefficients with coefficients model_->Y_ = y_backup; model_->readTrainingData(); model_->train(); } }
void ClassificationValidation::bootstrap(int k, bool restore) { if (model_->data->descriptor_matrix_.size() == 0 || model_->data->Y_.size() == 0) { throw Exception::InconsistentUsage(__FILE__, __LINE__, "Data must be fetched from input-files by QSARData before bootstrapping can be done!"); } Eigen::MatrixXd desc_backup; Eigen::MatrixXd res_backup; Eigen::MatrixXd y_backup; if (restore) { desc_backup = model_->descriptor_matrix_; // save matrices in order in restore them after cross-validation //res_backup = clas_model->training_result_; y_backup = model_->Y_; } class_results_.resize(clas_model->labels_.size()); class_results_.setZero(); quality_cv_ = 0; int N = model_->data->descriptor_matrix_[0].size(); int no_descriptors = model_->data->descriptor_matrix_.size(); if (!model_->descriptor_IDs_.empty()) { no_descriptors = model_->descriptor_IDs_.size(); } boost::mt19937 rng(PreciseTime::now().getMicroSeconds()); double overall_fit = 0; double overall_pred = 0; Eigen::VectorXd class_results_pred; class_results_pred.resize(clas_model->labels_.size()); class_results_pred.setZero(); Eigen::VectorXd class_results_fit; class_results_fit.resize(clas_model->labels_.size()); class_results_fit.setZero(); for (int i = 0; i < k; i++) // create and evaluate k bootstrap samples { vector<int> sample_substances(N, 0); // numbers of occurences of substances within this sample class_results_.setZero(); /// create training matrix and train the model_ model_->descriptor_matrix_.resize(N, no_descriptors); model_->Y_.resize(N, model_->data->Y_.size()); for (int j = 0; j < N; j++) { //int pos = rand()%N; int pos = rng() % N; setTrainingLine(j, pos); sample_substances[pos]++; } model_->train(); /// find size of test data set int test_size = 0; for (int j = 0; j < N; j++) { if (sample_substances[j] > 0) { continue; } test_size++; } test_substances_.resize(test_size); test_Y_.resize(test_size, model_->data->Y_.size()); /// create test data set and calculate quality_ of prediction int 
test_line = 0; for (int j = 0; j < N; j++) { if (sample_substances[j] == 0) { setTestLine(test_line, j); test_line++; } } testAllSubstances(0); overall_pred += quality_; class_results_pred += class_results_; class_results_.setZero(); // clear pred. result before adding training fit result!! /// create test data set and calculate quality_ of fit to training data test_substances_.resize(N); test_Y_.resize(N, model_->data->Y_.size()); test_line = 0; for (int j = 0; j < N; j++) { while (sample_substances[j] > 0) // insert substance as often as it occurs in the training data set { setTestLine(test_line, j); test_line++; sample_substances[j]--; } } testAllSubstances(0); overall_fit += quality_; class_results_fit += class_results_; } overall_pred = overall_pred/k; overall_fit = overall_fit/k; class_results_pred = class_results_pred/k; class_results_fit = class_results_fit/k; quality_cv_ = 0.632*overall_pred + 0.368*overall_fit; class_results_ = class_results_pred*0.632 + class_results_fit*0.368; if (restore) { model_->descriptor_matrix_ = desc_backup; // prevent confusion of cross-validation coefficients with coefficients model_->Y_ = y_backup; model_->readTrainingData(); model_->train(); } }