Ejemplo n.º 1
0
  //----------------------------------------
  // Method:  TestUtil::TestUtil()
  // Purpose: Constructor. Stores the names of the class/method under test
  //          and the location of the test code, and resets the counters,
  //          tolerance and fail bit to their starting values.
  // Usage:   To be called at the start of each test method
  // Inputs:  sourceClassInput  = the name of the source class being tested
  //          sourceMethodInput = the name of the source method being tested
  //          testFileInput     = the name of the file containing the test code,
  //                              e.g., __FILE__ (everything up to and including
  //                              the last file separator is removed)
  //          testLineInput     = the line number in the file where testing is
  //                              done, e.g., __LINE__
  //          verbosityInput    = the level of verbosity in the print output,
  //                              default=1, but set to 0 will suppress fail messages
  // Outputs: none
  //----------------------------------------
  TestUtil( const std::string& sourceClassInput  = "Unknown",
            const std::string& sourceMethodInput = "Unknown",
            const std::string& testFileInput     = "Unknown",
            const         int& testLineInput     = 0,
            const         int& verbosityInput    = 1
           ):
      outputKeyword( "GpstkTest" ),
      sourceClass( sourceClassInput  ),
      sourceMethod( sourceMethodInput ),
      testFileName( testFileInput ),
      testFileLine( "0" ),
      tolerance( 0 ),
      testMessage( "Developer is a lazy slacker" ),
      failBit( 0 ),
      verbosity( verbosityInput ),
      testCount( 0 ),
      subtestID( 1 ),
      failCount( 0 )
  {
      // testFileLine starts out as "0"; hand the integer line number to
      // setTestLine() so that it replaces this placeholder
      setTestLine( testLineInput );

      // reduce a full-path filename to its last component,
      // e.g. "/home/user/test.txt" becomes "test.txt"
      const std::string separator = gpstk::getFileSep();
      const std::string::size_type lastSeparator = testFileName.find_last_of( separator );

      // when no separator is present find_last_of() yields npos, and
      // npos + 1 wraps around to 0, so the name is kept in full
      testFileName = testFileName.substr( lastSeparator + 1 );
  }
Ejemplo n.º 2
0
  //----------------------------------------
  // Method:  assert()
  // Purpose: Records the outcome of one test expression: stores the message
  //          and line, calls pass() or fail() depending on the expression,
  //          then calls print() and next().
  // Inputs:  testExpression = result of the condition being tested;
  //                           true counts as a pass, false as a fail
  //          test_message   = message handed to setTestMessage()
  //          line_number    = line identifier (as a string) handed to setTestLine()
  // Outputs: none
  // NOTE(review): the method name is the same as the standard assert macro;
  //          presumably <cassert> must not be in effect where this is compiled - confirm.
  //----------------------------------------
  void assert( bool testExpression, const std::string& test_message, const std::string& line_number )
  {
      setTestMessage( test_message );
      setTestLine( line_number );

      if( testExpression )
      {
          pass();
      }
      else
      {
          fail();
      }

      print();
      next();
  }
Ejemplo n.º 3
0
		void ClassificationValidation::testInputData(bool transform)
		{	
			int lines = model_->data->descriptor_matrix_[0].size();
			test_substances_.resize(lines);
			test_Y_.resize(lines, model_->data->Y_.size());
			
			class_results_.resize(clas_model->labels_.size());
			class_results_.setZero();
			
			bool back_transform = 0; 
			if (transform && model_->data->descriptor_transformations_.size() > 0)
			{
				// if test data is to be transformed according to centering of training data, BUT has already been centered itself
				back_transform = 1; 
			}
			
			for (int i = 0; i < lines; i++)
			{
				setTestLine(i, i, back_transform); 
			}
			
			testAllSubstances(transform); 
			quality_input_test_ = quality_;
		}
Ejemplo n.º 4
0
 void fail( const std::string& fail_message, const std::string& line_number )
 {
     setTestMessage( fail_message );
     setTestLine( line_number );
     fail();
 }
Ejemplo n.º 5
0
 void setTestMessage( const std::string& test_message, const std::string& line_number )
 {
     setTestMessage( test_message );
     setTestLine( line_number );
 }
Ejemplo n.º 6
0
		// Runs a k-fold cross-validation of the model.
		//
		// In fold i (0 <= i < k) every input line for which (line+1+i) % k == 0 is
		// used as test data and all other lines as training data. The model is
		// trained on the training lines and evaluated with testAllSubstances();
		// the quality_ values and class_results_ of all folds are averaged into
		// quality_cv_ and class_results_.
		//
		// k:       number of folds, must be at least 1
		// restore: if true, the model's descriptor matrix and Y matrix are saved
		//          beforehand and put back afterwards, and the model is re-read and
		//          re-trained so that it does not keep the state of the last fold
		//
		// Throws Exception::InconsistentUsage if no input data is available or if
		// k < 1 (the averages below would otherwise be divided by a non-positive k
		// without any fold having been evaluated).
		void ClassificationValidation::crossValidation(int k, bool restore)
		{
			if (model_->data->descriptor_matrix_.size() == 0 || model_->data->Y_.size() == 0)
			{
				throw Exception::InconsistentUsage(__FILE__, __LINE__, "Data must be fetched from input-files by QSARData before cross-validation can be done!"); 
			}
			if (k < 1)
			{
				throw Exception::InconsistentUsage(__FILE__, __LINE__, "The number of cross-validation folds (k) must be at least 1!");
			}
			
			Eigen::MatrixXd desc_backup;
			Eigen::MatrixXd y_backup;
			if (restore)
			{
				// save matrices in order to restore them after cross-validation
				desc_backup = model_->descriptor_matrix_;
				y_backup = model_->Y_;
			}
			
			int lines = model_->data->descriptor_matrix_[0].size();
			int col = model_->data->descriptor_matrix_.size();
			if (!model_->descriptor_IDs_.empty())
			{
				// the model has its own list of descriptor IDs, so its training
				// matrix gets one column per listed ID
				col = model_->descriptor_IDs_.size();
			}
			double average_accuracy = 0;
			class_results_.resize(clas_model->labels_.size());
			class_results_.setZero();
			
			// test k times
			for (int i = 0; i < k; i++)
			{	
				// (lines+i)/k is exactly the number of lines in [0, lines) that
				// satisfy (line+1+i) % k == 0, i.e. the size of this fold's test set
				int test_size = (lines+i)/k;
				int training_size = lines-test_size;
				model_->Y_.resize(training_size, model_->data->Y_.size());
				model_->descriptor_matrix_.resize(training_size, col); 
				test_substances_.resize(test_size);
				test_Y_.resize(test_size, model_->data->Y_.size());
				
				int train_line = 0;  // next free line in descriptor_matrix_ of model_
				int test_line = 0;   // next free line of the test data
				
				// copy data to training and test data set
				for (int line = 0; line < lines; line++)
				{
					if ((line+1+i)%k == 0)
					{
						setTestLine(test_line, line);
						test_line++;
					}
					else
					{
						setTrainingLine(train_line, line);
						train_line++;
					}
				}
				
				// train on this fold's training data and evaluate its test data
				model_->train();
				testAllSubstances(0);  // do not transform cross-validation test-data again...
				average_accuracy += quality_;
			}
			quality_cv_ = average_accuracy/k;
			class_results_ = class_results_/k;
			
			if (restore)
			{
				// prevent confusion of cross-validation coefficients with the
				// coefficients of the model trained on the regular training data
				model_->descriptor_matrix_ = desc_backup;
				model_->Y_ = y_backup;
				model_->readTrainingData();
				model_->train();
			}
		}
Ejemplo n.º 7
0
		// Estimates the quality of the model from k bootstrap samples.
		//
		// For each sample, N input lines are drawn with replacement as training
		// data (N = number of input lines). The trained model is then evaluated
		// twice with testAllSubstances(): once on the lines that were never drawn
		// (prediction) and once on the drawn lines themselves (fit). The averages
		// over all samples are combined as 0.632*prediction + 0.368*fit into
		// quality_cv_ and class_results_.
		//
		// k:       number of bootstrap samples, must be at least 1
		// restore: if true, the model's descriptor matrix and Y matrix are saved
		//          beforehand and put back afterwards, and the model is re-read and
		//          re-trained so that it does not keep the state of the last sample
		//
		// Throws Exception::InconsistentUsage if no input data is available or if
		// k < 1 (the averages below would otherwise be divided by a non-positive k
		// without any sample having been evaluated).
		void ClassificationValidation::bootstrap(int k, bool restore)
		{
			if (model_->data->descriptor_matrix_.size() == 0 || model_->data->Y_.size() == 0)
			{
				throw Exception::InconsistentUsage(__FILE__, __LINE__, "Data must be fetched from input-files by QSARData before bootstrapping can be done!"); 
			}
			if (k < 1)
			{
				throw Exception::InconsistentUsage(__FILE__, __LINE__, "The number of bootstrap samples (k) must be at least 1!");
			}
			
			Eigen::MatrixXd desc_backup;
			Eigen::MatrixXd y_backup;
			if (restore)
			{
				// save matrices in order to restore them after bootstrapping
				desc_backup = model_->descriptor_matrix_;
				y_backup = model_->Y_;
			}

			class_results_.resize(clas_model->labels_.size());
			class_results_.setZero();
			quality_cv_ = 0;
			int N = model_->data->descriptor_matrix_[0].size();
			int no_descriptors = model_->data->descriptor_matrix_.size();
			if (!model_->descriptor_IDs_.empty())
			{
				// the model has its own list of descriptor IDs, so its training
				// matrix gets one column per listed ID
				no_descriptors = model_->descriptor_IDs_.size();
			}

			// random number generator seeded from the current time
			boost::mt19937 rng(PreciseTime::now().getMicroSeconds());
			
			double overall_fit = 0;
			double overall_pred = 0;
			Eigen::VectorXd class_results_pred; 
			class_results_pred.resize(clas_model->labels_.size());
			class_results_pred.setZero();
			Eigen::VectorXd class_results_fit; 
			class_results_fit.resize(clas_model->labels_.size());
			class_results_fit.setZero();

			for (int i = 0; i < k; i++) // create and evaluate k bootstrap samples
			{
				vector<int> sample_substances(N, 0); // numbers of occurrences of substances within this sample
				
				class_results_.setZero();
			
				/// create training matrix and train the model_
				model_->descriptor_matrix_.resize(N, no_descriptors);
				model_->Y_.resize(N, model_->data->Y_.size());
				for (int j = 0; j < N; j++)
				{
					int pos = rng() % N;
					setTrainingLine(j, pos);
					sample_substances[pos]++;
				}
				model_->train();
				
				/// find size of test data set (= number of substances that were never drawn)
				int test_size = 0;
				for (int j = 0; j < N; j++)
				{
					if (sample_substances[j] == 0) 
					{
						test_size++;
					}
				}
				test_substances_.resize(test_size);
				test_Y_.resize(test_size, model_->data->Y_.size());
				
				/// create test data set and calculate quality_ of prediction
				int test_line = 0;
				for (int j = 0; j < N; j++) 
				{
					if (sample_substances[j] == 0) 
					{	
						setTestLine(test_line, j);
						test_line++;
					}
				}
				testAllSubstances(0);
				overall_pred += quality_;
				class_results_pred += class_results_;		
			
				class_results_.setZero(); // clear pred. result before adding training fit result!!
				
				/// create test data set and calculate quality_ of fit to training data	
				test_substances_.resize(N);
				test_Y_.resize(N, model_->data->Y_.size());
				test_line = 0;
				for (int j = 0; j < N; j++)
				{	
					// insert substance as often as it occurs in the training data set;
					// the counts add up to N, so exactly N test lines are written
					while (sample_substances[j] > 0)
					{
						setTestLine(test_line, j);
						test_line++;
						sample_substances[j]--;
					}
				}
				testAllSubstances(0);
				overall_fit += quality_;
				class_results_fit += class_results_;
			}
			
			overall_pred = overall_pred/k;
			overall_fit = overall_fit/k;
			class_results_pred = class_results_pred/k;
			class_results_fit = class_results_fit/k;
			
			// weighted combination of averaged prediction quality and averaged fit quality
			quality_cv_ = 0.632*overall_pred + 0.368*overall_fit;
			class_results_ = class_results_pred*0.632 + class_results_fit*0.368;
			
			if (restore)
			{
				// prevent confusion of bootstrap coefficients with the
				// coefficients of the model trained on the regular training data
				model_->descriptor_matrix_ = desc_backup;
				model_->Y_ = y_backup;
				model_->readTrainingData();
				model_->train();
			}
		}