Пример #1
0
// Entry point of the MLBayesTest example.
//
// Expects exactly two command-line arguments: the training dataset path and
// the test dataset path. ".arff" is appended to each argument before loading,
// so both paths must be given WITHOUT that extension.
//
// A single Bayes classifier is created, then trained on the first dataset and
// tested on the second, once for each of BT_Naive, BT_TAN and
// BT_SparseCandidate, with a reset() after every run.
//
// Returns 0 when all three runs were executed, 1 on a usage error, a dataset
// that could not be loaded, or a classifier that could not be created.
int main( int argc, char** argv )
{
	if( argc != 3 )
	{
		std::cout << "Usage: MLBayesTest <TrainingDataSetFile> <TestDataSetFile>" << std::endl;
		return 1;
	}

	// NOTE(review): nothing visible here releases the datasets or the classifier
	// created below; presumably MLLIB owns them - confirm against the library.
	DataSet* data = MLLIB->createDataSet();
	DataSet* test_data = MLLIB->createDataSet();

	// Load the training dataset
	const std::string trainPath = std::string( argv[1] ) + ".arff";
	if( !MLLIB->loadDataSet( trainPath, data ) )
	{
		std::cerr << "Failed to load training dataset: " << trainPath << std::endl;
		return 1;
	}

	// Load the test dataset
	const std::string testPath = std::string( argv[2] ) + ".arff";
	if( !MLLIB->loadDataSet( testPath, test_data ) )
	{
		std::cerr << "Failed to load test dataset: " << testPath << std::endl;
		return 1;
	}

	// Create the classifier once; it is reconfigured for every Bayes type below.
	BayesClassifier* bcl = static_cast<BayesClassifier*>( MLLIB->createClassifier(CT_Bayes) );
	if( !bcl )
	{
		std::cerr << "Failed to create the Bayes classifier" << std::endl;
		return 1;
	}

	// Naive Bayes
	bcl->setBayesType(BT_Naive);
	bcl->train(data);
	bcl->test(test_data);
	bcl->reset();

	// TAN
	bcl->setBayesType(BT_TAN);
	bcl->train(data);
	bcl->test(test_data);
	bcl->reset();

	// Sparse candidate
	bcl->setBayesType(BT_SparseCandidate);
	bcl->train(data);
	bcl->test(test_data);
	bcl->reset();

	return 0;
}
Пример #2
0
// Trains a BayesClassifier on a random subsample of two sample sets and then
// evaluates the resulting decision function on both complete sets.
//
// Samples taken from ss1 are labelled +1 and samples taken from ss2 are
// labelled -1. During training an element is kept only when
// std::rand() % <mod> == 0, so a modulus of 1 keeps every element.
//
// ss1_mod  subsampling modulus for ss1; must be non-zero
// ss2_mod  subsampling modulus for ss2; must be non-zero
// ss1      samples labelled +1
// ss2      samples labelled -1
//
// A zero modulus would make the '%' below undefined behaviour, so in that case
// a message is printed and nothing is trained or measured.
void bayes_measure(int ss1_mod, int ss2_mod, SampleSet& ss1, SampleSet& ss2) {
  if (ss1_mod == 0 || ss2_mod == 0) {
    std::cout << "bayes_measure: subsampling modulus must be non-zero" << std::endl;
    return;
  }

  std::cout << "----new Bayes training----" << std::endl;

  std::vector<std::vector<double>> samples;
  std::vector<int> labels;
  int s1_count = 0, s2_count = 0;

  // Training set: random subsample of ss1 (+1) followed by one of ss2 (-1).
  for (int i = 0; i < ss1.size(); ++i) {
    if (std::rand() % ss1_mod == 0) {
      samples.push_back(toBayesSample(ss1[i]));
      labels.push_back(+1);
      s1_count++;
    }
  }

  for (int i = 0; i < ss2.size(); ++i) {
    if (std::rand() % ss2_mod == 0) {
      samples.push_back(toBayesSample(ss2[i]));
      labels.push_back(-1);
      s2_count++;
    }
  }

  BayesClassifier c;
  auto decision_fun = c.train(samples, labels);

  // Measure: reuse the buffers for the complete, non-subsampled sets.
  samples.clear();
  labels.clear();
  samples.reserve(ss1.size() + ss2.size());
  labels.reserve(ss1.size() + ss2.size());
  int s1_all_count = 0, s2_all_count = 0;

  for (int i = 0; i < ss1.size(); ++i) {
    samples.push_back(toBayesSample(ss1[i]));
    labels.push_back(+1);
    s1_all_count++;
  }

  for (int i = 0; i < ss2.size(); ++i) {
    samples.push_back(toBayesSample(ss2[i]));
    labels.push_back(-1);
    s2_all_count++;
  }

  std::cout << "Train " << s1_count << " + " << s2_count << " measures for " << s1_all_count << " + " << s2_all_count << std::endl;
  std::cout << "Measure for " << samples.size() << " samples" << std::endl;
  measure_for_dataset(decision_fun, samples, labels);
}