Example #1
TEST(naive_bayes, serialization) {
    NaiveBayes bayes;
    std::vector<std::string> words1{"hello", "world", "world"};
    std::vector<std::string> words2{"f**k", "world", "world"};
    std::vector<Example> examples = {
        Example(words1, "positive"),
        Example(words2, "negative")
    };
    bayes.fit(examples);
    auto scores = bayes.scores(examples[0]);

    TempFile tmp_file;
    std::ofstream ofs(tmp_file.filename);
    boost::archive::text_oarchive oa(ofs);
    oa << bayes;
    ofs.close();

    NaiveBayes bayes_restore;
    std::ifstream ifs(tmp_file.filename);
    boost::archive::text_iarchive ia(ifs);
    ia >> bayes_restore;

    auto scores_restore = bayes_restore.scores(examples[0]);

    EXPECT_EQ(scores.get("positive"), scores_restore.get("positive"));
    EXPECT_EQ(scores.get("negative"), scores_restore.get("negative"));
}
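For the round trip above to compile, NaiveBayes has to expose a Boost.Serialization hook. A minimal sketch of what that member could look like, assuming the fitted state is a per-class word-count map plus per-class example counts; the member names are hypothetical, not the project's actual fields:

#include <boost/serialization/access.hpp>
#include <boost/serialization/map.hpp>
#include <boost/serialization/string.hpp>
#include <map>
#include <string>

class NaiveBayes {
    friend class boost::serialization::access;

    // Hypothetical fitted state (filled in by fit()).
    std::map<std::string, std::map<std::string, int>> word_counts_;  // class -> word -> count
    std::map<std::string, int> class_counts_;                        // class -> #examples

    template <class Archive>
    void serialize(Archive& ar, const unsigned int /*version*/)
    {
        // Boost serializes std::map and std::string once the headers above are in.
        ar & word_counts_;
        ar & class_counts_;
    }

public:
    // ... fit(), scores(), fast_scores() as exercised by the tests ...
};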
Example #2
TEST(naive_bayes, example) {
    NaiveBayes bayes;
    std::vector<std::string> words1{"hello", "world", "world"};
    std::vector<std::string> words2{"f**k", "world", "world"};
    std::vector<Example> examples = {
        Example(words1, "positive"),
        Example(words2, "negative")
    };
    bayes.fit(examples);
    print_scores(bayes.scores(examples[0]));
    print_scores(bayes.scores(examples[1]));
}
Example #3
int main(int argc, const char * argv[]) {
    DataSet train = createDataSet("/Users/Hacker/Desktop/vote_train.arff.txt");
    DataSet test = createDataSet("/Users/Hacker/Desktop/vote_test.arff.txt");
    vector<Instance> testData = test.instances;
    vector<string> className = test.labels;
    long size = testData.size();
    // Learn the model from the training set.
    NaiveBayes nb(train);
    nb.training(train);

    long correct_count = 0;

    for (long i = 0; i < size; i++)
    {
        // classify() returns the probability of the first class, className[0].
        double probability = nb.classify(testData[i]);
        string actualclass = className[testData[i].getlabel()];
        if (probability > 0.5)
        {
            cout << className[0] << " " << actualclass << " ";
            if (className[0] == actualclass)
            {
                correct_count++;
            }
            printf("%.16f", probability);
        }
        else
        {
            // classify() gave P(className[0]); flip it for className[1].
            probability = 1 - probability;
            cout << className[1] << " " << actualclass << " ";
            if (className[1] == actualclass)
            {
                correct_count++;
            }
            printf("%.16f", probability);
        }
        cout << endl;
    }
    
    cout << "Number Of Correct Classification: " << correct_count << endl;

    return 0;
}
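A purely illustrative follow-up: since the loop already tallies correct_count, accuracy is one more line before the return.

    cout << "Accuracy: " << static_cast<double>(correct_count) / size << endl;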
Example #4
// Returns the most likely digit for an 8x8 image under the trained model.
int mostLikely(Digit const & digit)
{
    // Start empty: Vector<int> values(64) would pre-fill 64 zeros,
    // and the appends below would then double the feature count.
    Vector<int> values;
    for (int i = 0; i < 8; i++)
        for (int j = 0; j < 8; j++)
            values.append(mapColor(digit.color[i][j]));
    return nb.classify(values);
}
Example #5
// Learns color probabilities from one labeled digit.
void learnProbColorGivenN(Digit digit)
{
    // Start empty (see the note in mostLikely about Vector<int>(64)).
    Vector<int> values;
    for (int i = 0; i < 8; i++)
        for (int j = 0; j < 8; j++)
            values.append(mapColor(digit.color[i][j]));
    nb.learn(digit.actualDigit, values);
}
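Both digit examples lean on a mapColor helper that is not shown. A minimal sketch under the assumption that it quantizes a gray level into a small feature alphabet; the bucket count and thresholds here are invented:

// Hypothetical: bucket an 8-bit gray value into 0 (light), 1 (mid), 2 (dark).
int mapColor(int gray)
{
    if (gray < 85) return 0;
    if (gray < 170) return 1;
    return 2;
}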
Example #6
TEST(naive_bayes, nomatching_words_example) {
    // When nothing in dict matches, we want an empty map returned,
    // rather than smoothed class distributions.
    NaiveBayes bayes;
    std::vector<std::string> words1{"hello", "world"};
    std::vector<std::string> words2{"good", "morning"};
    std::vector<Example> examples = {
        Example(words1, "positive"),
        Example(words2, "negative")
    };
    bayes.fit(examples);

    std::vector<std::string> missing_words{"escape", "notfound"};
    Example missing_example(missing_words, "positive");
    auto scores = bayes.scores(missing_example);
    EXPECT_EQ(0, scores.size());
    EXPECT_EQ(0, scores.sum());
}
Example #7
TEST(naive_bayes, fast_scores) {
    NaiveBayes bayes;
    std::vector<std::string> words1{"hello", "world"};
    std::vector<std::string> words2{"good", "morning"};
    std::vector<Example> examples = {
        Example(words1, "positive"),
        Example(words2, "negative")
    };
    bayes.fit(examples);

    auto scores1 = bayes.fast_scores(examples[0]);
    EXPECT_EQ(1, scores1.size());
    EXPECT_EQ(2, scores1.get("positive"));

    auto scores2 = bayes.fast_scores(examples[1]);
    EXPECT_EQ(1, scores2.size());
    EXPECT_EQ(2, scores2.get("negative"));

    std::vector<std::string> missing_words{"escape", "notfound"};
    Example missing_example(missing_words, "positive");
    auto missing_scores = bayes.fast_scores(missing_example);
    EXPECT_EQ(0, missing_scores.size());
    EXPECT_EQ(0, missing_scores.sum());
}
Example #8
int main()
{
	NaiveBayes nb;
	nb.create_vocab_list();
	nb.get_train_matrix();
	nb.print();
	nb.train_NB0();

	// "null" appears to act as an end-of-array sentinel for classify_NB.
	string doc1_to_classify[] = {"love", "my", "dalmation", "null"};
	string doc2_to_classify[] = {"stupid", "garbage", "null"};
	cout << "doc1 classified as : " << nb.classify_NB(doc1_to_classify) << endl;
	cout << "doc2 classified as : " << nb.classify_NB(doc2_to_classify) << endl;
	
	return 0;
}
Example #9
bool ex_model(void *arg) {

	Trainer::SetLogLevel (SSI_LOG_LEVEL_DEBUG);

	ssi_size_t n_classes = 4;
	ssi_size_t n_samples = 50;
	ssi_size_t n_streams = 1;
	ssi_real_t train_distr[][3] = {
		{ 0.25f, 0.25f, 0.1f },
		{ 0.25f, 0.75f, 0.1f },
		{ 0.75f, 0.75f, 0.1f },
		{ 0.75f, 0.75f, 0.1f }
	};
	ssi_real_t test_distr[][3] = { { 0.5f, 0.5f, 0.5f } };
	SampleList strain;
	SampleList sdevel;
	SampleList stest;
	ModelTools::CreateTestSamples (strain, n_classes, n_samples, n_streams, train_distr, "user");	
	ModelTools::CreateTestSamples (sdevel, n_classes, n_samples, n_streams, train_distr, "user");	
	ModelTools::CreateTestSamples (stest, 1, n_samples * n_classes, n_streams, test_distr, "user");	
	ssi_char_t string[SSI_MAX_CHAR];	
	for (ssi_size_t n_class = 1; n_class < n_classes; n_class++) {
		ssi_sprint (string, "class%02d", n_class);
		stest.addClassName (string);
	}
	
	// train svm
	{
		SVM *model = ssi_create(SVM, 0, true);
		model->getOptions()->seed = 1234;
		Trainer trainer(model);
		trainer.train(strain);
		trainer.save("svm");
	}

	// evaluation
	{
		Trainer trainer;
		Trainer::Load(trainer, "svm");
		Evaluation eval;
		eval.eval(&trainer, sdevel);
		eval.print();

		trainer.cluster(stest);
		ModelTools::PlotSamples(stest, "svm (internal normalization)", ssi_rect(650, 0, 400, 400));
	}

	// train knn
	{
		KNearestNeighbors *model = ssi_create(KNearestNeighbors, 0, true);
		model->getOptions()->k = 5;
		//model->getOptions()->distsum = true;
		Trainer trainer (model);
		trainer.train (strain);
		trainer.save ("knn");
	}

	// evaluation
	{
		Trainer trainer;
		Trainer::Load (trainer, "knn");			
		Evaluation eval;
		eval.eval (&trainer, sdevel);
		eval.print ();

		trainer.cluster (stest);
		ModelTools::PlotSamples(stest, "knn", ssi_rect(650, 0, 400, 400));
	}

	// train naive bayes
	{
		NaiveBayes *model = ssi_create(NaiveBayes, 0, true);
		model->getOptions()->log = true;
		Trainer trainer (model);
		trainer.train (strain);
		trainer.save ("bayes");
	}

	// evaluation
	{
		Trainer trainer;
		Trainer::Load (trainer, "bayes");			
		Evaluation eval;
		eval.eval (&trainer, sdevel);
		eval.print ();

		trainer.cluster (stest);
		ModelTools::PlotSamples(stest, "bayes", ssi_rect(650, 0, 400, 400));
	}

	// train lda
	{
		LDA *model = ssi_create(LDA, "lda", true);
		Trainer trainer (model);
		trainer.train (strain);

		model->print();
		trainer.save ("lda");
	}

	// evaluation
	{
		Trainer trainer;
		Trainer::Load (trainer, "lda");
		Evaluation eval;
		eval.eval (&trainer, sdevel);
		eval.print ();

		trainer.cluster (stest);
		ModelTools::PlotSamples(stest, "lda", ssi_rect(650, 0, 400, 400));
	}

	ssi_print ("\n\n\tpress a key to continue\n");
	getchar ();

	return true;
}

Example #10
int main()
{
	ifstream infile("NaiveBayes-samples.txt");
	vector<vector<int>> sample_points;
	vector<int> sample_values, temp(2);
	int v, cnt = 0;
	// File layout: repeated "x1 x2 label" triples, terminated by a lone -1.
	while (infile >> v)
	{
		if (v == -1) break;
		temp[cnt++] = v;
		if (cnt == 2)
		{
			sample_points.push_back(temp);  // both features read
			infile >> v;                    // third value is the class label
			sample_values.push_back(v);
			cnt = 0;
		}
	}
	NaiveBayes nb;
	vector<int> fv = { 3, 3 };  // apparently the number of distinct values per feature
	nb.create(fv, 2, 1.0);      // 2 classes; 1.0 is presumably a Laplace smoothing term
	nb.train(sample_points, sample_values);
	double p;
	vector<int> data = { 1, 0 };
	int k = nb.compute(data, p);  // predicted class, with its probability written to p
	cout << "Class: " << k << " Probability: " << p << endl;
	getchar();
	return 0;
}
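The reader loop implies a simple layout for NaiveBayes-samples.txt: repeated "x1 x2 label" triples, closed by a lone -1. A made-up file in that shape (feature values in 0..2 to match fv = {3, 3}, labels 0/1 for the two classes):

1 0 0
2 1 1
0 2 0
1 1 1
-1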
Example #11
void exe_naive_bayes()
{
    NaiveBayes nb;
    nb.set_training_data_file("training.dat");
    nb.add_training_data("Buy cheap viagra SPAM");
    nb.add_training_data("Buy cheap airlines airlines tickets HAM");
    nb.add_training_data("Dear friend I am the king of Persia king SPAM");
    nb.add_training_data("Hello friend I am from Persia you must be from New York HAM");
    nb.add_training_data("Hi friend how are you doing I love you HAM");
    nb.add_training_data("New York is a big city HAM");

    nb.get_training_data();

    nb.train();

    std::string class_ = nb.classify(std::string("Buy cheap viagra tickets"));
    std::cout << "Your message is " << class_ << std::endl;

    class_ = nb.classify(std::string("Hello friend how are you"));
    std::cout << "Your message is " << class_ << std::endl;

    class_ = nb.classify(std::string("Dhaka is a big city"));
    std::cout << "Your message is " << class_ << std::endl;

}
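Each training line above carries its class as the final token (SPAM or HAM). A sketch of how add_training_data might peel that label off, assuming whitespace tokenization; this is illustrative, not the library's actual code:

#include <sstream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical helper: split "Buy cheap viagra SPAM" into word tokens
// plus the trailing class label.
static std::pair<std::vector<std::string>, std::string>
split_labeled_line(const std::string& line)
{
    std::istringstream iss(line);
    std::vector<std::string> words;
    std::string tok;
    while (iss >> tok) words.push_back(tok);

    std::string label;
    if (!words.empty()) {
        label = words.back();  // "SPAM" or "HAM"
        words.pop_back();      // the rest is the message text
    }
    return {words, label};
}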