示例#1
0
/*
 * Train a NearestCentroidClassifier on the samples in TRAIN_DATA and
 * evaluate it on a test set split off from the same data.
 *
 * Steps:
 *   1. Read samples and labels from TRAIN_DATA.
 *   2. Seed rand() with VT_SEED, build the category index, call split_data()
 *      to fill test_data / test_labels, then build the category index again.
 *   3. Train the TF-IDF transformer on the training samples, transform both
 *      sets, and train the centroid classifier.
 *   4. Predict every test sample (in parallel when built with OpenMP) and
 *      feed the predictions into an Evaluation, printing progress along the
 *      way and a final summary at the end.
 *
 * Returns 0 on success, -1 if TRAIN_DATA cannot be opened.
 */
int
main(void)
{
	DataReader reader;
	std::vector<fv_t> data;
	std::vector<fv_t> test_data;
	std::vector<label_t> labels;
	std::vector<label_t> test_labels;
	category_index_t category_index;
	NearestCentroidClassifier centroid_classifier;
	TFIDFTransformer tfidf;
	long t = tick();      // start of the current phase
	long t_all = tick();  // start of the whole run, used for the final summary
	Evaluation evaluation;
	
	if (!reader.open(TRAIN_DATA)) {
		fprintf(stderr, "cant read file\n");
		return -1;
	}
	reader.read(data, labels);
	{
		// size() yields size_t and tick()'s return type is not visible here;
		// convert explicitly so every argument really is the `long` that
		// "%ld" expects (passing a mismatched type to printf is undefined).
		const long read_ms = tick() - t;
		printf("read %ld, %ld, %ldms\n",
		       static_cast<long>(data.size()),
		       static_cast<long>(labels.size()),
		       read_ms);
	}
	reader.close();
	
	// NOTE(review): this value of t is overwritten below before it is read.
	t = tick();
	srand(VT_SEED);
	build_category_index(category_index, data, labels);
	// NOTE(review): 0.05f is presumably the fraction of samples held out for
	// testing, and split_data() presumably removes them from data/labels,
	// which would explain rebuilding the index afterwards -- confirm.
	split_data(test_data, test_labels, data, labels, category_index, 0.05f);
	build_category_index(category_index, data, labels);
	printf("split train:%ld, test:%ld\n",
	       static_cast<long>(data.size()),
	       static_cast<long>(test_data.size()));
	
	t = tick();
	tfidf.train(data);
	tfidf.transform(data);
	tfidf.transform(test_data);
	centroid_classifier.train(category_index, data);
	{
		const long build_ms = tick() - t;
		printf("build index %ldms\n", build_ms);
	}
	
	t = tick();
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1)
#endif
	for (int i = 0; i < static_cast<int>(test_data.size()); ++i) {
		std::vector<int> topn_labels;
		centroid_classifier.predict(topn_labels, K, test_data[i]);
		// evaluation and t are shared between threads, so every update and
		// every progress print happens inside the critical section.
#ifdef _OPENMP
#pragma omp critical
#endif
		{
			evaluation.update(topn_labels, test_labels[i]);
			// NOTE(review): with dynamic scheduling iterations can finish out
			// of order, so i is the index of this sample and not necessarily
			// the number of samples evaluated so far.
			if (i % 1000 == 0) {
				print_evaluation(evaluation, i, t);
				t = tick();
			}
		}
	}
	printf("----\n");
	print_evaluation(evaluation, test_data.size(), t_all);
	
	return 0;
}