Ejemplo n.º 1
0
/**
 * Predict a multiclass label for every vector of the stored features.
 *
 * @param data optional features; when non-NULL they must be
 *        CDenseFeatures<float64_t> and replace the stored features via
 *        set_features() before prediction
 * @return newly allocated labels holding one entry per vector of m_feats;
 *         SG_REF has already been applied to the returned object
 */
CMulticlassLabels* CRelaxedTree::apply_multiclass(CFeatures* data)
{
    // Swap in the caller-supplied features, if any were given.
    if (data != NULL)
    {
        CDenseFeatures<float64_t> *feats = dynamic_cast<CDenseFeatures<float64_t>*>(data);
        REQUIRE(feats != NULL, ("Require non-NULL dense features of float64_t\n"))
        set_features(feats);
    }

    // Re-initialise the kernel of each sub-machine: its existing left-hand
    // side is kept and m_feats becomes the right-hand side.
    int32_t machine_idx = 0;
    while (machine_idx < m_machines->get_num_elements())
    {
        CSVM *svm = (CSVM*)m_machines->get_element(machine_idx);
        CKernel *svm_kernel = svm->get_kernel();
        CFeatures *kernel_lhs = svm_kernel->get_lhs();

        svm_kernel->init(kernel_lhs, m_feats);

        // Release the references handed out by the getters above.
        SG_UNREF(svm);
        SG_UNREF(svm_kernel);
        SG_UNREF(kernel_lhs);

        ++machine_idx;
    }

    // One label slot per feature vector, filled by apply_one().
    CMulticlassLabels *result = new CMulticlassLabels(m_feats->get_num_vectors());
    SG_REF(result);
    for (int32_t vec_idx=0; vec_idx < result->get_num_labels(); ++vec_idx)
    {
        const int32_t predicted = int32_t(apply_one(vec_idx));
        result->set_int_label(vec_idx, predicted);
    }

    return result;
}
/**
 * Predict a multiclass label for every example delivered by the stored
 * streaming features.
 *
 * @param data optional features; when non-NULL they must report feature
 *        class C_STREAMING_DENSE and feature type F_SHORTREAL, and they
 *        replace the stored features via set_features()
 * @return newly allocated labels with one entry per streamed example
 */
CMulticlassLabels* CConditionalProbabilityTree::apply_multiclass(CFeatures* data)
{
	if (data != NULL)
	{
		// Reject anything that is not a float32_t streaming dense feature object.
		if (data->get_feature_class() != C_STREAMING_DENSE)
		{
			SG_ERROR("Expected StreamingDenseFeatures\n")
		}
		if (data->get_feature_type() != F_SHORTREAL)
		{
			SG_ERROR("Expected float32_t feature type\n")
		}

		CStreamingDenseFeatures<float32_t>* stream_feats =
			dynamic_cast<CStreamingDenseFeatures<float32_t>* >(data);
		set_features(stream_feats);
	}

	// The number of examples is only known once the stream has been read,
	// so predictions are collected in a growable container first.
	vector<int32_t> collected;

	m_feats->start_parser();
	for (;;)
	{
		if (!m_feats->get_next_example())
			break;

		collected.push_back(apply_multiclass_example(m_feats->get_vector()));
		m_feats->release_example();
	}
	m_feats->end_parser();

	// Copy the collected predictions into the label object.
	CMulticlassLabels *result = new CMulticlassLabels(collected.size());
	size_t pos = 0;
	for (vector<int32_t>::const_iterator it = collected.begin();
			it != collected.end(); ++it, ++pos)
	{
		result->set_int_label(pos, *it);
	}
	return result;
}
void test()
{
	// Prepare to read a file for the training data
	char fname_feats[]  = "../data/fm_train_real.dat";
	char fname_labels[] = "../data/label_train_multiclass.dat";
	CStreamingAsciiFile* ffeats_train  = new CStreamingAsciiFile(fname_feats);
	CStreamingAsciiFile* flabels_train = new CStreamingAsciiFile(fname_labels);
	SG_REF(ffeats_train);
	SG_REF(flabels_train);

	CStreamingDenseFeatures< float64_t >* stream_features =
		new CStreamingDenseFeatures< float64_t >(ffeats_train, false, 1024);

	CStreamingDenseFeatures< float64_t >* stream_labels =
		new CStreamingDenseFeatures< float64_t >(flabels_train, true, 1024);

	SG_REF(stream_features);
	SG_REF(stream_labels);

	stream_features->start_parser();

	// Read the values from the file and store them in features
	CDenseFeatures< float64_t >* features=
			(CDenseFeatures< float64_t >*)
			stream_features->get_streamed_features(1000);

	stream_features->end_parser();

	CMulticlassLabels* labels = new CMulticlassLabels(features->get_num_vectors());
	SG_REF(features);
	SG_REF(labels);

	// Read the labels from the file
	int32_t idx = 0;
	stream_labels->start_parser();
	while ( stream_labels->get_next_example() )
	{
		labels->set_int_label( idx++, (int32_t)stream_labels->get_label() );
		stream_labels->release_example();
	}
	stream_labels->end_parser();

	// Create liblinear svm classifier with L2-regularized L2-loss
	CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
	SG_REF(svm);

	// Add some configuration to the svm
	svm->set_epsilon(EPSILON);
	svm->set_bias_enabled(true);

    CECOCDiscriminantEncoder *encoder = new CECOCDiscriminantEncoder();
    encoder->set_features(features);
    encoder->set_labels(labels);

	// Create a multiclass svm classifier that consists of several of the previous one
	CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
			new CECOCStrategy(encoder, new CECOCHDDecoder()), (CDotFeatures*) features, svm, labels);
	SG_REF(mc_svm);

	// Train the multiclass machine using the data passed in the constructor
	mc_svm->train();

	// Classify the training examples and show the results
	CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(mc_svm->apply());

	SGVector< int32_t > out_labels = output->get_int_labels();
	SGVector< int32_t >::display_vector(out_labels.vector, out_labels.vlen);

	// Free resources
	SG_UNREF(mc_svm);
	SG_UNREF(svm);
	SG_UNREF(output);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(ffeats_train);
	SG_UNREF(flabels_train);
	SG_UNREF(stream_features);
	SG_UNREF(stream_labels);
}
int main()
{
	init_shogun_with_defaults();

	const char* train_file_name = "../data/7class_example4_train.dense";
	const char* test_file_name = "../data/7class_example4_test.dense";
	CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
	SG_REF(train_file);

	CStreamingDenseFeatures<float32_t>* train_features = new CStreamingDenseFeatures<float32_t>(train_file, true, 1024);
	SG_REF(train_features);

	CRandomConditionalProbabilityTree *cpt = new CRandomConditionalProbabilityTree();
	cpt->set_num_passes(1);
	cpt->set_features(train_features);
	cpt->train();
	cpt->print_tree();

	CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);
	CStreamingDenseFeatures<float32_t>* test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
	SG_REF(test_features);

	CMulticlassLabels *pred = cpt->apply_multiclass(test_features);
	test_features->reset_stream();
	SG_SPRINT("num_labels = %d\n", pred->get_num_labels());

	SG_UNREF(test_features);
	SG_UNREF(test_file);
	test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);
	test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
	SG_REF(test_features);

	CMulticlassLabels *gnd = new CMulticlassLabels(pred->get_num_labels());
	test_features->start_parser();
	for (int32_t i=0; i < pred->get_num_labels(); ++i)
	{
		test_features->get_next_example();
		gnd->set_int_label(i, test_features->get_label());
		test_features->release_example();
	}
	test_features->end_parser();

	int32_t n_correct = 0;
	for (index_t i=0; i < pred->get_num_labels(); ++i)
	{
		if (pred->get_int_label(i) == gnd->get_int_label(i))
			n_correct++;
		//SG_SPRINT("%d-%d ", pred->get_int_label(i), gnd->get_int_label(i));
	}
	SG_SPRINT("\n");

	SG_SPRINT("Multiclass Accuracy = %.2f%%\n", 100.0*n_correct / gnd->get_num_labels());

	SG_UNREF(train_features);
	SG_UNREF(test_features);
	SG_UNREF(train_file);
	SG_UNREF(test_file);
	SG_UNREF(cpt);
	SG_UNREF(pred);

	exit_shogun();

	return 0;
}
int main(int argc, char** argv)
{
	int32_t num_vectors = 0;
	int32_t num_feats   = 2;

	init_shogun_with_defaults();

	// Prepare to read a file for the training data
	char fname_feats[]  = "../data/fm_train_real.dat";
	char fname_labels[] = "../data/label_train_multiclass.dat";
	CStreamingAsciiFile* ffeats_train  = new CStreamingAsciiFile(fname_feats);
	CStreamingAsciiFile* flabels_train = new CStreamingAsciiFile(fname_labels);
	SG_REF(ffeats_train);
	SG_REF(flabels_train);

	CStreamingDenseFeatures< float64_t >* stream_features = 
		new CStreamingDenseFeatures< float64_t >(ffeats_train, false, 1024);

	CStreamingDenseFeatures< float64_t >* stream_labels = 
		new CStreamingDenseFeatures< float64_t >(flabels_train, true, 1024);

	SG_REF(stream_features);
	SG_REF(stream_labels);

	// Create a matrix with enough space to read all the feature vectors
	SGMatrix< float64_t > mat = SGMatrix< float64_t >(num_feats, 1000);

	// Read the values from the file and store them in mat
	SGVector< float64_t > vec;
	stream_features->start_parser();
	while ( stream_features->get_next_example() )
	{
		vec = stream_features->get_vector();

		for ( int32_t i = 0 ; i < num_feats ; ++i )
			mat.matrix[num_vectors*num_feats + i] = vec[i];

		num_vectors++;
		stream_features->release_example();
	}
	stream_features->end_parser();
	mat.num_cols = num_vectors;
	
	// Create features with the useful values from mat
	CDenseFeatures< float64_t >* features = new CDenseFeatures<float64_t>(mat);

	CMulticlassLabels* labels = new CMulticlassLabels(num_vectors);
	SG_REF(features);
	SG_REF(labels);

	// Read the labels from the file
	int32_t idx = 0;
	stream_labels->start_parser();
	while ( stream_labels->get_next_example() )
	{
		labels->set_int_label( idx++, (int32_t)stream_labels->get_label() );
		stream_labels->release_example();
	}
	stream_labels->end_parser();

	// Create liblinear svm classifier with L2-regularized L2-loss
	CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
	SG_REF(svm);

	// Add some configuration to the svm
	svm->set_epsilon(EPSILON);
	svm->set_bias_enabled(true);

	// Create a multiclass svm classifier that consists of several of the previous one
	CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
			new CMulticlassOneVsOneStrategy(), (CDotFeatures*) features, svm, labels);
	SG_REF(mc_svm);

	// Train the multiclass machine using the data passed in the constructor
	mc_svm->train();

	// Classify the training examples and show the results
	CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(mc_svm->apply());

	SGVector< int32_t > out_labels = output->get_int_labels();
	SGVector<int32_t>::display_vector(out_labels.vector, out_labels.vlen);

	// Free resources
	SG_UNREF(mc_svm);
	SG_UNREF(svm);
	SG_UNREF(output);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(ffeats_train);
	SG_UNREF(flabels_train);
	SG_UNREF(stream_features);
	SG_UNREF(stream_labels);
	exit_shogun();

	return 0;
}