Exemplo n.º 1
0
CMulticlassLabels* CRelaxedTree::apply_multiclass(CFeatures* data)
{
    if (data != NULL)
    {
        CDenseFeatures<float64_t> *feats = dynamic_cast<CDenseFeatures<float64_t>*>(data);
        REQUIRE(feats != NULL, ("Require non-NULL dense features of float64_t\n"))
        set_features(feats);
    }

    // init kernels for all sub-machines
    for (int32_t i=0; i<m_machines->get_num_elements(); i++)
    {
        CSVM *machine = (CSVM*)m_machines->get_element(i);
        CKernel *kernel = machine->get_kernel();
        CFeatures* lhs = kernel->get_lhs();
        kernel->init(lhs, m_feats);
        SG_UNREF(machine);
        SG_UNREF(kernel);
        SG_UNREF(lhs);
    }

    CMulticlassLabels *lab = new CMulticlassLabels(m_feats->get_num_vectors());
    SG_REF(lab);
    for (int32_t i=0; i < lab->get_num_labels(); ++i)
    {
        lab->set_int_label(i, int32_t(apply_one(i)));
    }

    return lab;
}
int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	/* dense features from matrix */
	CCSVFile* feature_file = new CCSVFile(fname_feats);
	SGMatrix<float64_t> mat=SGMatrix<float64_t>();
	mat.load(feature_file);
	SG_UNREF(feature_file);

	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
	SG_REF(features);

	/* labels from vector */
	CCSVFile* label_file = new CCSVFile(fname_labels);
	SGVector<float64_t> label_vec;
	label_vec.load(label_file);
	SG_UNREF(label_file);

	CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
	SG_REF(labels);

	SG_SPRINT("Performing ShareBoost on a %d-class problem\n", labels->get_num_classes());

	// Create ShareBoost Machine
	CShareBoost *machine = new CShareBoost(features, labels, 10);
	SG_REF(machine);

	machine->train();

	SGVector<int32_t> activeset = machine->get_activeset();
	SG_SPRINT("%d out of %d features are selected:\n", activeset.vlen, mat.num_rows);
	for (int32_t i=0; i < activeset.vlen; ++i)
		SG_SPRINT("activeset[%02d] = %d\n", i, activeset[i]);

	CDenseSubsetFeatures<float64_t> *subset_fea = new CDenseSubsetFeatures<float64_t>(features, machine->get_activeset());
	SG_REF(subset_fea);
	CMulticlassLabels* output = CLabelsFactory::to_multiclass(machine->apply(subset_fea));

	int32_t correct = 0;
	for (int32_t i=0; i < output->get_num_labels(); ++i)
		if (output->get_int_label(i) == labels->get_int_label(i))
			correct++;
	SG_SPRINT("Accuracy = %.4f\n", float64_t(correct)/labels->get_num_labels());

	// Free resources
	SG_UNREF(machine);
	SG_UNREF(output);
	SG_UNREF(subset_fea);
	SG_UNREF(features);
	SG_UNREF(labels);
	exit_shogun();

	return 0;
}
int main(int argc, char** argv)
{
	int32_t num_vectors = 0;
	int32_t num_feats   = 0;

	init_shogun_with_defaults();

	const char*fname_train = "../data/7class_example4_train.dense";
	CStreamingAsciiFile *train_file = new CStreamingAsciiFile(fname_train);
	SG_REF(train_file);

	CStreamingDenseFeatures<float64_t> *stream_features = new CStreamingDenseFeatures<float64_t>(train_file, true, 1024);
	SG_REF(stream_features);

	SGMatrix<float64_t> mat;
	SGVector<float64_t> labvec(1000);

	stream_features->start_parser();
	SGVector< float64_t > vec;
	while (stream_features->get_next_example())
	{
		vec = stream_features->get_vector();
		if (num_feats == 0)
		{
			num_feats = vec.vlen;
			mat = SGMatrix<float64_t>(num_feats, 1000);
		}
		std::copy(vec.vector, vec.vector+vec.vlen, mat.get_column_vector(num_vectors));
		labvec[num_vectors] = stream_features->get_label();
		num_vectors++;
		stream_features->release_example();
	}
	stream_features->end_parser();
	mat.num_cols = num_vectors;
	labvec.vlen = num_vectors;
	
	CMulticlassLabels* labels = new CMulticlassLabels(labvec);
	SG_REF(labels);

	// Create features with the useful values from mat
	CDenseFeatures< float64_t >* features = new CDenseFeatures<float64_t>(mat);
	SG_REF(features);

	SG_SPRINT("Performing ShareBoost on a %d-class problem\n", labels->get_num_classes());

	// Create ShareBoost Machine
	CShareBoost *machine = new CShareBoost(features, labels, 10);
	SG_REF(machine);

	machine->train();

	SGVector<int32_t> activeset = machine->get_activeset();
	SG_SPRINT("%d out of %d features are selected:\n", activeset.vlen, mat.num_rows);
	for (int32_t i=0; i < activeset.vlen; ++i)
		SG_SPRINT("activeset[%02d] = %d\n", i, activeset[i]);

	CDenseSubsetFeatures<float64_t> *subset_fea = new CDenseSubsetFeatures<float64_t>(features, machine->get_activeset());
	SG_REF(subset_fea);
	CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(machine->apply(subset_fea));

	int32_t correct = 0;
	for (int32_t i=0; i < output->get_num_labels(); ++i)
		if (output->get_int_label(i) == labels->get_int_label(i))
			correct++;
	SG_SPRINT("Accuracy = %.4f\n", float64_t(correct)/labels->get_num_labels());

	// Free resources
	SG_UNREF(machine);
	SG_UNREF(output);
	SG_UNREF(subset_fea);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(train_file);
	SG_UNREF(stream_features);
	exit_shogun();

	return 0;
}
Exemplo n.º 4
0
SGVector<int32_t> CRelaxedTree::color_label_space(CSVM *svm, SGVector<int32_t> classes)
{
    SGVector<int32_t> mu(classes.vlen);
    CMulticlassLabels *labels = dynamic_cast<CMulticlassLabels *>(m_labels);

    SGVector<float64_t> resp = eval_binary_model_K(svm);
    ASSERT(resp.vlen == labels->get_num_labels())

    SGVector<float64_t> xi_pos_class(classes.vlen), xi_neg_class(classes.vlen);
    SGVector<float64_t> delta_pos(classes.vlen), delta_neg(classes.vlen);

    for (int32_t i=0; i < classes.vlen; ++i)
    {
        // find number of instances from this class
        int32_t ni=0;
        for (int32_t j=0; j < labels->get_num_labels(); ++j)
        {
            if (labels->get_int_label(j) == classes[i])
            {
                ni++;
            }
        }

        xi_pos_class[i] = 0;
        xi_neg_class[i] = 0;
        for (int32_t j=0; j < resp.vlen; ++j)
        {
            if (labels->get_int_label(j) == classes[i])
            {
                xi_pos_class[i] += std::max(0.0, 1 - resp[j]);
                xi_neg_class[i] += std::max(0.0, 1 + resp[j]);
            }
        }

        delta_pos[i] = 1.0/ni * xi_pos_class[i] - float64_t(m_A)/m_svm_C;
        delta_neg[i] = 1.0/ni * xi_neg_class[i] - float64_t(m_A)/m_svm_C;

        if (delta_pos[i] > 0 && delta_neg[i] > 0)
        {
            mu[i] = 0;
        }
        else
        {
            if (delta_pos[i] < delta_neg[i])
                mu[i] = 1;
            else
                mu[i] = -1;
        }

    }

    // enforce balance constraints
    int32_t B_prime = 0;
    for (int32_t i=0; i < mu.vlen; ++i)
        B_prime += mu[i];

    if (B_prime > m_B)
    {
        enforce_balance_constraints_upper(mu, delta_neg, delta_pos, B_prime, xi_neg_class);
    }
    if (B_prime < -m_B)
    {
        enforce_balance_constraints_lower(mu, delta_neg, delta_pos, B_prime, xi_neg_class);
    }

    int32_t npos = 0;
    for (index_t i=0; i < mu.vlen; ++i)
    {
        if (mu[i] == 1)
            npos++;
    }

    if (npos == 0)
    {
        // no positive class
        index_t min_idx = SGVector<float64_t>::arg_min(xi_pos_class.vector, 1, xi_pos_class.vlen);
        mu[min_idx] = 1;
    }

    int32_t nneg = 0;
    for (index_t i=0; i < mu.vlen; ++i)
    {
        if (mu[i] == -1)
            nneg++;
    }

    if (nneg == 0)
    {
        // no negative class
        index_t min_idx = SGVector<float64_t>::arg_min(xi_neg_class.vector, 1, xi_neg_class.vlen);
        if (mu[min_idx] == 1 && (npos == 0 || npos == 1))
        {
            // avoid overwritting the only positive class
            float64_t min_val = 0;
            int32_t i, min_i;
            for (i=0; i < xi_neg_class.vlen; ++i)
            {
                if (mu[i] != 1)
                {
                    min_val = xi_neg_class[i];
                    break;
                }
            }
            min_i = i;
            for (i=i+1; i < xi_neg_class.vlen; ++i)
            {
                if (mu[i] != 1 && xi_neg_class[i] < min_val)
                {
                    min_val = xi_neg_class[i];
                    min_i = i;
                }
            }
            mu[min_i] = -1;
        }
        else
        {
            mu[min_idx] = -1;
        }
    }

    return mu;
}
int main()
{
	init_shogun_with_defaults();

	const char* train_file_name = "../data/7class_example4_train.dense";
	const char* test_file_name = "../data/7class_example4_test.dense";
	CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
	SG_REF(train_file);

	CStreamingDenseFeatures<float32_t>* train_features = new CStreamingDenseFeatures<float32_t>(train_file, true, 1024);
	SG_REF(train_features);

	CRandomConditionalProbabilityTree *cpt = new CRandomConditionalProbabilityTree();
	cpt->set_num_passes(1);
	cpt->set_features(train_features);
	cpt->train();
	cpt->print_tree();

	CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);
	CStreamingDenseFeatures<float32_t>* test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
	SG_REF(test_features);

	CMulticlassLabels *pred = cpt->apply_multiclass(test_features);
	test_features->reset_stream();
	SG_SPRINT("num_labels = %d\n", pred->get_num_labels());

	SG_UNREF(test_features);
	SG_UNREF(test_file);
	test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);
	test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
	SG_REF(test_features);

	CMulticlassLabels *gnd = new CMulticlassLabels(pred->get_num_labels());
	test_features->start_parser();
	for (int32_t i=0; i < pred->get_num_labels(); ++i)
	{
		test_features->get_next_example();
		gnd->set_int_label(i, test_features->get_label());
		test_features->release_example();
	}
	test_features->end_parser();

	int32_t n_correct = 0;
	for (index_t i=0; i < pred->get_num_labels(); ++i)
	{
		if (pred->get_int_label(i) == gnd->get_int_label(i))
			n_correct++;
		//SG_SPRINT("%d-%d ", pred->get_int_label(i), gnd->get_int_label(i));
	}
	SG_SPRINT("\n");

	SG_SPRINT("Multiclass Accuracy = %.2f%%\n", 100.0*n_correct / gnd->get_num_labels());

	SG_UNREF(train_features);
	SG_UNREF(test_features);
	SG_UNREF(train_file);
	SG_UNREF(test_file);
	SG_UNREF(cpt);
	SG_UNREF(pred);

	exit_shogun();

	return 0;
}