コード例 #1
0
ファイル: ShareBoost.cpp プロジェクト: Argram/shogun
int32_t CShareBoost::choose_feature()
{
	// Greedily select the next feature to add to the active set: for each
	// feature not yet selected, score it by the (sample-count normalized)
	// l1-norm of its correlation with the rho residuals over all classes,
	// and return the index of the highest-scoring feature.
	SGVector<float64_t> l1norm(m_fea.num_rows);
	for (int32_t j=0; j < m_fea.num_rows; ++j)
	{
		// features already in the active set get score 0 so they are
		// never selected a second time
		if (std::find(&m_activeset[0], &m_activeset[m_activeset.vlen], j) !=
				&m_activeset[m_activeset.vlen])
		{
			l1norm[j] = 0;
		}
		else
		{
			l1norm[j] = 0;
			CMulticlassLabels *lab = dynamic_cast<CMulticlassLabels *>(m_labels);
			for (int32_t k=0; k < m_multiclass_strategy->get_num_classes(); ++k)
			{
				float64_t abssum = 0;
				for (int32_t ii=0; ii < m_fea.num_cols; ++ii)
				{
					// gradient contribution for weight w_{k,j}:
					//   x_{j,ii} * (rho_{k,ii}/norm_ii - 1[y_ii == k])
					// BUGFIX: the indicator must compare the class index k
					// with the sample's label, not the feature index j
					abssum += m_fea(j, ii)*(m_rho(k, ii)/m_rho_norm[ii] -
							(k == lab->get_int_label(ii)));
				}
				l1norm[j] += CMath::abs(abssum);
			}
			l1norm[j] /= m_fea.num_cols;
		}
	}

	return SGVector<float64_t>::arg_max(l1norm.vector, 1, l1norm.vlen);
}
コード例 #2
0
int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	/* dense features from matrix */
	CCSVFile* feature_file = new CCSVFile(fname_feats);
	SGMatrix<float64_t> mat=SGMatrix<float64_t>();
	mat.load(feature_file);
	SG_UNREF(feature_file);

	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
	SG_REF(features);

	/* labels from vector */
	CCSVFile* label_file = new CCSVFile(fname_labels);
	SGVector<float64_t> label_vec;
	label_vec.load(label_file);
	SG_UNREF(label_file);

	CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
	SG_REF(labels);

	SG_SPRINT("Performing ShareBoost on a %d-class problem\n", labels->get_num_classes());

	// Create ShareBoost Machine
	CShareBoost *machine = new CShareBoost(features, labels, 10);
	SG_REF(machine);

	machine->train();

	SGVector<int32_t> activeset = machine->get_activeset();
	SG_SPRINT("%d out of %d features are selected:\n", activeset.vlen, mat.num_rows);
	for (int32_t i=0; i < activeset.vlen; ++i)
		SG_SPRINT("activeset[%02d] = %d\n", i, activeset[i]);

	CDenseSubsetFeatures<float64_t> *subset_fea = new CDenseSubsetFeatures<float64_t>(features, machine->get_activeset());
	SG_REF(subset_fea);
	CMulticlassLabels* output = CLabelsFactory::to_multiclass(machine->apply(subset_fea));

	int32_t correct = 0;
	for (int32_t i=0; i < output->get_num_labels(); ++i)
		if (output->get_int_label(i) == labels->get_int_label(i))
			correct++;
	SG_SPRINT("Accuracy = %.4f\n", float64_t(correct)/labels->get_num_labels());

	// Free resources
	SG_UNREF(machine);
	SG_UNREF(output);
	SG_UNREF(subset_fea);
	SG_UNREF(features);
	SG_UNREF(labels);
	exit_shogun();

	return 0;
}
コード例 #3
0
ファイル: ShareBoost.cpp プロジェクト: Argram/shogun
void CShareBoost::compute_rho()
{
	CMulticlassLabels *lab = dynamic_cast<CMulticlassLabels *>(m_labels);
	for (int32_t i=0; i < m_rho.num_rows; ++i)
	{ // i loop classes
		for (int32_t j=0; j < m_rho.num_cols; ++j)
		{ // j loop samples
			int32_t label = lab->get_int_label(j);

			m_rho(i,j) = CMath::exp((label == i) - m_pred(j, label) + m_pred(j, i));
		}
	}

	// normalize
	for (int32_t j=0; j < m_rho.num_cols; ++j)
	{
		m_rho_norm[j] = 0;
		for (int32_t i=0; i < m_rho.num_rows; ++i)
			m_rho_norm[j] += m_rho(i,j);
	}
}
コード例 #4
0
int main(int argc, char** argv)
{
	// Capacity of the preallocated buffers below. Without the bound check in
	// the read loop, a stream containing more than this many examples would
	// write past the end of mat/labvec (out-of-bounds write).
	const int32_t max_vectors = 1000;

	int32_t num_vectors = 0;
	int32_t num_feats   = 0;

	init_shogun_with_defaults();

	const char* fname_train = "../data/7class_example4_train.dense";
	CStreamingAsciiFile *train_file = new CStreamingAsciiFile(fname_train);
	SG_REF(train_file);

	CStreamingDenseFeatures<float64_t> *stream_features = new CStreamingDenseFeatures<float64_t>(train_file, true, 1024);
	SG_REF(stream_features);

	SGMatrix<float64_t> mat;
	SGVector<float64_t> labvec(max_vectors);

	/* materialize the streamed examples into a dense matrix + label vector */
	stream_features->start_parser();
	SGVector< float64_t > vec;
	while (stream_features->get_next_example())
	{
		// BUGFIX: guard against overflowing the fixed-size buffers
		if (num_vectors >= max_vectors)
		{
			stream_features->release_example();
			break;
		}
		vec = stream_features->get_vector();
		if (num_feats == 0)
		{
			// first example determines the dimensionality
			num_feats = vec.vlen;
			mat = SGMatrix<float64_t>(num_feats, max_vectors);
		}
		std::copy(vec.vector, vec.vector+vec.vlen, mat.get_column_vector(num_vectors));
		labvec[num_vectors] = stream_features->get_label();
		num_vectors++;
		stream_features->release_example();
	}
	stream_features->end_parser();

	// shrink the views to the number of examples actually read
	mat.num_cols = num_vectors;
	labvec.vlen = num_vectors;

	CMulticlassLabels* labels = new CMulticlassLabels(labvec);
	SG_REF(labels);

	// Create features with the useful values from mat
	CDenseFeatures< float64_t >* features = new CDenseFeatures<float64_t>(mat);
	SG_REF(features);

	SG_SPRINT("Performing ShareBoost on a %d-class problem\n", labels->get_num_classes());

	// Create ShareBoost Machine that selects 10 features
	CShareBoost *machine = new CShareBoost(features, labels, 10);
	SG_REF(machine);

	machine->train();

	SGVector<int32_t> activeset = machine->get_activeset();
	SG_SPRINT("%d out of %d features are selected:\n", activeset.vlen, mat.num_rows);
	for (int32_t i=0; i < activeset.vlen; ++i)
		SG_SPRINT("activeset[%02d] = %d\n", i, activeset[i]);

	// evaluate training accuracy on the selected feature subset
	CDenseSubsetFeatures<float64_t> *subset_fea = new CDenseSubsetFeatures<float64_t>(features, machine->get_activeset());
	SG_REF(subset_fea);
	CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(machine->apply(subset_fea));

	int32_t correct = 0;
	for (int32_t i=0; i < output->get_num_labels(); ++i)
		if (output->get_int_label(i) == labels->get_int_label(i))
			correct++;
	SG_SPRINT("Accuracy = %.4f\n", float64_t(correct)/labels->get_num_labels());

	// Free resources
	SG_UNREF(machine);
	SG_UNREF(output);
	SG_UNREF(subset_fea);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(train_file);
	SG_UNREF(stream_features);
	exit_shogun();

	return 0;
}
コード例 #5
0
ファイル: RelaxedTree.cpp プロジェクト: behollis/muViewBranch
/** Color the label space for a binary split: assign every class in `classes`
 * to the positive side (+1), the negative side (-1) or neither (0) based on
 * the per-sample responses of the binary model `svm`, then repair the
 * assignment so the balance constraint is respected and both sides end up
 * non-empty.
 *
 * @param svm binary SVM whose responses are obtained via eval_binary_model_K
 * @param classes class ids to be colored
 * @return mu, same length as classes, entries in {-1, 0, +1}
 */
SGVector<int32_t> CRelaxedTree::color_label_space(CSVM *svm, SGVector<int32_t> classes)
{
    SGVector<int32_t> mu(classes.vlen);
    CMulticlassLabels *labels = dynamic_cast<CMulticlassLabels *>(m_labels);

    // one response value per training sample
    SGVector<float64_t> resp = eval_binary_model_K(svm);
    ASSERT(resp.vlen == labels->get_num_labels())

    // xi_*: accumulated hinge losses per class; delta_*: per-class scores
    // used to decide on which side (if any) to place the class
    SGVector<float64_t> xi_pos_class(classes.vlen), xi_neg_class(classes.vlen);
    SGVector<float64_t> delta_pos(classes.vlen), delta_neg(classes.vlen);

    for (int32_t i=0; i < classes.vlen; ++i)
    {
        // find number of instances from this class
        int32_t ni=0;
        for (int32_t j=0; j < labels->get_num_labels(); ++j)
        {
            if (labels->get_int_label(j) == classes[i])
            {
                ni++;
            }
        }

        // hinge losses of this class's samples if the class were put on the
        // positive (1 - resp) vs the negative (1 + resp) side
        xi_pos_class[i] = 0;
        xi_neg_class[i] = 0;
        for (int32_t j=0; j < resp.vlen; ++j)
        {
            if (labels->get_int_label(j) == classes[i])
            {
                xi_pos_class[i] += std::max(0.0, 1 - resp[j]);
                xi_neg_class[i] += std::max(0.0, 1 + resp[j]);
            }
        }

        // average loss minus a reward term
        // NOTE(review): ni == 0 (a class with no samples) would divide by
        // zero here — assumed not to happen for the classes passed in; verify
        delta_pos[i] = 1.0/ni * xi_pos_class[i] - float64_t(m_A)/m_svm_C;
        delta_neg[i] = 1.0/ni * xi_neg_class[i] - float64_t(m_A)/m_svm_C;

        if (delta_pos[i] > 0 && delta_neg[i] > 0)
        {
            // both sides would be costly: leave the class out of this split
            mu[i] = 0;
        }
        else
        {
            // otherwise place it on the cheaper side
            if (delta_pos[i] < delta_neg[i])
                mu[i] = 1;
            else
                mu[i] = -1;
        }

    }

    // enforce balance constraints: |sum(mu)| must not exceed m_B
    int32_t B_prime = 0;
    for (int32_t i=0; i < mu.vlen; ++i)
        B_prime += mu[i];

    if (B_prime > m_B)
    {
        enforce_balance_constraints_upper(mu, delta_neg, delta_pos, B_prime, xi_neg_class);
    }
    if (B_prime < -m_B)
    {
        enforce_balance_constraints_lower(mu, delta_neg, delta_pos, B_prime, xi_neg_class);
    }

    // ensure at least one class is colored positive
    int32_t npos = 0;
    for (index_t i=0; i < mu.vlen; ++i)
    {
        if (mu[i] == 1)
            npos++;
    }

    if (npos == 0)
    {
        // no positive class: promote the class with the smallest positive loss
        index_t min_idx = SGVector<float64_t>::arg_min(xi_pos_class.vector, 1, xi_pos_class.vlen);
        mu[min_idx] = 1;
    }

    // ensure at least one class is colored negative
    int32_t nneg = 0;
    for (index_t i=0; i < mu.vlen; ++i)
    {
        if (mu[i] == -1)
            nneg++;
    }

    if (nneg == 0)
    {
        // no negative class: demote the class with the smallest negative loss
        index_t min_idx = SGVector<float64_t>::arg_min(xi_neg_class.vector, 1, xi_neg_class.vlen);
        if (mu[min_idx] == 1 && (npos == 0 || npos == 1))
        {
            // avoid overwriting the only positive class: scan for the
            // cheapest class NOT colored +1 and flip that one instead
            float64_t min_val = 0;
            int32_t i, min_i;
            for (i=0; i < xi_neg_class.vlen; ++i)
            {
                if (mu[i] != 1)
                {
                    min_val = xi_neg_class[i];
                    break;
                }
            }
            min_i = i;
            for (i=i+1; i < xi_neg_class.vlen; ++i)
            {
                if (mu[i] != 1 && xi_neg_class[i] < min_val)
                {
                    min_val = xi_neg_class[i];
                    min_i = i;
                }
            }
            mu[min_i] = -1;
        }
        else
        {
            mu[min_idx] = -1;
        }
    }

    return mu;
}
コード例 #6
0
ファイル: RelaxedTree.cpp プロジェクト: behollis/muViewBranch
/** Train the node's binary SVM starting from the coloring seeded by mu_entry:
 * the class pair in mu_entry provides the initial +1/-1 assignment, after
 * which training alternates between fitting the SVM on the currently colored
 * samples and re-coloring the label space with the fitted SVM, until the
 * coloring reaches a fixed point or m_max_num_iter iterations have run.
 *
 * @param mu_entry entry whose class-index pair seeds the initial coloring
 * @param classes class ids handled by this node
 * @param svm the SVM to configure and train in-place
 * @return the final coloring mu over `classes` (entries in {-1, 0, +1})
 */
SGVector<int32_t> CRelaxedTree::train_node_with_initialization(const CRelaxedTree::entry_t &mu_entry, SGVector<int32_t> classes, CSVM *svm)
{
    SGVector<int32_t> mu(classes.vlen), prev_mu(classes.vlen);
    mu.zero();
    // seed: one class on the positive side, one on the negative side
    mu[mu_entry.first.first] = 1;
    mu[mu_entry.first.second] = -1;

    // long_mu maps a global class id to its color (mu itself is indexed by
    // position within `classes`)
    SGVector<int32_t> long_mu(m_num_classes);
    svm->set_C(m_svm_C, m_svm_C);
    svm->set_epsilon(m_svm_epsilon);

    for (int32_t iiter=0; iiter < m_max_num_iter; ++iiter)
    {
        // rebuild the class-id -> color lookup from the current coloring
        long_mu.zero();
        for (int32_t i=0; i < classes.vlen; ++i)
        {
            if (mu[i] == 1)
                long_mu[classes[i]] = 1;
            else if (mu[i] == -1)
                long_mu[classes[i]] = -1;
        }

        // collect the samples whose class is colored (non-zero), together
        // with their +1/-1 binary labels
        SGVector<int32_t> subset(m_feats->get_num_vectors());
        SGVector<float64_t> binlab(m_feats->get_num_vectors());
        int32_t k=0;

        CMulticlassLabels *labs = dynamic_cast<CMulticlassLabels *>(m_labels);
        for (int32_t i=0; i < binlab.vlen; ++i)
        {
            int32_t lab = labs->get_int_label(i);
            binlab[i] = long_mu[lab];
            if (long_mu[lab] != 0)
                subset[k++] = i;
        }

        // shrink the subset view to the entries actually filled
        subset.vlen = k;

        // train the SVM on the colored samples only (via subset views)
        CBinaryLabels *binary_labels = new CBinaryLabels(binlab);
        SG_REF(binary_labels);
        binary_labels->add_subset(subset);
        m_feats->add_subset(subset);

        CKernel *kernel = (CKernel *)m_kernel->shallow_copy();
        kernel->init(m_feats, m_feats);
        svm->set_kernel(kernel);
        svm->set_labels(binary_labels);
        svm->train();

        // undo the subset views before releasing the temporary labels
        binary_labels->remove_subset();
        m_feats->remove_subset();
        SG_UNREF(binary_labels);

        // remember the previous coloring so convergence can be detected
        std::copy(&mu[0], &mu[mu.vlen], &prev_mu[0]);

        // re-color the label space using the freshly trained SVM
        mu = color_label_space(svm, classes);

        // stop once the coloring no longer changes (fixed point)
        bool bbreak = true;
        for (int32_t i=0; i < mu.vlen; ++i)
        {
            if (mu[i] != prev_mu[i])
            {
                bbreak = false;
                break;
            }
        }

        if (bbreak)
            break;
    }

    return mu;
}
コード例 #7
0
int main()
{
	init_shogun_with_defaults();

	const char* train_file_name = "../data/7class_example4_train.dense";
	const char* test_file_name = "../data/7class_example4_test.dense";
	CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
	SG_REF(train_file);

	CStreamingDenseFeatures<float32_t>* train_features = new CStreamingDenseFeatures<float32_t>(train_file, true, 1024);
	SG_REF(train_features);

	// train a random conditional probability tree in one pass over the stream
	CRandomConditionalProbabilityTree *cpt = new CRandomConditionalProbabilityTree();
	cpt->set_num_passes(1);
	cpt->set_features(train_features);
	cpt->train();
	cpt->print_tree();

	CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);
	CStreamingDenseFeatures<float32_t>* test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
	SG_REF(test_features);

	CMulticlassLabels *pred = cpt->apply_multiclass(test_features);
	test_features->reset_stream();
	SG_SPRINT("num_labels = %d\n", pred->get_num_labels());

	// reopen the test stream to read out the ground-truth labels
	SG_UNREF(test_features);
	SG_UNREF(test_file);
	test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);
	test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
	SG_REF(test_features);

	CMulticlassLabels *gnd = new CMulticlassLabels(pred->get_num_labels());
	test_features->start_parser();
	for (int32_t i=0; i < pred->get_num_labels(); ++i)
	{
		test_features->get_next_example();
		gnd->set_int_label(i, test_features->get_label());
		test_features->release_example();
	}
	test_features->end_parser();

	// compare predictions against ground truth
	int32_t n_correct = 0;
	for (index_t i=0; i < pred->get_num_labels(); ++i)
	{
		if (pred->get_int_label(i) == gnd->get_int_label(i))
			n_correct++;
		//SG_SPRINT("%d-%d ", pred->get_int_label(i), gnd->get_int_label(i));
	}
	SG_SPRINT("\n");

	SG_SPRINT("Multiclass Accuracy = %.2f%%\n", 100.0*n_correct / gnd->get_num_labels());

	// Free resources (BUGFIX: gnd was previously leaked — never SG_UNREF'd)
	SG_UNREF(train_features);
	SG_UNREF(test_features);
	SG_UNREF(train_file);
	SG_UNREF(test_file);
	SG_UNREF(cpt);
	SG_UNREF(pred);
	SG_UNREF(gnd);

	exit_shogun();

	return 0;
}