CMulticlassLabels* CRelaxedTree::apply_multiclass(CFeatures* data)
{
	if (data != NULL)
	{
		CDenseFeatures<float64_t> *feats = dynamic_cast<CDenseFeatures<float64_t>*>(data);
		REQUIRE(feats != NULL, "Require non-NULL dense features of float64_t\n")
		set_features(feats);
	}

	// init kernels for all sub-machines
	for (int32_t i=0; i<m_machines->get_num_elements(); i++)
	{
		CSVM *machine = (CSVM*)m_machines->get_element(i);
		CKernel *kernel = machine->get_kernel();
		CFeatures* lhs = kernel->get_lhs();
		kernel->init(lhs, m_feats);
		SG_UNREF(machine);
		SG_UNREF(kernel);
		SG_UNREF(lhs);
	}

	// predict each vector by traversing the trained tree
	CMulticlassLabels *lab = new CMulticlassLabels(m_feats->get_num_vectors());
	SG_REF(lab);
	for (int32_t i=0; i < lab->get_num_labels(); ++i)
		lab->set_int_label(i, int32_t(apply_one(i)));

	return lab;
}
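// Minimal usage sketch for the method above (illustrative, not part of the
// library source): train a CRelaxedTree and let apply_multiclass() label test
// data. The Gaussian kernel, the LibLinear machine used for the confusion
// matrix, and the helper name are assumptions chosen for this sketch.
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/multiclass/MulticlassLibLinear.h>
#include <shogun/multiclass/tree/RelaxedTree.h>

using namespace shogun;

static CMulticlassLabels* relaxed_tree_predict(CDenseFeatures<float64_t>* train_feats,
		CMulticlassLabels* train_labels, CDenseFeatures<float64_t>* test_feats)
{
	CRelaxedTree* tree = new CRelaxedTree();
	SG_REF(tree);
	tree->set_labels(train_labels);
	tree->set_kernel(new CGaussianKernel());
	tree->set_machine_for_confusion_matrix(new CMulticlassLibLinear());
	tree->train(train_feats);

	// apply_multiclass() above re-initialises the sub-machine kernels on test_feats
	CMulticlassLabels* pred = tree->apply_multiclass(test_feats);
	SG_UNREF(tree);
	return pred; // already SG_REF'ed by apply_multiclass
}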
int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	/* dense features from matrix */
	// fname_feats and fname_labels are CSV file paths defined at file scope in the full example
	CCSVFile* feature_file = new CCSVFile(fname_feats);
	SGMatrix<float64_t> mat = SGMatrix<float64_t>();
	mat.load(feature_file);
	SG_UNREF(feature_file);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(mat);
	SG_REF(features);

	/* labels from vector */
	CCSVFile* label_file = new CCSVFile(fname_labels);
	SGVector<float64_t> label_vec;
	label_vec.load(label_file);
	SG_UNREF(label_file);

	CMulticlassLabels* labels = new CMulticlassLabels(label_vec);
	SG_REF(labels);

	SG_SPRINT("Performing ShareBoost on a %d-class problem\n", labels->get_num_classes());

	// Create ShareBoost Machine
	CShareBoost *machine = new CShareBoost(features, labels, 10);
	SG_REF(machine);

	machine->train();

	SGVector<int32_t> activeset = machine->get_activeset();
	SG_SPRINT("%d out of %d features are selected:\n", activeset.vlen, mat.num_rows);
	for (int32_t i=0; i < activeset.vlen; ++i)
		SG_SPRINT("activeset[%02d] = %d\n", i, activeset[i]);

	// restrict the feature space to the selected features before applying
	CDenseSubsetFeatures<float64_t> *subset_fea = new CDenseSubsetFeatures<float64_t>(features, machine->get_activeset());
	SG_REF(subset_fea);
	CMulticlassLabels* output = CLabelsFactory::to_multiclass(machine->apply(subset_fea));

	int32_t correct = 0;
	for (int32_t i=0; i < output->get_num_labels(); ++i)
		if (output->get_int_label(i) == labels->get_int_label(i))
			correct++;
	SG_SPRINT("Accuracy = %.4f\n", float64_t(correct)/labels->get_num_labels());

	// Free resources
	SG_UNREF(machine);
	SG_UNREF(output);
	SG_UNREF(subset_fea);
	SG_UNREF(features);
	SG_UNREF(labels);

	exit_shogun();
	return 0;
}
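// Optional: the manual accuracy loop above can be replaced by Shogun's
// evaluation class. A small sketch, assuming the same `output` and `labels`
// objects as in the listing.
#include <shogun/evaluation/MulticlassAccuracy.h>

CMulticlassAccuracy* evaluator = new CMulticlassAccuracy();
SG_REF(evaluator);
float64_t accuracy = evaluator->evaluate(output, labels); // fraction of matching labels
SG_SPRINT("Accuracy = %.4f\n", accuracy);
SG_UNREF(evaluator);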
int main(int argc, char** argv)
{
	int32_t num_vectors = 0;
	int32_t num_feats = 0;

	init_shogun_with_defaults();

	const char* fname_train = "../data/7class_example4_train.dense";
	CStreamingAsciiFile *train_file = new CStreamingAsciiFile(fname_train);
	SG_REF(train_file);

	CStreamingDenseFeatures<float64_t> *stream_features = new CStreamingDenseFeatures<float64_t>(train_file, true, 1024);
	SG_REF(stream_features);

	// read the stream into a dense matrix (capacity for up to 1000 examples)
	SGMatrix<float64_t> mat;
	SGVector<float64_t> labvec(1000);

	stream_features->start_parser();
	SGVector<float64_t> vec;
	while (stream_features->get_next_example())
	{
		vec = stream_features->get_vector();
		if (num_feats == 0)
		{
			num_feats = vec.vlen;
			mat = SGMatrix<float64_t>(num_feats, 1000);
		}
		std::copy(vec.vector, vec.vector+vec.vlen, mat.get_column_vector(num_vectors));
		labvec[num_vectors] = stream_features->get_label();
		num_vectors++;
		stream_features->release_example();
	}
	stream_features->end_parser();

	// shrink to the number of examples actually read
	mat.num_cols = num_vectors;
	labvec.vlen = num_vectors;

	CMulticlassLabels* labels = new CMulticlassLabels(labvec);
	SG_REF(labels);

	// Create features with the useful values from mat
	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(mat);
	SG_REF(features);

	SG_SPRINT("Performing ShareBoost on a %d-class problem\n", labels->get_num_classes());

	// Create ShareBoost Machine
	CShareBoost *machine = new CShareBoost(features, labels, 10);
	SG_REF(machine);

	machine->train();

	SGVector<int32_t> activeset = machine->get_activeset();
	SG_SPRINT("%d out of %d features are selected:\n", activeset.vlen, mat.num_rows);
	for (int32_t i=0; i < activeset.vlen; ++i)
		SG_SPRINT("activeset[%02d] = %d\n", i, activeset[i]);

	CDenseSubsetFeatures<float64_t> *subset_fea = new CDenseSubsetFeatures<float64_t>(features, machine->get_activeset());
	SG_REF(subset_fea);
	CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(machine->apply(subset_fea));

	int32_t correct = 0;
	for (int32_t i=0; i < output->get_num_labels(); ++i)
		if (output->get_int_label(i) == labels->get_int_label(i))
			correct++;
	SG_SPRINT("Accuracy = %.4f\n", float64_t(correct)/labels->get_num_labels());

	// Free resources
	SG_UNREF(machine);
	SG_UNREF(output);
	SG_UNREF(subset_fea);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(train_file);
	SG_UNREF(stream_features);

	exit_shogun();
	return 0;
}
SGVector<int32_t> CRelaxedTree::color_label_space(CSVM *svm, SGVector<int32_t> classes)
{
	SGVector<int32_t> mu(classes.vlen);
	CMulticlassLabels *labels = dynamic_cast<CMulticlassLabels *>(m_labels);

	SGVector<float64_t> resp = eval_binary_model_K(svm);
	ASSERT(resp.vlen == labels->get_num_labels())

	SGVector<float64_t> xi_pos_class(classes.vlen), xi_neg_class(classes.vlen);
	SGVector<float64_t> delta_pos(classes.vlen), delta_neg(classes.vlen);

	for (int32_t i=0; i < classes.vlen; ++i)
	{
		// find number of instances from this class
		int32_t ni=0;
		for (int32_t j=0; j < labels->get_num_labels(); ++j)
		{
			if (labels->get_int_label(j) == classes[i])
				ni++;
		}

		// accumulate hinge-loss slacks for assigning this class to the
		// positive (xi_pos) or negative (xi_neg) side of the split
		xi_pos_class[i] = 0;
		xi_neg_class[i] = 0;
		for (int32_t j=0; j < resp.vlen; ++j)
		{
			if (labels->get_int_label(j) == classes[i])
			{
				xi_pos_class[i] += std::max(0.0, 1 - resp[j]);
				xi_neg_class[i] += std::max(0.0, 1 + resp[j]);
			}
		}

		delta_pos[i] = 1.0/ni * xi_pos_class[i] - float64_t(m_A)/m_svm_C;
		delta_neg[i] = 1.0/ni * xi_neg_class[i] - float64_t(m_A)/m_svm_C;

		if (delta_pos[i] > 0 && delta_neg[i] > 0)
		{
			mu[i] = 0;
		}
		else
		{
			if (delta_pos[i] < delta_neg[i])
				mu[i] = 1;
			else
				mu[i] = -1;
		}
	}

	// enforce balance constraints
	int32_t B_prime = 0;
	for (int32_t i=0; i < mu.vlen; ++i)
		B_prime += mu[i];

	if (B_prime > m_B)
		enforce_balance_constraints_upper(mu, delta_neg, delta_pos, B_prime, xi_neg_class);
	if (B_prime < -m_B)
		enforce_balance_constraints_lower(mu, delta_neg, delta_pos, B_prime, xi_neg_class);

	int32_t npos = 0;
	for (index_t i=0; i < mu.vlen; ++i)
	{
		if (mu[i] == 1)
			npos++;
	}

	if (npos == 0)
	{
		// no positive class: promote the class with the smallest positive slack
		index_t min_idx = SGVector<float64_t>::arg_min(xi_pos_class.vector, 1, xi_pos_class.vlen);
		mu[min_idx] = 1;
	}

	int32_t nneg = 0;
	for (index_t i=0; i < mu.vlen; ++i)
	{
		if (mu[i] == -1)
			nneg++;
	}

	if (nneg == 0)
	{
		// no negative class
		index_t min_idx = SGVector<float64_t>::arg_min(xi_neg_class.vector, 1, xi_neg_class.vlen);

		if (mu[min_idx] == 1 && (npos == 0 || npos == 1))
		{
			// avoid overwriting the only positive class: pick the smallest
			// negative slack among the classes not colored positive
			float64_t min_val = 0;
			int32_t i, min_i;
			for (i=0; i < xi_neg_class.vlen; ++i)
			{
				if (mu[i] != 1)
				{
					min_val = xi_neg_class[i];
					break;
				}
			}
			min_i = i;
			for (i=i+1; i < xi_neg_class.vlen; ++i)
			{
				if (mu[i] != 1 && xi_neg_class[i] < min_val)
				{
					min_val = xi_neg_class[i];
					min_i = i;
				}
			}
			mu[min_i] = -1;
		}
		else
		{
			mu[min_idx] = -1;
		}
	}

	return mu;
}
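// In formula form, the loop above computes, for each class k with n_k training
// instances and binary SVM responses f(x_j):
//
//   xi_k^+   = sum_{j: y_j = k} max(0, 1 - f(x_j))
//   xi_k^-   = sum_{j: y_j = k} max(0, 1 + f(x_j))
//   delta_k^+ = xi_k^+ / n_k - A / C,   delta_k^- = xi_k^- / n_k - A / C
//
// The coloring rule sets mu_k = 0 if both deltas are positive, otherwise
// mu_k = +1 when delta_k^+ < delta_k^-, else mu_k = -1; the remaining code
// enforces the balance constraint |sum_k mu_k| <= B and guarantees that at
// least one class lands on each side of the split.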
int main()
{
	init_shogun_with_defaults();

	const char* train_file_name = "../data/7class_example4_train.dense";
	const char* test_file_name = "../data/7class_example4_test.dense";

	CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
	SG_REF(train_file);

	CStreamingDenseFeatures<float32_t>* train_features = new CStreamingDenseFeatures<float32_t>(train_file, true, 1024);
	SG_REF(train_features);

	CRandomConditionalProbabilityTree *cpt = new CRandomConditionalProbabilityTree();
	cpt->set_num_passes(1);
	cpt->set_features(train_features);
	cpt->train();
	cpt->print_tree();

	CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);
	CStreamingDenseFeatures<float32_t>* test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
	SG_REF(test_features);

	CMulticlassLabels *pred = cpt->apply_multiclass(test_features);
	test_features->reset_stream();
	SG_SPRINT("num_labels = %d\n", pred->get_num_labels());

	// re-open the test stream to read the ground-truth labels
	SG_UNREF(test_features);
	SG_UNREF(test_file);
	test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);
	test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
	SG_REF(test_features);

	CMulticlassLabels *gnd = new CMulticlassLabels(pred->get_num_labels());
	test_features->start_parser();
	for (int32_t i=0; i < pred->get_num_labels(); ++i)
	{
		test_features->get_next_example();
		gnd->set_int_label(i, test_features->get_label());
		test_features->release_example();
	}
	test_features->end_parser();

	int32_t n_correct = 0;
	for (index_t i=0; i < pred->get_num_labels(); ++i)
	{
		if (pred->get_int_label(i) == gnd->get_int_label(i))
			n_correct++;
		//SG_SPRINT("%d-%d ", pred->get_int_label(i), gnd->get_int_label(i));
	}
	SG_SPRINT("\n");
	SG_SPRINT("Multiclass Accuracy = %.2f%%\n", 100.0*n_correct / gnd->get_num_labels());

	// Free resources
	SG_UNREF(train_features);
	SG_UNREF(test_features);
	SG_UNREF(train_file);
	SG_UNREF(test_file);
	SG_UNREF(cpt);
	SG_UNREF(pred);
	SG_UNREF(gnd); // release the ground-truth labels as well

	exit_shogun();
	return 0;
}
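// Side note: since the listing already calls reset_stream(), the ground-truth
// pass could in principle reuse the existing stream objects instead of
// re-opening the file. A sketch, assuming reset_stream() fully rewinds the
// underlying CStreamingAsciiFile:
test_features->reset_stream();
test_features->start_parser();
for (int32_t i=0; i < pred->get_num_labels(); ++i)
{
	test_features->get_next_example();
	gnd->set_int_label(i, test_features->get_label());
	test_features->release_example();
}
test_features->end_parser();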