void get_samples()
{
	ASSERT(samples.size() == fetchers.size());

	for (size_t i = 0; i < samples.size(); ++i)
	{
		CDenseFeatures<float64_t>* ptr =
			(CDenseFeatures<float64_t>*)fetchers[i]->fetch(samples[i].ptr);
		ptr->get_feature_matrix().display_matrix();
	}
}
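// get_samples() above is a fragment: it assumes the enclosing class holds a
// `samples` container (each element exposing a raw object pointer via `.ptr`)
// and a parallel `fetchers` container whose elements return features from
// fetch(). A hypothetical sketch of that assumed context (none of these names
// come from the original source):
#include <vector>

struct Sample
{
	CSGObject* ptr; // raw object the fetcher turns into features
};

class Fetcher
{
public:
	virtual CFeatures* fetch(CSGObject* obj)=0; // assumed interface
};

std::vector<Sample> samples;    // filled elsewhere
std::vector<Fetcher*> fetchers; // one fetcher per sample, same order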
int main()
{
	const int32_t feature_cache=0;
	const int32_t kernel_cache=0;
	const float64_t rbf_width=10;
	const float64_t svm_C=10;
	const float64_t svm_eps=0.001;

	init_shogun();

	gen_rand_data();

	// create train labels
	CLabels* labels=new CLabels(SGVector<float64_t>(lab, NUM));

	// create train features
	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(feature_cache);
	SG_REF(features);
	features->set_feature_matrix(feat);

	// create gaussian kernel
	CGaussianKernel* kernel = new CGaussianKernel(kernel_cache, rbf_width);
	SG_REF(kernel);
	kernel->init(features, features);

	// create svm via libsvm and train
	CLibSVM* svm = new CLibSVM(svm_C, kernel, labels);
	SG_REF(svm);
	svm->set_epsilon(svm_eps);
	svm->train();

	printf("num_sv:%d b:%f\n", svm->get_num_support_vectors(), svm->get_bias());

	// classify + display output
	CLabels* out_labels=svm->apply();

	for (int32_t i=0; i<NUM; i++)
		printf("out[%d]=%f\n", i, out_labels->get_label(i));

	SG_UNREF(out_labels);
	SG_UNREF(kernel);
	SG_UNREF(features);
	SG_UNREF(svm);

	exit_shogun();
	return 0;
}
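// The example above relies on the globals `lab`, `feat`, `NUM` and the helper
// gen_rand_data(), all defined elsewhere in the original example. A minimal
// sketch of what they might look like (two classes offset by DIST; the names,
// sizes, and data layout are assumptions, not the original code):
#define NUM 100
#define DIMS 2
#define DIST 0.5

float64_t* lab;
SGMatrix<float64_t> feat;

void gen_rand_data()
{
	lab=SG_MALLOC(float64_t, NUM);
	feat=SGMatrix<float64_t>(DIMS, NUM);

	for (int32_t i=0; i<NUM; i++)
	{
		// first half of the points gets class -1, second half class +1
		lab[i]=(i<NUM/2) ? -1.0 : +1.0;

		// draw each coordinate uniformly and shift it by the class offset
		for (int32_t j=0; j<DIMS; j++)
			feat(j,i)=CMath::random(0.0, 1.0)+lab[i]*DIST;
	}
}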
SGMatrix<float64_t> CDimensionReductionPreprocessor::apply_to_feature_matrix(CFeatures* features)
{
	if (m_converter)
	{
		m_converter->set_target_dim(m_target_dim);
		CDenseFeatures<float64_t>* embedding = m_converter->embed(features);
		SGMatrix<float64_t> embedding_feature_matrix = embedding->steal_feature_matrix();
		((CDenseFeatures<float64_t>*)features)->set_feature_matrix(embedding_feature_matrix);
		delete embedding;
		return embedding_feature_matrix;
	}
	else
	{
		SG_WARNING("Converter to process was not set.\n")
		return ((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
	}
}
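// A minimal usage sketch for apply_to_feature_matrix() above, assuming the
// preprocessor was constructed around an embedding converter (here
// CLocallyLinearEmbedding; any CEmbeddingConverter should work the same way).
// `data` is an assumed SGMatrix<float64_t> of input features:
CDenseFeatures<float64_t>* dense=new CDenseFeatures<float64_t>(data);
CLocallyLinearEmbedding* lle=new CLocallyLinearEmbedding();
CDimensionReductionPreprocessor* preproc=new CDimensionReductionPreprocessor(lle);
preproc->set_target_dim(2);

// the features are replaced in place by their 2-dimensional embedding,
// which is also returned
SGMatrix<float64_t> embedded=preproc->apply_to_feature_matrix(dense);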
template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
{
	ASSERT(df);
	ASSERT(df->get_feature_type() == get_feature_type());
	ASSERT(df->get_feature_class() == get_feature_class());
	CDenseFeatures<ST>* sf = (CDenseFeatures<ST>*) df;

	int32_t len1, len2;
	bool free1, free2;

	ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
	ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);

	float64_t result = SGVector<ST>::dot(vec1, vec2, len1);

	free_feature_vector(vec1, vec_idx1, free1);
	sf->free_feature_vector(vec2, vec_idx2, free2);

	return result;
}
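// A quick usage sketch for dot() above: the dot product between vector vec_idx1
// of one dense feature object and vector vec_idx2 of another (the vectors must
// have equal length; the 3x2 matrices here are an arbitrary choice):
SGMatrix<float64_t> a(3,2);
SGMatrix<float64_t> b(3,2);
a.set_const(1.0);
b.set_const(2.0);

CDenseFeatures<float64_t>* fa=new CDenseFeatures<float64_t>(a);
CDenseFeatures<float64_t>* fb=new CDenseFeatures<float64_t>(b);

// 1*2 + 1*2 + 1*2 = 6
float64_t d=fa->dot(0, fb, 1);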
int main(int argc, char *argv[])
{
	// initialize shogun; without the message callbacks shogun won't be able to print
	init_shogun(&print_message, &print_message, &print_message);

	int32_t x_n=4, x_d=2; // X dimensions: x_n is the number of data points, x_d the dimensionality

	SGMatrix<float64_t> fmatrix(x_d, x_n);

	SG_SPRINT("\nTEST 1:\n\n");

	/* initialize feature matrix */
	for (int i=0; i<x_n*x_d; i++)
		fmatrix.matrix[i] = i+1;

	SG_SPRINT("FEATURE MATRIX :\n");
	CMath::display_matrix(fmatrix.matrix, x_d, x_n);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(fmatrix);
	SG_REF(features);

	/* create labels, two classes */
	CLabels* labels=new CLabels(x_n);
	labels->set_label(0, 1);
	labels->set_label(1, -1);
	labels->set_label(2, 1);
	labels->set_label(3, 1);
	SG_REF(labels);

	/* train Newton SVM */
	float64_t lambda=1.0;
	int32_t iter=20;

	CNewtonSVM* nsvm = new CNewtonSVM(lambda, features, labels, iter);
	SG_REF(nsvm);
	nsvm->train();
	SG_UNREF(labels);
	SG_UNREF(nsvm);

	SG_SPRINT("TEST 2:\n\n");

	x_n=5;
	x_d=3;
	SGMatrix<float64_t> fmatrix2(x_d, x_n);

	for (int i=0; i<x_n*x_d; i++)
		fmatrix2.matrix[i] = i+1;

	SG_SPRINT("FEATURE MATRIX :\n");
	CMath::display_matrix(fmatrix2.matrix, x_d, x_n);

	// reuse the features object; no second SG_REF here, since the reference
	// taken above is still held and released by the single SG_UNREF below
	features->set_feature_matrix(fmatrix2);

	/* create labels, two classes */
	CLabels* labels2=new CLabels(x_n);
	labels2->set_label(0, 1);
	labels2->set_label(1, -1);
	labels2->set_label(2, 1);
	labels2->set_label(3, 1);
	labels2->set_label(4, -1);
	SG_REF(labels2);

	/* train Newton SVM */
	lambda=1.0;
	iter=20;

	CNewtonSVM* nsvm2 = new CNewtonSVM(lambda, features, labels2, iter);
	SG_REF(nsvm2);
	nsvm2->train();
	SG_UNREF(labels2);
	SG_UNREF(nsvm2);

	SG_UNREF(features);
	exit_shogun();
	return 0;
}
void CLMNN::train(SGMatrix<float64_t> init_transform)
{
	SG_DEBUG("Entering CLMNN::train().\n")

	// Check training data and arguments, initializing, if necessary, init_transform
	CLMNNImpl::check_training_setup(m_features, m_labels, init_transform);

	// Initializations

	// cast is safe, check_training_setup ensures features are dense
	CDenseFeatures<float64_t>* x = static_cast<CDenseFeatures<float64_t>*>(m_features);
	CMulticlassLabels* y = CLabelsFactory::to_multiclass(m_labels);
	SG_DEBUG("%d input vectors with %d dimensions.\n", x->get_num_vectors(), x->get_num_features());

	// Use Eigen matrix for the linear transform L. The Mahalanobis distance is L^T*L
	MatrixXd L = Map<MatrixXd>(init_transform.matrix, init_transform.num_rows,
			init_transform.num_cols);
	// Compute target or genuine neighbours
	SG_DEBUG("Finding target nearest neighbors.\n")
	SGMatrix<index_t> target_nn = CLMNNImpl::find_target_nn(x, y, m_k);
	// Initialize (sub-)gradient
	SG_DEBUG("Summing outer products for (sub-)gradient initialization.\n")
	MatrixXd gradient = (1-m_regularization)*CLMNNImpl::sum_outer_products(x, target_nn);
	// Value of the objective function at every iteration
	SGVector<float64_t> obj(m_maxiter);
	// The step size is modified depending on how the objective changes, leave the
	// step size member unchanged and use a local one
	float64_t stepsize = m_stepsize;
	// Last active set of impostors computed exactly, current and previous impostors sets
	ImpostorsSetType exact_impostors, cur_impostors, prev_impostors;
	// Iteration counter
	uint32_t iter = 0;
	// Criterion for termination
	bool stop = false;
	// Make space for the training statistics
	m_statistics->resize(m_maxiter);

	// Main loop
	while (!stop)
	{
		SG_PROGRESS(iter, 0, m_maxiter)

		// Find current set of impostors
		SG_DEBUG("Finding impostors.\n")
		cur_impostors = CLMNNImpl::find_impostors(x, y, L, target_nn, iter, m_correction);
		SG_DEBUG("Found %d impostors in the current set.\n", cur_impostors.size())

		// (Sub-)gradient computation
		SG_DEBUG("Updating gradient.\n")
		CLMNNImpl::update_gradient(x, gradient, cur_impostors, prev_impostors, m_regularization);
		// Take gradient step
		SG_DEBUG("Taking gradient step.\n")
		CLMNNImpl::gradient_step(L, gradient, stepsize, m_diagonal);

		// Compute the objective, trace of Mahalanobis distance matrix (L squared) times the gradient
		// plus the number of current impostors to account for the margin
		SG_DEBUG("Computing objective.\n")
		obj[iter] = TRACE(L.transpose()*L, gradient) + m_regularization*cur_impostors.size();

		// Correct step size
		CLMNNImpl::correct_stepsize(stepsize, obj, iter);

		// Check termination criterion
		stop = CLMNNImpl::check_termination(stepsize, obj, iter, m_maxiter,
				m_stepsize_threshold, m_obj_threshold);

		// Update iteration counter
		iter = iter + 1;
		// Update previous set of impostors
		prev_impostors = cur_impostors;

		// Store statistics for this iteration
		m_statistics->set(iter-1, obj[iter-1], stepsize, cur_impostors.size());

		SG_DEBUG("iteration=%d, objective=%.4f, #impostors=%4d, stepsize=%.4E\n",
				iter, obj[iter-1], cur_impostors.size(), stepsize)
	}

	// Truncate statistics in case convergence was reached in less than maxiter iterations
	m_statistics->resize(iter);

	// Store the transformation found in the class attribute
	int32_t nfeats = x->get_num_features();
	float64_t* cloned_data = SGMatrix<float64_t>::clone_matrix(L.data(), nfeats, nfeats);
	m_linear_transform = SGMatrix<float64_t>(cloned_data, nfeats, nfeats);

	SG_DEBUG("Leaving CLMNN::train().\n")
}
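// A minimal usage sketch for CLMNN::train() above. `data` and `label_vector`
// are assumed inputs (SGMatrix<float64_t> and SGVector<float64_t>); k=3 target
// neighbors is an arbitrary choice:
CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(data);
CMulticlassLabels* lbls=new CMulticlassLabels(label_vector);

CLMNN* lmnn=new CLMNN(feats, lbls, 3);
SG_REF(lmnn);

// calling train() without an argument lets check_training_setup() initialize
// the transform internally
lmnn->train();
SGMatrix<float64_t> L=lmnn->get_linear_transform();
SG_UNREF(lmnn);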
void CKMeans::clustknb(bool use_old_mus, float64_t *mus_start)
{
	ASSERT(distance && distance->get_feature_type()==F_DREAL);
	CDenseFeatures<float64_t>* lhs=
		(CDenseFeatures<float64_t>*) distance->get_lhs();
	ASSERT(lhs && lhs->get_num_features()>0 && lhs->get_num_vectors()>0);

	int32_t XSize=lhs->get_num_vectors();
	dimensions=lhs->get_num_features();

	int32_t i, changed=1;
	const int32_t XDimk=dimensions*k;
	int32_t iter=0;

	R=SGVector<float64_t>(k);

	mus=SGMatrix<float64_t>(dimensions, k);

	int32_t *ClList=SG_CALLOC(int32_t, XSize);
	float64_t *weights_set=SG_CALLOC(float64_t, k);
	float64_t *dists=SG_CALLOC(float64_t, k*XSize);

	/// replace rhs feature vectors
	CDenseFeatures<float64_t>* rhs_mus = new CDenseFeatures<float64_t>(0);
	CFeatures* rhs_cache = distance->replace_rhs(rhs_mus);

	int32_t vlen=0;
	bool vfree=false;
	float64_t* vec=NULL;

	/* ClList=zeros(XSize,1) ; */
	memset(ClList, 0, sizeof(int32_t)*XSize);
	/* weights_set=zeros(k,1) ; */
	memset(weights_set, 0, sizeof(float64_t)*k);

	/* cluster_centers=zeros(dimensions, k) ; */
	memset(mus.matrix, 0, sizeof(float64_t)*XDimk);

	if (!use_old_mus)
	{
		for (i=0; i<XSize; i++)
		{
			const int32_t Cl=CMath::random(0, k-1);
			int32_t j;
			float64_t weight=Weights.vector[i];

			weights_set[Cl]+=weight;
			ClList[i]=Cl;

			vec=lhs->get_feature_vector(i, vlen, vfree);

			for (j=0; j<dimensions; j++)
				mus.matrix[Cl*dimensions+j] += weight*vec[j];

			lhs->free_feature_vector(vec, i, vfree);
		}

		for (i=0; i<k; i++)
		{
			int32_t j;

			if (weights_set[i]!=0.0)
				for (j=0; j<dimensions; j++)
					mus.matrix[i*dimensions+j] /= weights_set[i];
		}
	}
	else
	{
		ASSERT(mus_start);

		/// set rhs to mus_start
		rhs_mus->copy_feature_matrix(SGMatrix<float64_t>(mus_start, dimensions, k));
		float64_t* p_dists=dists;

		for (int32_t idx=0; idx<XSize; idx++, p_dists+=k)
			distances_rhs(p_dists, 0, k, idx);
		p_dists=NULL;

		for (i=0; i<XSize; i++)
		{
			float64_t mini=dists[i*k];
			int32_t Cl=0, j;

			for (j=1; j<k; j++)
			{
				if (dists[i*k+j]<mini)
				{
					Cl=j;
					mini=dists[i*k+j];
				}
			}
			ClList[i]=Cl;
		}

		/* Compute the sum of all points belonging to a cluster
		 * and count the points */
		for (i=0; i<XSize; i++)
		{
			const int32_t Cl=ClList[i];
			float64_t weight=Weights.vector[i];
			weights_set[Cl]+=weight;
#ifndef MUSRECALC
			int32_t j;

			vec=lhs->get_feature_vector(i, vlen, vfree);

			for (j=0; j<dimensions; j++)
				mus.matrix[Cl*dimensions+j] += weight*vec[j];

			lhs->free_feature_vector(vec, i, vfree);
#endif
		}
#ifndef MUSRECALC
		/* normalization to get the mean */
		for (i=0; i<k; i++)
		{
			if (weights_set[i]!=0.0)
			{
				int32_t j;
				for (j=0; j<dimensions; j++)
					mus.matrix[i*dimensions+j] /= weights_set[i];
			}
		}
#endif
	}

	while (changed && (iter<max_iter))
	{
		iter++;
		if (iter==max_iter-1)
			SG_WARNING("kmeans clustering changed throughout %d iterations, stopping...\n", max_iter-1);

		if (iter%1000 == 0)
			SG_INFO("Iteration[%d/%d]: Assignment of %i patterns changed.\n", iter, max_iter, changed);
		changed=0;

#ifdef MUSRECALC
		/* mus=zeros(dimensions, k) ; */
		memset(mus.matrix, 0, sizeof(float64_t)*XDimk);

		for (i=0; i<XSize; i++)
		{
			int32_t j;
			int32_t Cl=ClList[i];
			float64_t weight=Weights.vector[i];

			vec=lhs->get_feature_vector(i, vlen, vfree);

			for (j=0; j<dimensions; j++)
				mus.matrix[Cl*dimensions+j] += weight*vec[j];

			lhs->free_feature_vector(vec, i, vfree);
		}

		for (i=0; i<k; i++)
		{
			int32_t j;

			if (weights_set[i]!=0.0)
				for (j=0; j<dimensions; j++)
					mus.matrix[i*dimensions+j] /= weights_set[i];
		}
#endif

		/// update rhs
		rhs_mus->copy_feature_matrix(mus);

		for (i=0; i<XSize; i++)
		{
			/* ks=ceil(rand(1,XSize)*XSize) ; */
			const int32_t Pat=CMath::random(0, XSize-1);
			const int32_t ClList_Pat=ClList[Pat];
			int32_t imini, j;
			float64_t mini, weight;

			weight=Weights.vector[Pat];

			/* compute the distance of this point to all centers */
			for (int32_t idx_k=0; idx_k<k; idx_k++)
				dists[idx_k]=distance->distance(Pat, idx_k);

			/* [mini,imini]=min(dists(:,i)) ; */
			imini=0;
			mini=dists[0];
			for (j=1; j<k; j++)
				if (dists[j]<mini)
				{
					mini=dists[j];
					imini=j;
				}

			if (imini!=ClList_Pat)
			{
				changed=changed+1;

				/* weights_set(imini) = weights_set(imini) + weight ; */
				weights_set[imini]+=weight;
				/* weights_set(j) = weights_set(j) - weight ; */
				weights_set[ClList_Pat]-=weight;

				vec=lhs->get_feature_vector(Pat, vlen, vfree);

				for (j=0; j<dimensions; j++)
				{
					mus.matrix[imini*dimensions+j]-=(vec[j]
							-mus.matrix[imini*dimensions+j])
							*(weight/weights_set[imini]);
				}

				lhs->free_feature_vector(vec, Pat, vfree);

				/* mu_new = mu_old - (x - mu_old)/(n-1) */
				/* if weights_set(j)~=0 */
				if (weights_set[ClList_Pat]!=0.0)
				{
					vec=lhs->get_feature_vector(Pat, vlen, vfree);

					for (j=0; j<dimensions; j++)
					{
						mus.matrix[ClList_Pat*dimensions+j]-=
								(vec[j]
								-mus.matrix[ClList_Pat*dimensions+j])
								*(weight/weights_set[ClList_Pat]);
					}
					lhs->free_feature_vector(vec, Pat, vfree);
				}
				else
					/* mus(:,j)=zeros(dimensions,1) ; */
					for (j=0; j<dimensions; j++)
						mus.matrix[ClList_Pat*dimensions+j]=0;

				/* ClList(i)= imini ; */
				ClList[Pat]=imini;
			}
		}
	}

	/* compute the "variances" of the clusters */
	for (i=0; i<k; i++)
	{
		float64_t rmin1=0;
		float64_t rmin2=0;

		bool first_round=true;

		for (int32_t j=0; j<k; j++)
		{
			if (j!=i)
			{
				int32_t l;
				float64_t dist=0;

				for (l=0; l<dimensions; l++)
				{
					dist+=CMath::sq(
							mus.matrix[i*dimensions+l]
							-mus.matrix[j*dimensions+l]);
				}

				if (first_round)
				{
					rmin1=dist;
					rmin2=dist;
					first_round=false;
				}
				else
				{
					if ((dist<rmin2) && (dist>=rmin1))
						rmin2=dist;

					if (dist<rmin1)
					{
						rmin2=rmin1;
						rmin1=dist;
					}
				}
			}
		}

		R.vector[i]=(0.7*CMath::sqrt(rmin1)+0.3*CMath::sqrt(rmin2));
	}

	distance->replace_rhs(rhs_cache);
	delete rhs_mus;
	SG_FREE(ClList);
	SG_FREE(weights_set);
	SG_FREE(dists);
	SG_UNREF(lhs);
}
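// clustknb() above is an internal routine; the public path goes through
// CKMeans::train(). A minimal usage sketch (k=4 and the variable names are
// assumptions; `data` is an SGMatrix<float64_t> with one point per column):
CDenseFeatures<float64_t>* points=new CDenseFeatures<float64_t>(data);
CEuclideanDistance* dist=new CEuclideanDistance(points, points);

CKMeans* kmeans=new CKMeans(4, dist);
SG_REF(kmeans);
kmeans->train();

// cluster centers as a dimensions x k matrix, one center per column
SGMatrix<float64_t> centers=kmeans->get_cluster_centers();
SG_UNREF(kmeans);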
int main(int, char*[])
{
	init_shogun_with_defaults();

#ifdef HAVE_LAPACK // for CDataGenerator::generate_gaussians()

	// initialize the random number generator with a fixed seed, for repeatability
	CMath::init_random(10);

	// Prepare the training data
	const int num_features = 20;
	const int num_classes = 4;
	const int num_examples_per_class = 20;

	SGMatrix<float64_t> X;
	try
	{
		X = CDataGenerator::generate_gaussians(
			num_examples_per_class, num_classes, num_features);
	}
	catch (ShogunException& e)
	{
		// out of memory
		SG_SPRINT(e.get_exception_string());
		return 0;
	}

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);

	// Create a deep autoencoder
	CNeuralLayers* layers = new CNeuralLayers();
	layers
		->input(num_features)
		->rectified_linear(10)->rectified_linear(5)->rectified_linear(10)
		->linear(num_features);
	CDeepAutoencoder* ae = new CDeepAutoencoder(layers->done());

	// uncomment this line to enable info logging
	// ae->io->set_loglevel(MSG_INFO);

	// pre-train
	ae->pt_epsilon.set_const(1e-6);
	ae->pre_train(features);

	// fine-tune
	ae->train(features);

	// reconstruct the data
	CDenseFeatures<float64_t>* reconstructions = ae->reconstruct(features);
	SGMatrix<float64_t> X_reconstructed = reconstructions->get_feature_matrix();

	// find the average relative difference between the data and the reconstructions
	float64_t avg_diff = 0;
	int32_t N = X.num_rows*X.num_cols;
	for (int32_t i=0; i<N; i++)
		avg_diff += CMath::abs(X[i]-X_reconstructed[i])/CMath::abs(X[i]);
	avg_diff /= N;

	SG_SINFO("Average difference = %f %%\n", avg_diff*100);

	// Clean up
	SG_UNREF(ae);
	SG_UNREF(layers);
	SG_UNREF(features);
	SG_UNREF(reconstructions);

#endif

	exit_shogun();
	return 0;
}
CLabels* CAUCKernel::setup_auc_maximization(CLabels* labels)
{
	SG_INFO("setting up AUC maximization\n")

	ASSERT(labels);
	ASSERT(labels->get_label_type() == LT_BINARY);
	labels->ensure_valid();

	// get the original labels
	SGVector<int32_t> int_labels=((CBinaryLabels*) labels)->get_int_labels();
	ASSERT(subkernel->get_num_vec_rhs()==int_labels.vlen);

	// count positive and negative
	int32_t num_pos=0;
	int32_t num_neg=0;

	for (int32_t i=0; i<int_labels.vlen; i++)
	{
		if (int_labels.vector[i]==1)
			num_pos++;
		else
			num_neg++;
	}

	// create AUC features and labels (alternate labels)
	int32_t num_auc = num_pos*num_neg;
	SG_INFO("num_pos: %i num_neg: %i num_auc: %i\n", num_pos, num_neg, num_auc);

	SGMatrix<uint16_t> features_auc(2, num_auc);
	int32_t* labels_auc = SG_MALLOC(int32_t, num_auc);
	int32_t n=0;

	for (int32_t i=0; i<int_labels.vlen; i++)
	{
		if (int_labels.vector[i]!=1)
			continue;

		for (int32_t j=0; j<int_labels.vlen; j++)
		{
			if (int_labels.vector[j]!=-1)
				continue;

			// create about as many positively as negatively labeled examples
			if (n%2==0)
			{
				features_auc.matrix[n*2]=i;
				features_auc.matrix[n*2+1]=j;
				labels_auc[n]=1;
			}
			else
			{
				features_auc.matrix[n*2]=j;
				features_auc.matrix[n*2+1]=i;
				labels_auc[n]=-1;
			}

			n++;
			ASSERT(n<=num_auc);
		}
	}

	// create label object and attach it to svm
	CBinaryLabels* lab_auc = new CBinaryLabels(num_auc);
	lab_auc->set_int_labels(SGVector<int32_t>(labels_auc, num_auc, false));
	SG_REF(lab_auc);

	// create feature object
	CDenseFeatures<uint16_t>* f = new CDenseFeatures<uint16_t>(0);
	f->set_feature_matrix(features_auc);

	// create AUC kernel and attach the features
	init(f, f);

	SG_FREE(labels_auc);

	return lab_auc;
}
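// A minimal usage sketch for setup_auc_maximization() above: wrap an existing
// kernel in CAUCKernel, derive the pairwise AUC labels, and train an SVM on
// them. `feats` and `orig_labels` are assumed to exist, and the cache sizes
// and C value are arbitrary choices:
CGaussianKernel* subk=new CGaussianKernel(10, 2.0);
subk->init(feats, feats);

CAUCKernel* auc_kernel=new CAUCKernel(10, subk);
CLabels* auc_labels=auc_kernel->setup_auc_maximization(orig_labels);

CLibSVM* svm=new CLibSVM(1.0, auc_kernel, auc_labels);
svm->train();
SG_UNREF(auc_labels);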