void CLMNN::train(SGMatrix<float64_t> init_transform) { SG_DEBUG("Entering CLMNN::train().\n") // Check training data and arguments, initializing, if necessary, init_transform CLMNNImpl::check_training_setup(m_features, m_labels, init_transform); // Initializations // cast is safe, check_training_setup ensures features are dense CDenseFeatures<float64_t>* x = static_cast<CDenseFeatures<float64_t>*>(m_features); CMulticlassLabels* y = CLabelsFactory::to_multiclass(m_labels); SG_DEBUG("%d input vectors with %d dimensions.\n", x->get_num_vectors(), x->get_num_features()); // Use Eigen matrix for the linear transform L. The Mahalanobis distance is L^T*L MatrixXd L = Map<MatrixXd>(init_transform.matrix, init_transform.num_rows, init_transform.num_cols); // Compute target or genuine neighbours SG_DEBUG("Finding target nearest neighbors.\n") SGMatrix<index_t> target_nn = CLMNNImpl::find_target_nn(x, y, m_k); // Initialize (sub-)gradient SG_DEBUG("Summing outer products for (sub-)gradient initialization.\n") MatrixXd gradient = (1-m_regularization)*CLMNNImpl::sum_outer_products(x, target_nn); // Value of the objective function at every iteration SGVector<float64_t> obj(m_maxiter); // The step size is modified depending on how the objective changes, leave the // step size member unchanged and use a local one float64_t stepsize = m_stepsize; // Last active set of impostors computed exactly, current and previous impostors sets ImpostorsSetType exact_impostors, cur_impostors, prev_impostors; // Iteration counter uint32_t iter = 0; // Criterion for termination bool stop = false; // Make space for the training statistics m_statistics->resize(m_maxiter); // Main loop while (!stop) { SG_PROGRESS(iter, 0, m_maxiter) // Find current set of impostors SG_DEBUG("Finding impostors.\n") cur_impostors = CLMNNImpl::find_impostors(x,y,L,target_nn,iter,m_correction); SG_DEBUG("Found %d impostors in the current set.\n", cur_impostors.size()) // (Sub-) gradient computation SG_DEBUG("Updating gradient.\n") CLMNNImpl::update_gradient(x, gradient, cur_impostors, prev_impostors, m_regularization); // Take gradient step SG_DEBUG("Taking gradient step.\n") CLMNNImpl::gradient_step(L, gradient, stepsize, m_diagonal); // Compute the objective, trace of Mahalanobis distance matrix (L squared) times the gradient // plus the number of current impostors to account for the margin SG_DEBUG("Computing objective.\n") obj[iter] = TRACE(L.transpose()*L,gradient) + m_regularization*cur_impostors.size(); // Correct step size CLMNNImpl::correct_stepsize(stepsize, obj, iter); // Check termination criterion stop = CLMNNImpl::check_termination(stepsize, obj, iter, m_maxiter, m_stepsize_threshold, m_obj_threshold); // Update iteration counter iter = iter + 1; // Update previous set of impostors prev_impostors = cur_impostors; // Store statistics for this iteration m_statistics->set(iter-1, obj[iter-1], stepsize, cur_impostors.size()); SG_DEBUG("iteration=%d, objective=%.4f, #impostors=%4d, stepsize=%.4E\n", iter, obj[iter-1], cur_impostors.size(), stepsize) } // Truncate statistics in case convergence was reached in less than maxiter m_statistics->resize(iter); // Store the transformation found in the class attribute int32_t nfeats = x->get_num_features(); float64_t* cloned_data = SGMatrix<float64_t>::clone_matrix(L.data(), nfeats, nfeats); m_linear_transform = SGMatrix<float64_t>(cloned_data, nfeats, nfeats); SG_DEBUG("Leaving CLMNN::train().\n") }
void CKMeans::clustknb(bool use_old_mus, float64_t *mus_start) { ASSERT(distance && distance->get_feature_type()==F_DREAL); CDenseFeatures<float64_t>* lhs = (CDenseFeatures<float64_t>*) distance->get_lhs(); ASSERT(lhs && lhs->get_num_features()>0 && lhs->get_num_vectors()>0); int32_t XSize=lhs->get_num_vectors(); dimensions=lhs->get_num_features(); int32_t i, changed=1; const int32_t XDimk=dimensions*k; int32_t iter=0; R=SGVector<float64_t>(k); mus=SGMatrix<float64_t>(dimensions, k); int32_t *ClList=SG_CALLOC(int32_t, XSize); float64_t *weights_set=SG_CALLOC(float64_t, k); float64_t *dists=SG_CALLOC(float64_t, k*XSize); ///replace rhs feature vectors CDenseFeatures<float64_t>* rhs_mus = new CDenseFeatures<float64_t>(0); CFeatures* rhs_cache = distance->replace_rhs(rhs_mus); int32_t vlen=0; bool vfree=false; float64_t* vec=NULL; /* ClList=zeros(XSize,1) ; */ memset(ClList, 0, sizeof(int32_t)*XSize); /* weights_set=zeros(k,1) ; */ memset(weights_set, 0, sizeof(float64_t)*k); /* cluster_centers=zeros(dimensions, k) ; */ memset(mus.matrix, 0, sizeof(float64_t)*XDimk); if (!use_old_mus) { for (i=0; i<XSize; i++) { const int32_t Cl=CMath::random(0, k-1); int32_t j; float64_t weight=Weights.vector[i]; weights_set[Cl]+=weight; ClList[i]=Cl; vec=lhs->get_feature_vector(i, vlen, vfree); for (j=0; j<dimensions; j++) mus.matrix[Cl*dimensions+j] += weight*vec[j]; lhs->free_feature_vector(vec, i, vfree); } for (i=0; i<k; i++) { int32_t j; if (weights_set[i]!=0.0) for (j=0; j<dimensions; j++) mus.matrix[i*dimensions+j] /= weights_set[i]; } } else { ASSERT(mus_start); /// set rhs to mus_start rhs_mus->copy_feature_matrix(SGMatrix<float64_t>(mus_start,dimensions,k)); float64_t* p_dists=dists; for(int32_t idx=0;idx<XSize;idx++,p_dists+=k) distances_rhs(p_dists,0,k,idx); p_dists=NULL; for (i=0; i<XSize; i++) { float64_t mini=dists[i*k]; int32_t Cl = 0, j; for (j=1; j<k; j++) { if (dists[i*k+j]<mini) { Cl=j; mini=dists[i*k+j]; } } ClList[i]=Cl; } /* Compute the sum of all points belonging to a cluster * and count the points */ for (i=0; i<XSize; i++) { const int32_t Cl = ClList[i]; float64_t weight=Weights.vector[i]; weights_set[Cl]+=weight; #ifndef MUSRECALC vec=lhs->get_feature_vector(i, vlen, vfree); for (j=0; j<dimensions; j++) mus.matrix[Cl*dimensions+j] += weight*vec[j]; lhs->free_feature_vector(vec, i, vfree); #endif } #ifndef MUSRECALC /* normalization to get the mean */ for (i=0; i<k; i++) { if (weights_set[i]!=0.0) { int32_t j; for (j=0; j<dimensions; j++) mus.matrix[i*dimensions+j] /= weights_set[i]; } } #endif } while (changed && (iter<max_iter)) { iter++; if (iter==max_iter-1) SG_WARNING("kmeans clustering changed throughout %d iterations stopping...\n", max_iter-1); if (iter%1000 == 0) SG_INFO("Iteration[%d/%d]: Assignment of %i patterns changed.\n", iter, max_iter, changed); changed=0; #ifdef MUSRECALC /* mus=zeros(dimensions, k) ; */ memset(mus.matrix, 0, sizeof(float64_t)*XDimk); for (i=0; i<XSize; i++) { int32_t j; int32_t Cl=ClList[i]; float64_t weight=Weights.vector[i]; vec=lhs->get_feature_vector(i, vlen, vfree); for (j=0; j<dimensions; j++) mus.matrix[Cl*dimensions+j] += weight*vec[j]; lhs->free_feature_vector(vec, i, vfree); } for (i=0; i<k; i++) { int32_t j; if (weights_set[i]!=0.0) for (j=0; j<dimensions; j++) mus.matrix[i*dimensions+j] /= weights_set[i]; } #endif ///update rhs rhs_mus->copy_feature_matrix(mus); for (i=0; i<XSize; i++) { /* ks=ceil(rand(1,XSize)*XSize) ; */ const int32_t Pat= CMath::random(0, XSize-1); const int32_t ClList_Pat=ClList[Pat]; int32_t imini, j; float64_t mini, weight; weight=Weights.vector[Pat]; /* compute the distance of this point to all centers */ for(int32_t idx_k=0;idx_k<k;idx_k++) dists[idx_k]=distance->distance(Pat,idx_k); /* [mini,imini]=min(dists(:,i)) ; */ imini=0 ; mini=dists[0]; for (j=1; j<k; j++) if (dists[j]<mini) { mini=dists[j]; imini=j; } if (imini!=ClList_Pat) { changed= changed + 1; /* weights_set(imini) = weights_set(imini) + weight ; */ weights_set[imini]+= weight; /* weights_set(j) = weights_set(j) - weight ; */ weights_set[ClList_Pat]-= weight; vec=lhs->get_feature_vector(Pat, vlen, vfree); for (j=0; j<dimensions; j++) { mus.matrix[imini*dimensions+j]-=(vec[j] -mus.matrix[imini*dimensions+j]) *(weight/weights_set[imini]); } lhs->free_feature_vector(vec, Pat, vfree); /* mu_new = mu_old - (x - mu_old)/(n-1) */ /* if weights_set(j)~=0 */ if (weights_set[ClList_Pat]!=0.0) { vec=lhs->get_feature_vector(Pat, vlen, vfree); for (j=0; j<dimensions; j++) { mus.matrix[ClList_Pat*dimensions+j]-= (vec[j] -mus.matrix[ClList_Pat *dimensions+j]) *(weight/weights_set[ClList_Pat]); } lhs->free_feature_vector(vec, Pat, vfree); } else /* mus(:,j)=zeros(dimensions,1) ; */ for (j=0; j<dimensions; j++) mus.matrix[ClList_Pat*dimensions+j]=0; /* ClList(i)= imini ; */ ClList[Pat] = imini; } } } /* compute the ,,variances'' of the clusters */ for (i=0; i<k; i++) { float64_t rmin1=0; float64_t rmin2=0; bool first_round=true; for (int32_t j=0; j<k; j++) { if (j!=i) { int32_t l; float64_t dist = 0; for (l=0; l<dimensions; l++) { dist+=CMath::sq( mus.matrix[i*dimensions+l] -mus.matrix[j*dimensions+l]); } if (first_round) { rmin1=dist; rmin2=dist; first_round=false; } else { if ((dist<rmin2) && (dist>=rmin1)) rmin2=dist; if (dist<rmin1) { rmin2=rmin1; rmin1=dist; } } } } R.vector[i]=(0.7*CMath::sqrt(rmin1)+0.3*CMath::sqrt(rmin2)); } distance->replace_rhs(rhs_cache); delete rhs_mus; SG_FREE(ClList); SG_FREE(weights_set); SG_FREE(dists); SG_UNREF(lhs); }