void mixMarkov(vector< vector<int> >& X, int trainingNum, int numOfState, int K, vector<double>& pi, vector< vector<double> >& thetaInit, vector< vector< vector<double> > >& thetaTrans) { vector < vector< map<int, int> > > sigma; vector<double> ll_variables(3); vector< vector<double> > condProb(trainingNum); initVariable(pi, thetaInit, thetaTrans); setSigma(X, sigma, trainingNum); estep(X, pi, thetaInit, thetaTrans, condProb); ll_variables = computeLL(X, thetaInit, thetaTrans); for (iter = 1; iter<= MAX_ITER; ++iter) { mstep(pi, thetaInit, thetaTrans, condProb); estep(X, pi, thetaInit, thetaTrans, condProb); vector<double> new_ll_variables(3); new_ll_variables = computeLL(X, thetaInit, thetaTrans); //check for convergence if (new_ll_variables[0] + new_ll_variables[1] + new_ll_variables[2] - (ll_variables[0] + ll_variables[1] + ll_variables[2]) >TOL ) { ll_variables = new_ll_variables; } else { break; } } }
/* Short EM run.
 *
 * Same procedure as emcluster; the only difference is the stopping rule,
 * which follows Biernacki et al.: iteration continues while
 *     (oldllhd - llhd) / (llh0 - llhd) > eps
 * and fewer than maxiter iterations have been performed, where llh0 is the
 * log-likelihood before the first iteration.
 *
 * Historical note (Wei-Chen Chen, 2009/01/21): the alternative form
 * (oldllhd - llhd) / (llhd - llh0) is always negative, hence the denominator
 * used here.
 *
 * The final log-likelihood is stored in *llhdval and the number of
 * iterations performed is returned.
 */
int shortemcluster_org(int n,int p,int k,double *pi,double **X,double **Mu,
    double **LTSigma,int maxiter,double eps,double *llhdval)
{
  double **gamm;
  double llh_start, llh_prev, llh_cur;
  int iter = 0;

  MAKE_MATRIX(gamm,n,k);

  llh_cur = lnlikelihood(n,p,k,pi,X,Mu,LTSigma);
  llh_start = llh_cur;

  for(;;){
    llh_prev = llh_cur;

    estep(n,p,k,X,gamm,pi,Mu,LTSigma);
    mstep(X,n,p,k,pi,Mu,LTSigma,gamm);
    llh_cur = lnlikelihood(n,p,k,pi,X,Mu,LTSigma);
    iter++;

    /* Relative-improvement criterion no longer satisfied. */
    if(!((llh_prev - llh_cur) / (llh_start - llh_cur) > eps)) break;
    /* Iteration budget exhausted. */
    if(!(iter < maxiter)) break;
  }

  *llhdval = llh_cur;
  FREE_MATRIX(gamm);
  return iter;
}
/**
 * Trains the Gaussian mixture model on the rows of `data`.
 *
 * The cluster means are seeded from distinct, randomly chosen training rows,
 * every covariance matrix starts as 1.0e-2 on the diagonal and zero elsewhere,
 * and the priors start out uniform. estep() and mstep() are then alternated
 * until |change| has stayed below minChange for minNumEpochs consecutive
 * iterations, or until maxNumEpochs iterations have been run. Finally the
 * inverse covariances and determinants are computed via computeInvAndDet().
 *
 * @param data training samples, one per row. NOTE: when useScaling is set the
 *             matrix is rescaled in place to [0 1] using its own ranges.
 * @return true if the model was trained; false if the data is empty, the
 *         number of clusters is zero or exceeds the number of samples, an
 *         estep fails, or the inverse/determinant computation fails.
 */
bool GaussianMixtureModels::train_(MatrixFloat &data){

    trained = false;

    //Clear any previous training results
    det.clear();
    invSigma.clear();
    numTrainingIterationsToConverge = 0;

    if( data.getNumRows() == 0 ){
        errorLog << "train_(MatrixFloat &data) - Training Failed! Training data is empty!" << std::endl;
        return false;
    }

    //There must be at least one cluster to fit
    if( numClusters == 0 ){
        errorLog << "train_(MatrixFloat &data) - Training Failed! NumClusters is zero!" << std::endl;
        return false;
    }

    //Each initial mean is taken from a different training sample, so the index
    //table below (one entry per sample) must hold at least numClusters entries
    if( numClusters > data.getNumRows() ){
        errorLog << "train_(MatrixFloat &data) - Training Failed! The number of clusters (" << numClusters << ") is greater than the number of training samples (" << data.getNumRows() << ")!" << std::endl;
        return false;
    }

    //Resize the variables
    numTrainingSamples = data.getNumRows();
    numInputDimensions = data.getNumCols();

    //Resize mu and resp
    mu.resize(numClusters,numInputDimensions);
    resp.resize(numTrainingSamples,numClusters);

    //Resize sigma
    sigma.resize(numClusters);
    for(UINT k=0; k<numClusters; k++){
        sigma[k].resize(numInputDimensions,numInputDimensions);
    }

    //Resize frac and lndets
    frac.resize(numClusters);
    lndets.resize(numClusters);

    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<numTrainingSamples; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
            }
        }
    }

    //Pick K random starting points for the initial guesses of Mu
    Random random;
    Vector< UINT > randomIndexs(numTrainingSamples);
    for(UINT i=0; i<numTrainingSamples; i++) randomIndexs[i] = i;
    //Only the first numClusters slots are consumed below, so only those are shuffled
    for(UINT i=0; i<numClusters; i++){
        SWAP(randomIndexs[ i ],randomIndexs[ random.getRandomNumberInt(0,numTrainingSamples) ]);
    }
    for(UINT k=0; k<numClusters; k++){
        for(UINT n=0; n<numInputDimensions; n++){
            mu[k][n] = data[ randomIndexs[k] ][n];
        }
    }

    //Setup sigma and the uniform prior on P(k)
    for(UINT k=0; k<numClusters; k++){
        frac[k] = 1.0/Float(numClusters);
        for(UINT i=0; i<numInputDimensions; i++){
            for(UINT j=0; j<numInputDimensions; j++) sigma[k][i][j] = 0;
            sigma[k][i][i] = 1.0e-2; //Set the diagonal to a small number
        }
    }

    loglike = 0;
    bool keepGoing = true;
    Float change = 99.9e99;
    UINT numIterationsNoChange = 0;
    VectorFloat u(numInputDimensions);
    VectorFloat v(numInputDimensions);

    while( keepGoing ){

        //Run the estep
        if( estep( data, u, v, change ) ){

            //Run the mstep
            mstep( data );

            //Check for convergence: the change must stay small for minNumEpochs iterations in a row
            if( fabs( change ) < minChange ){
                if( ++numIterationsNoChange >= minNumEpochs ){
                    keepGoing = false;
                }
            }else numIterationsNoChange = 0;

            if( ++numTrainingIterationsToConverge >= maxNumEpochs ) keepGoing = false;

        }else{
            errorLog << "train_(MatrixFloat &data) - Estep failed at iteration " << numTrainingIterationsToConverge << std::endl;
            return false;
        }
    }

    //Compute the inverse of sigma and the determinants for prediction
    if( !computeInvAndDet() ){
        det.clear();
        invSigma.clear();
        errorLog << "train_(MatrixFloat &data) - Failed to compute inverse and determinat!" << std::endl;
        return false;
    }

    //Flag that the model was trained
    trained = true;

    //Setup the cluster labels (1-based)
    clusterLabels.resize(numClusters);
    for(UINT i=0; i<numClusters; i++){
        clusterLabels[i] = i+1;
    }
    clusterLikelihoods.resize(numClusters,0);
    clusterDistances.resize(numClusters,0);

    return true;
}
/* Model-based initializer. */ void mb_init(double **X, int n, int p, int nclass, double *pi, double **Mu, double **LTSigma){ int i, min_n = p + 1, n_nclass = (int) ceil((double) n / (double) nclass); int G, nc[nclass], i2, i3, j = 1; int tmp_G, tmp_class, class[n]; double lambda, max_prob, prob[n], **gamm; double tmp_prob, tmp_n, tmp_pi, tmp_gamm; double class_prob[n], tmp_class_prob; PARAM param; void *pt_param; /* Find lambda, the expected size of neighbor. */ if(n_nclass < min_n){ //WCC printf("n is too small, or p or k is too large.\n"); //WCC exit(1); error("n is too small, or p or k is too large.\n"); } param.n_nclass = (double) (n_nclass - min_n); param.lower_bound = 1e-6; param.upper_bound = param.n_nclass; param.tol = 1e-6; param.maxit = 1000; pt_param = ¶m; lambda = find_lambda(pt_param); tmp_n = (double) n; MAKE_MATRIX(gamm, n, nclass); tmp_prob = 1.0 / (double) n; do{ /* Initial prob. */ for(i = 0; i < n; i++) prob[i] = tmp_prob; /* Note: cut_sub() will overwrite prob. */ cut_sub(X, n, p, 1, min_n, lambda, prob, Mu, LTSigma); pi[0] = 1.0; for(i = 0; i < n; i++) class[i] = 0; nc[0] = n; for(G = 2; G <= nclass; G++){ max_prob = 0.0; tmp_G = G - 1; for(i = 0; i < n; i++){ prob[i] = mixllhd(p, tmp_G, X[i], pi, Mu, LTSigma); if(prob[i] > max_prob) max_prob = prob[i]; } for(i = 0; i < n; i++) prob[i] = max_prob - prob[i]; /* Drop the 75% points around cluster centers. */ for(i = 0; i < G; i++){ i3 = 0; for(i2 = 0; i2 < n; i2++){ if(class[i2] == i) class_prob[i3++] = prob[i2]; } tmp_class_prob = mb_quantile(i3, class_prob, 0.75); for(i2 = 0; i2 < n; i2++){ if(class[i2] == i && prob[i2] < tmp_class_prob) prob[i2] = 0.0; } } /* Note: cut_sub() will overwrite prob. */ cut_sub(X, n, p, G, min_n, lambda, prob, Mu, LTSigma); /* Assume uniform for pi, do one estep, and reassign new pi. */ tmp_pi = 1.0 / (double) G; for(i = 0; i < G; i++) pi[i] = tmp_pi; /* Run one estep to update pi. 
*/ estep(n, p, G, X, gamm, pi, Mu, LTSigma); for(i = 0; i < G; i++) nc[i] = 0; for(i = 0; i < n; i++){ tmp_gamm = 0.0; tmp_class = 0; for(i2 = 0; i2 < G; i2++){ if(tmp_gamm < gamm[i][i2]){ tmp_gamm = gamm[i][i2]; tmp_class = i2; } } class[i] = tmp_class; nc[tmp_class] = nc[tmp_class] + 1; } j = 1; for(i = 0; i < G; i++){ if(nc[i] < min_n){ j = 0; break; } } if(j == 0) break; for(i = 0; i < G; i++) pi[i] = (double) nc[i] / tmp_n; } } while(j == 0); FREE_MATRIX(gamm); } /* End of mb_init(). */
bool GaussianMixtureModels::train(const MatrixDouble &data,const UINT K){ modelTrained = false; failed = false; //Clear any previous training results det.clear(); invSigma.clear(); if( data.getNumRows() == 0 ){ errorLog << "train(const MatrixDouble &trainingData,const unsigned int K) - Training Failed! Training data is empty!" << endl; return false; } //Resize the variables M = data.getNumRows(); N = data.getNumCols(); this->K = K; //Resize mu and resp mu.resize(K,N); resp.resize(M,K); //Resize sigma sigma.resize(K); for(UINT k=0; k<K; k++){ sigma[k].resize(N,N); } //Resize frac and lndets frac.resize(K); lndets.resize(K); //Pick K random starting points for the inital guesses of Mu Random random; vector< UINT > randomIndexs(M); for(UINT i=0; i<M; i++) randomIndexs[i] = i; for(UINT i=0; i<M; i++){ SWAP(randomIndexs[ random.getRandomNumberInt(0,M) ],randomIndexs[ random.getRandomNumberInt(0,M) ]); } for(UINT k=0; k<K; k++){ for(UINT n=0; n<N; n++){ mu[k][n] = data[ randomIndexs[k] ][n]; } } //Setup sigma and the uniform prior on P(k) for(UINT k=0; k<K; k++){ frac[k] = 1.0/double(K); for(UINT i=0; i<N; i++){ for(UINT j=0; j<N; j++) sigma[k][i][j] = 0; sigma[k][i][i] = 1.0e-10; //Set the diagonal to a small number } } loglike = 0; UINT iterCounter = 0; bool keepGoing = true; double change = 99.9e99; while( keepGoing ){ change = estep( data ); mstep( data ); if( fabs( change ) < minChange ) keepGoing = false; if( ++iterCounter >= maxIter ) keepGoing = false; if( failed ) keepGoing = false; } if( failed ){ errorLog << "train(UnlabelledClassificationData &trainingData,unsigned int K) - Training failed!" << endl; return modelTrained; } //Compute the inverse of sigma and the determinants for prediction if( !computeInvAndDet() ){ det.clear(); invSigma.clear(); errorLog << "train(UnlabelledClassificationData &trainingData,unsigned int K) - Failed to compute inverse and determinat!" << endl; return false; } //Flag that the model was trained modelTrained = true; return true; }
/**
 * Runs the K-means training loop on `data`, starting from the cluster centres
 * that are already stored in `clusters`.
 *
 * The loop alternates estep() and mstep(). It stops when no assignment
 * changed, or (if computeTheta is enabled) when |delta theta| is below
 * minChange - both only after more than minNumEpochs epochs - or when
 * maxNumEpochs epochs have been run.
 *
 * @param data training samples; rescaled in place to [0 1] when useScaling is set
 * @return false if the cluster matrix has not been initialised consistently
 *         with numClusters / numInputDimensions, true otherwise
 */
bool KMeans::trainModel(MatrixFloat &data){

    //Validate the configuration before touching any state
    if( numClusters == 0 ){
        errorLog << "trainModel(MatrixFloat &data) - Failed to train model. NumClusters is zero!" << std::endl;
        return false;
    }

    if( clusters.getNumRows() != numClusters ){
        errorLog << "trainModel(MatrixFloat &data) - Failed to train model. The number of rows in the cluster matrix does not match the number of clusters! You should need to initalize the clusters matrix first before calling this function!" << std::endl;
        return false;
    }

    if( clusters.getNumCols() != numInputDimensions ){
        errorLog << "trainModel(MatrixFloat &data) - Failed to train model. The number of columns in the cluster matrix does not match the number of input dimensions! You should need to initalize the clusters matrix first before calling this function!" << std::endl;
        return false;
    }

    Timer timer;
    UINT epoch = 0;
    Float theta = 0;
    Float previousTheta = 0;
    Float delta = 0;

    //Reset the results of any previous training run
    thetaTracker.clear();
    finalTheta = 0;
    numTrainingIterationsToConverge = 0;
    trained = false;
    converged = false;

    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        data.scale(0,1);
    }

    //Every sample starts with the out-of-range label K+1, so that the first
    //estep counts each assignment as changed
    for(UINT m=0; m<numTrainingSamples; m++){
        assign[m] = numClusters+1;
    }
    for(UINT k=0; k<numClusters; k++){
        count[k] = 0;
    }

    //Run the training loop
    timer.start();
    bool keepTraining = true;
    while( keepTraining ){

        const Float epochStart = timer.getMilliSeconds();

        //E step followed by M step
        const UINT numChanged = estep( data );
        mstep( data );
        ++epoch;

        //Track theta only when requested
        if( computeTheta ){
            theta = calculateTheta(data);
            delta = previousTheta - theta;
            previousTheta = theta;
        }else{
            delta = 0;
            theta = delta;
        }

        //Stopping rules
        const bool pastMinEpochs = epoch > minNumEpochs;
        const bool assignmentsStable = pastMinEpochs && numChanged == 0;
        const bool thetaStable = fabs( delta ) < minChange && computeTheta && pastMinEpochs;

        if( assignmentsStable || thetaStable ){
            converged = true;
            keepTraining = false;
        }
        if( epoch >= maxNumEpochs ){
            keepTraining = false;
        }

        if( computeTheta ){
            thetaTracker.push_back( theta );
        }

        trainingLog << "Epoch: " << epoch << "/" << maxNumEpochs;
        trainingLog << " Epoch time: " << (timer.getMilliSeconds()-epochStart)/1000.0 << " seconds";
        trainingLog << " Theta: " << theta << " Delta: " << delta << std::endl;
    }

    trainingLog << "Model Trained at epoch: " << epoch << " with a theta value of: " << theta << std::endl;

    finalTheta = theta;
    numTrainingIterationsToConverge = epoch;
    trained = true;

    //Setup the cluster labels (1-based)
    clusterLabels.resize(numClusters);
    for(UINT label=0; label<numClusters; label++){
        clusterLabels[label] = label+1;
    }
    clusterLikelihoods.resize(numClusters,0);
    clusterDistances.resize(numClusters,0);

    return true;
}
/* This function calls estep() in "src/emcluster.c" and is called by
   e.step() using .Call() in "R/fcn_e_step.r".
   Input:
     R_x: SEXP[R_n * R_p], data matrix; observation i occupies the R_p
          consecutive values starting at offset i * R_p.
     R_n: SEXP[1], number of observations.
     R_p: SEXP[1], number of dimensions.
     R_nclass: SEXP[1], number of classes.		# k
     R_p_LTSigma: SEXP[1], dimension of LTSigma, p * (p + 1) / 2.
     R_pi: SEXP[R_nclass], proportions of classes.
     R_Mu: SEXP[R_nclass, R_p], means of MVNs; class k occupies the R_p
           consecutive values starting at offset k * R_p.
     R_LTSigma: SEXP[R_nclass, R_p * (R_p + 1) / 2], lower triangular sigma
                matrices; class k starts at offset k * R_p_LTSigma.
     R_norm: SEXP[1], 1 to call estep(), anything else to call
             estep_unnorm_dlmvn().
   Output:
     ret: a list with a single element
       Gamma: SEXP[R_n * R_nclass], posterior matrix; observation i occupies
              the R_nclass consecutive values starting at offset i * R_nclass.
*/
SEXP R_estep(SEXP R_x, SEXP R_n, SEXP R_p, SEXP R_nclass, SEXP R_p_LTSigma,
    SEXP R_pi, SEXP R_Mu, SEXP R_LTSigma, SEXP R_norm){
  /* Declare variables for calling C. */
  double **C_Gamma, **C_x, *C_pi, **C_Mu, **C_LTSigma;
  int *C_n, *C_p, *C_nclass, *C_p_LTSigma, *C_norm;

  /* Declare variables for R's returning. */
  SEXP Gamma, ret, ret_names;

  /* Declare variables for processing. */
  double *ptr_Gamma, *ptr_x, *ptr_Mu, *ptr_LTSigma;
  int i;
  char *names[1] = {"Gamma"};

  /* Fetch every input pointer first. These accessors raise an R error on a
     type mismatch, and an R error does not return here, so they must run
     before the malloc-backed row-pointer arrays below exist; otherwise those
     arrays would leak. */
  C_n = INTEGER(R_n);
  C_p = INTEGER(R_p);
  C_nclass = INTEGER(R_nclass);
  C_p_LTSigma = INTEGER(R_p_LTSigma);
  C_norm = INTEGER(R_norm);
  C_pi = REAL(R_pi);
  ptr_x = REAL(R_x);
  ptr_Mu = REAL(R_Mu);
  ptr_LTSigma = REAL(R_LTSigma);

  /* Allocate and protect storages. */
  PROTECT(Gamma = allocVector(REALSXP, *C_n * *C_nclass));
  PROTECT(ret = allocVector(VECSXP, 1));
  PROTECT(ret_names = allocVector(STRSXP, 1));
  SET_VECTOR_ELT(ret, 0, Gamma);
  SET_STRING_ELT(ret_names, 0, mkChar(names[0]));
  setAttrib(ret, R_NamesSymbol, ret_names);
  ptr_Gamma = REAL(Gamma);

  /* Build row-pointer views over the flat R vectors (no data is copied). */
  C_Gamma = allocate_double_array(*C_n);
  C_x = allocate_double_array(*C_n);
  C_Mu = allocate_double_array(*C_nclass);
  C_LTSigma = allocate_double_array(*C_nclass);

  for(i = 0; i < *C_n; i++){
    C_Gamma[i] = ptr_Gamma;
    C_x[i] = ptr_x;
    ptr_Gamma += *C_nclass;
    ptr_x += *C_p;
  }

  for(i = 0; i < *C_nclass; i++){
    C_Mu[i] = ptr_Mu;
    C_LTSigma[i] = ptr_LTSigma;
    ptr_Mu += *C_p;
    ptr_LTSigma += *C_p_LTSigma;
  }

  /* Compute. */
  if(*C_norm == 1){
    estep(*C_n, *C_p, *C_nclass, C_x, C_Gamma, C_pi, C_Mu, C_LTSigma);
  } else{
    estep_unnorm_dlmvn(*C_n, *C_p, *C_nclass, C_x, C_Gamma, C_pi, C_Mu,
                       C_LTSigma);
  }

  /* Free memory and release protection. Only the pointer arrays are freed;
     the data they point at belongs to R. */
  free(C_Gamma);
  free(C_x);
  free(C_Mu);
  free(C_LTSigma);
  UNPROTECT(3);

  return(ret);
} /* End of R_estep(). */