/**
 * Full training driver: load the X/Y pairs, initialise the model, then
 * alternate expectation and maximization passes until a stop rule fires.
 *
 * myParam.cutOff selects the stop rule:
 *   - cutOff <  1 : stop once the value returned by maximization() is
 *                   less than or equal to cutOff;
 *   - cutOff >= 1 : stop once that many iterations have been run.
 *
 * @param myParam  run configuration; inputFilename and cutOff are read
 *                 here, the whole object is forwarded to the helpers.
 */
void mmEM::training(param myParam)
{
    vector_2str wordX;
    vector_2str wordY;

    // Fill wordX / wordY from the configured input file.
    readFileXY(myParam, myParam.inputFilename, &wordX, &wordY);

    cout << "Initialization ... ";
    initialization(myParam, wordX, wordY);

    // One maximization pass right after initialization, before any E-step.
    cout << "Maximization ... " << endl;
    maximization(myParam);
    cout << endl;

    // Iterations are numbered from 1; the loop is left via the stop rules below.
    for (int iter = 1; ; ++iter)
    {
        cout << "Iteration " << iter << endl;

        // E-step over every (x, y) pair.
        cout << "Expectation ... " ;
        for (int pairIdx = 0; pairIdx < wordX.size(); ++pairIdx)
        {
            expectation(myParam, wordX[pairIdx], wordY[pairIdx]);
        }

        // M-step; its return value is reported as the total probability change.
        cout << "Maximization ... ";
        const long double totalChange = maximization(myParam);
        cout << "Total probability change = " << totalChange << endl;

        const bool changeSmallEnough =
            (myParam.cutOff < 1) && (totalChange <= myParam.cutOff);
        const bool iterationBudgetSpent =
            (myParam.cutOff >= 1) && (iter >= myParam.cutOff);

        if (changeSmallEnough || iterationBudgetSpent)
        {
            break;
        }
    }
    cout << endl;
}
/**
 * Run a fixed number of EM rounds for IBM1.
 *
 * Each round prints a progress line and then calls expectation()
 * followed by maximization().
 *
 * @param T  number of EM rounds; nothing happens when T <= 0.
 */
void IBM1::em(int T)
{
    for (int i = 0; i < T; ++i)
    {
        // Progress message uses 1-based round numbers.
        cout << "Doing EM iteration " << i+1 << " for IBM1." << endl;
        expectation();
        maximization();
    }
}
void em(char* dataset, int k, char* start, char* dir) { FILE* lhood_fptr; char string[100]; int iteration; double convergence = 1, lhood = 0, lhood_old = 0; corpus* corpus; llna_model *model; llna_ss* ss; time_t t1,t2; double avg_niter, converged_pct, old_conv = 0; gsl_matrix *corpus_lambda, *corpus_nu, *corpus_phi_sum; short reset_var = 1; // read the data and make the directory corpus = read_data(dataset); mkdir(dir, S_IRUSR|S_IWUSR|S_IXUSR); // set up the log likelihood log file sprintf(string, "%s/likelihood.dat", dir); lhood_fptr = fopen(string, "w"); // run em model = em_initial_model(k, corpus, start); ss = new_llna_ss(model); corpus_lambda = gsl_matrix_alloc(corpus->ndocs, model->k); corpus_nu = gsl_matrix_alloc(corpus->ndocs, model->k); corpus_phi_sum = gsl_matrix_alloc(corpus->ndocs, model->k); time(&t1); init_temp_vectors(model->k-1); // !!! hacky iteration = 0; sprintf(string, "%s/%03d", dir, iteration); write_llna_model(model, string); do { printf("***** EM ITERATION %d *****\n", iteration); expectation(corpus, model, ss, &avg_niter, &lhood, corpus_lambda, corpus_nu, corpus_phi_sum, reset_var, &converged_pct); time(&t2); convergence = (lhood_old - lhood) / lhood_old; fprintf(lhood_fptr, "%d %5.5e %5.5e %5ld %5.5f %1.5f\n", iteration, lhood, convergence, (int) t2 - t1, avg_niter, converged_pct); if (((iteration % PARAMS.lag)==0) || isnan(lhood)) { sprintf(string, "%s/%03d", dir, iteration); write_llna_model(model, string); sprintf(string, "%s/%03d-lambda.dat", dir, iteration); printf_matrix(string, corpus_lambda); sprintf(string, "%s/%03d-nu.dat", dir, iteration); printf_matrix(string, corpus_nu); } time(&t1); if (convergence < 0) { reset_var = 0; if (PARAMS.var_max_iter > 0) PARAMS.var_max_iter += 10; else PARAMS.var_convergence /= 10; } else { maximization(model, ss); lhood_old = lhood; reset_var = 1; iteration++; } fflush(lhood_fptr); reset_llna_ss(ss); old_conv = convergence; } while ((iteration < PARAMS.em_max_iter) && ((convergence > 
PARAMS.em_convergence) || (convergence < 0))); sprintf(string, "%s/final", dir); write_llna_model(model, string); sprintf(string, "%s/final-lambda.dat", dir); printf_matrix(string, corpus_lambda); sprintf(string, "%s/final-nu.dat", dir); printf_matrix(string, corpus_nu); fclose(lhood_fptr); }
int EMclustering::EM(int k, int *IDX, bool spatial, bool att) { clusternum = k; MatrixXf x; /*if(spatial) { if(att) { x.resize(4,dataSize); for(int i=0;i<dataSize;i++) { x(0,i) = dataPos[i][0]; x(1,i) = dataPos[i][1]; x(2,i) = dataPos[i][2]; x(3,i) = dataDen[i]; } } else { x.resize(6,dataSize); for(int i=0;i<dataSize;i++) { x(0,i) = dataPos[i][0]; x(1,i) = dataPos[i][1]; x(2,i) = dataPos[i][2]; x(3,i) = dataVel[i][0]; x(4,i) = dataVel[i][1]; x(5,i) = dataVel[i][2]; } } } else {*/ if(att) { x.resize(1,dataDen.size()); for(int i=0;i<dataDen.size();i++) { x(0,i) = dataDen[i]; } //cerr<<x; //cerr<<endl; if(k>dataDen.size()) return -1; } else { x.resize(3,dataSize); for(int i=0;i<dataSize;i++) { x(0,i) = dataVel[i][0];//fabs(cos(-PI/4)*dataVel[i][0] - sin(-PI/4)*dataVel[i][1]); x(1,i) = dataVel[i][1];//fabs(sin(-PI/4)*dataVel[i][0] + cos(-PI/4)*dataVel[i][1]); x(2,i) = dataVel[i][2]; } if(k>dataSize) return -1; } //} //cout<<"EM for Gaussian mixture: running ... "<<endl; //cerr<<x<<endl; MatrixXf r =initialization(x,k);// kmeans(x,k);// //cerr<<"Initialization is Done"<<endl;//cerr<<r<<endl; VectorXi label(r.rows()); for(int i=0;i<r.rows();i++) { int index; float tmp1 = r.row(i).maxCoeff(&index); label(i) = index; }//cerr<<label<<endl; VectorXi tmpp(label.size()); VectorXi tmp2 = unique(label,tmpp); int tmpd = tmp2.size(); //cerr<<tmpd<<endl; MatrixXf tmpr(r.rows(),tmpd); for(int i=0;i<tmpd;i++) { tmpr.col(i) = r.col(tmp2(i)); }//cerr<<"done1"<<endl; r.resize(r.rows(),tmpd); r = tmpr;//cerr<<r.cols()<<endl; float tol = 1e-10; int max = 300; double llh = -9e+9; bool converged = false; int t = 1; //cerr<<"done1"<<endl; //gaussian_model model; int clusternum_error; MatrixXf tmpmodel; while(!converged&&t<max) { t = t + 1; gaussian_model model = maximization(x,r);//cerr<<t<<" "<<"max"<<endl; float tmpllh = llh; r = expectation(x,model,llh);//cerr<<t<<" "<<"exp"<<endl; for(int i=0;i<r.rows();i++) { int index; float tmp1 = r.row(i).maxCoeff(&index); label(i) = index; } 
VectorXi u = unique(label,tmpp);//cerr<<t<<" "<<u.size()<<" "<<r.cols()<<" "<<r.rows()<<endl; clusternum_error = clusternum - u.size(); if(r.cols()!=u.size()) { /* tmpr.resize(r.rows(),u.size()); for(int i=0;i<u.size();i++) { tmpr.col(i) = r.col(u(i)); } r.resize(r.rows(),u.size()); r = tmpr;//cerr<<"r"<<endl;*/ } else { if((llh - tmpllh)<tol*abs(llh)) converged = true; else converged = false; } //cerr<<"t"<<t<<endl; //return_model = model; tmpmodel.resize(model.mu.rows(),model.mu.cols()); //return_model = model.mu; tmpmodel = model.mu; u.resize(0); //cerr<<tmpmodel<<endl; } /*ofstream off2("rr"); off2<<r.row(0)<<endl; for(int i=1;i<r.rows();i++) if(r.row(i)!=r.row(i-1)) {off2<<x.col(i)<<" "; off2<<r.row(i)<<endl;} off2.close();*/ cerr<<clusternum_error<<endl; return_model = tmpmodel; //cerr<<label<<endl; if (converged) cerr<<"Converged in "<<t-1<<endl; else cerr<<max<<endl; //cerr<<t-1<<endl; for(int i=0;i<label.size();i++) { IDX[i] = label(i); //cerr<<IDX[i]<<" "; }//cerr<<endl; //cerr<<label.size()<<endl; x.resize(0,0); r.resize(0,0); tmpr.resize(0,0); tmpmodel.resize(0,0); label.resize(0); tmpp.resize(0); tmp2.resize(0); return clusternum_error; }
/**
 * Full training driver with two interchangeable update steps.
 *
 * After loading the X/Y pairs and initialising the model, every
 * iteration runs expectation() over all pairs and then either
 *   - the regular maximization() step (myParam.ashish is false), or
 *   - projectedGradientDescentWithArmijoRule() on flat copies of the
 *     count / probability / alpha tables (myParam.ashish is true).
 *
 * myParam.cutOff selects the stop rule:
 *   - cutOff <  1 : stop once the total probability change of the
 *                   iteration is less than or equal to cutOff;
 *   - cutOff >= 1 : stop once that many iterations have been run.
 *
 * In the projected-gradient branch the change is the sum of absolute
 * differences between old and new probabilities.  Previously that branch
 * had no change-based stop at all, so cutOff < 1 never terminated.
 * NOTE(review): assumes this measure is comparable to the value
 * maximization() returns - confirm.
 *
 * Side effect: global_index is decremented once per call before it is
 * used to size the flat vectors.
 *
 * @param myParam  run configuration (inputFilename, cutOff, ashish,
 *                 beta and pgdIter are read here).
 */
void mmEM::training(param myParam)
{
    vector_2str wordX;
    vector_2str wordY;
    bool stillTrain = true;

    // reading file //
    readFileXY(myParam, myParam.inputFilename, &wordX, &wordY);

    // initialization //
    cout << "Initialization ... ";
    initialization(myParam, wordX, wordY);

    // maximization //
    cout << "Maximization ... " << endl;
    maximization(myParam); // lhuang: M-step after initialize (count = 1)
    cout << endl;

    int atIter = 0;

    // Flat work vectors; table entries map to slot (index value - 1).
    global_index--;
    vector<double> probs_vector(global_index), counts_vector(global_index),
                   newprobs_vector(global_index), alpha_vector(global_index);

    // still training //
    while (stillTrain)
    {
        atIter++;
        cout << "Iteration " << atIter << endl;

        // for each x and y pair //
        cout << "Expectation ... " ;
        for (int i = 0; i < wordX.size(); i++)
        {
            // expectation //
            expectation(myParam, wordX[i], wordY[i]);
        }

        long double totalChange = 0;

        if (!myParam.ashish)
        {
            cout << "Maximization ... ";
            totalChange = maximization(myParam); // lhuang: real M-step
            cout << "Total probability change = " << totalChange << endl;
        }
        else
        {
            cout << "Ashish" ;

            // Flatten the tables.  pos2->second is the very value that
            // index[pos->first][pos2->first] would look up again.
            double sum_probs = 0;
            for (hash_2StrDouble::iterator pos = index.begin(); pos != index.end(); pos++)
            {
                for (hash_StrDouble::iterator pos2 = (pos->second).begin(); pos2 != (pos->second).end(); pos2++)
                {
                    const vector<double>::size_type slot =
                        static_cast<vector<double>::size_type>(pos2->second - 1);

                    counts_vector[slot] = counts[pos->first][pos2->first];
                    probs_vector[slot] = probs[pos->first][pos2->first];
                    alpha_vector[slot] = alphas[pos->first][pos2->first];
                    sum_probs += probs_vector[slot];
                }
            }
            cout << "sum " << sum_probs <<endl;

            // Same sum taken over the flat vector, printed as a cross-check.
            sum_probs = 0;
            for (vector<double>::size_type i = 0; i < newprobs_vector.size(); i++)
            {
                sum_probs += probs_vector[i];
            }
            cout << "sum vector " << sum_probs <<endl;

            projectedGradientDescentWithArmijoRule(counts_vector, probs_vector, newprobs_vector,
                                                   alpha_vector, myParam.beta, myParam.pgdIter);

            // Write the new probabilities back and accumulate how much they moved.
            for (hash_2StrDouble::iterator pos = index.begin(); pos != index.end(); pos++)
            {
                for (hash_StrDouble::iterator pos2 = (pos->second).begin(); pos2 != (pos->second).end(); pos2++)
                {
                    const vector<double>::size_type slot =
                        static_cast<vector<double>::size_type>(pos2->second - 1);
                    const double updated = newprobs_vector[slot];

                    const long double delta = updated - probs[pos->first][pos2->first];
                    totalChange += (delta < 0) ? -delta : delta;

                    probs[pos->first][pos2->first] = updated;
                }
            }
            cout<< endl;

            // clean counts //
            counts.clear();
        }

        // stop by the probability change condition //
        if ((totalChange <= myParam.cutOff) && (myParam.cutOff < 1))
        {
            stillTrain = false;
        }

        // stop by the number of iteration condition //
        if ((myParam.cutOff >= 1) && (atIter >= myParam.cutOff))
        {
            stillTrain = false;
        }
    }
    cout << endl;
}