/*
 * Random starting values for EM.
 *
 * Repeats the following until initials() returns a non-zero value:
 *   - sample nclass row indices of x with srswor() and copy those rows
 *     into Mu as the class centers;
 *   - label every row of x with the class returned by assign_closest();
 *   - hand the labels to initials(), which receives nc, Mu and LTSigma.
 * Afterwards pi[k] is set to nc[k] / n.
 *
 * x        n rows of length p (the data).
 * pi       output, length nclass.
 * Mu       output, nclass rows of length p.
 * LTSigma  passed straight through to initials().
 *
 * Always returns 0.  NOTE(review): the loop has no retry limit, so it only
 * ends once initials() reports a non-zero status.
 */
int randomEMinit(double **x, int n, int p, int nclass, double *pi,
		double **Mu, double **LTSigma)
{
	int *picked, *member, *count;
	int k, d, obs;

	/* Only nclass indices are requested from srswor(), so nclass slots are
	   enough (the buffer and the request were once sized n; corrected by
	   Wei-Chen Chen on 2009/03/13). */
	MAKE_VECTOR(picked, nclass);
	MAKE_VECTOR(member, n);
	MAKE_VECTOR(count, nclass);

	for(;;){
		srswor(n, nclass, picked);

		/* The sampled observations become the centers. */
		for(k = 0; k < nclass; k++){
			const double *src = x[picked[k]];
			double *dst = Mu[k];

			for(d = 0; d < p; d++){
				dst[d] = src[d];
			}
		}

		for(obs = 0; obs < n; obs++){
			member[obs] = assign_closest(x[obs], p, nclass, Mu);
		}

		/* A zero status means this draw is rejected and a new one is made. */
		if(initials(x, n, p, nclass, count, Mu, LTSigma, member) != 0){
			break;
		}
	}

	for(k = 0; k < nclass; k++){
		pi[k] = 1. * count[k] / n;
	}

	FREE_VECTOR(count);
	FREE_VECTOR(member);
	FREE_VECTOR(picked);
	return 0;
}
/*
 * Semi-supervised counterpart of randomEMinit(); called by ss_shortems().
 *
 * Precondition: Mu[0 .. labK-1] must already hold the centers of the labK
 * labeled classes.  Those rows are saved on entry and written back at the
 * start of every attempt, before initials() is called again.
 *
 * Each attempt:
 *   - samples nclass - labK positions out of nonlab_total with srswor();
 *     position s is mapped to row lab_index[s] of x, and those rows become
 *     Mu[labK .. nclass-1];
 *   - gives every observation with lab[i] == -1 the class returned by
 *     assign_closest(); every other observation keeps lab[i];
 *   - calls initials(); a return value of 0 triggers another attempt.
 * Finally pi[k] = nc[k] / n.
 *
 * NOTE(review): the saved copy is a variable length array of labK * p
 * doubles on the stack; this presumes labK >= 1 and a modest labK * p.
 */
void ss_randomEMinit(double **x, int n, int p, int nclass, double *pi,
		double **Mu, double **LTSigma, int *lab, int labK, int nonlab_total,
		int *lab_index)
{
	int *picked, *member, *count;
	int k, d, obs;
	int n_free = nclass - labK;	/* classes without a given center */
	double saved_Mu[labK][p];

	/* Snapshot of the caller-provided centers. */
	for(k = 0; k < labK; k++){
		for(d = 0; d < p; d++){
			saved_Mu[k][d] = Mu[k][d];
		}
	}

	/* Initial centers for all other unknown clusters. */
	MAKE_VECTOR(picked, n_free);
	MAKE_VECTOR(member, n);
	MAKE_VECTOR(count, nclass);

	for(;;){
		/* Put the labeled centers back. */
		for(k = 0; k < labK; k++){
			for(d = 0; d < p; d++){
				Mu[k][d] = saved_Mu[k][d];
			}
		}

		/* Draw the remaining centers among the unlabeled observations. */
		srswor(nonlab_total, n_free, picked);
		for(k = 0; k < n_free; k++){
			for(d = 0; d < p; d++){
				Mu[labK + k][d] = x[lab_index[picked[k]]][d];
			}
		}

		for(obs = 0; obs < n; obs++){
			if(lab[obs] != -1){
				member[obs] = lab[obs];
			} else{
				member[obs] = assign_closest(x[obs], p, nclass, Mu);
			}
		}

		if(initials(x, n, p, nclass, count, Mu, LTSigma, member) != 0){
			break;
		}
	}

	for(k = 0; k < nclass; k++){
		pi[k] = 1. * count[k] / n;
	}

	FREE_VECTOR(count);
	FREE_VECTOR(member);
	FREE_VECTOR(picked);
} /* End of ss_randomEMinit(). */
/*
 * Random initialization of the centers Mu when part of the data is labeled.
 *
 * Up to EMC->max_init_iter attempts are made.  Each attempt:
 *   1. resets QA;
 *   2. for every k < K_labeled, samples (srswor) one of the labeled
 *      sequences whose label_semi equals k and looks it up in X by pointer
 *      identity; for the other K - K_labeled clusters, samples entries of
 *      X_unlabeled and looks them up the same way;
 *   3. copies the chosen rows of X into Mu and zeroes n_class;
 *   4. assigns every row of X to the center with the smallest value of
 *      eds->get_pair_edist() (first center wins ties), writing a 0/1 row
 *      into Z_normalized and adding replication_X to n_class;
 *   5. replaces sites of Mu equal to gap_index or MISSING_ALLELE by the
 *      consensus computed from X_org;
 *   6. if check_all_min_n_class() passes, runs init_m_step() and stops as
 *      soon as LogL_observed() is finite.
 *
 * When the attempt counter has reached max_init_iter, init_m_step() and
 * LogL_observed() are evaluated once more on the current state.
 *
 * Returns the status of the last init_m_step() call (0 if none was made),
 * or 1 when the final observed log-likelihood is not finite.
 *
 * NOTE(review): a label k without any labeled sequence makes srswor() be
 * called with a population of 0 and X_labeled be indexed at N_X_labeled;
 * callers are presumably expected to rule that out.
 */
int Update_init_random_Mu_unique_label(em_phyclust_struct *empcs, Q_matrix_array *QA, em_control *EMC,
		em_fp *EMFP){
	int N_X = empcs->N_X, K = empcs->K, L = empcs->L;
	int N_X_unlabeled = empcs->N_X_unlabeled, K_labeled = empcs->K_labeled;
	int K_unlabeled = K - K_labeled;
	int init_iter = 0, ret_stop = 0;
	int i, k, l, hits, pick, nearest;
	int center_id[K], sampled[K];
	int consensus_Mu[L];
	double dist, nearest_dist, init_logL_observed = 0.0;
	edist_struct *eds;

	find_consensus_Mu(empcs->N_X_org, L, empcs->ncode, empcs->gap_index, empcs->X_org, consensus_Mu);
	eds = initialize_edist_struct_UT(EMC->edist_model, N_X, L, empcs->X);

	while(init_iter < EMC->max_init_iter){
		init_iter++;
		reset_Q_matrix_array(QA);
		/* reset_SE_P_matrix(empcs->SE_P) is not called here (it was
		   already commented out in the original code). */

		/* One random center per labeled cluster. */
		for(k = 0; k < K_labeled; k++){
			/* Number of labeled sequences carrying label k. */
			hits = 0;
			for(i = 0; i < empcs->N_X_labeled; i++){
				hits += (empcs->label_semi[i] == k);
			}
			srswor(hits, 1, sampled);

			/* Walk to the sampled[0]-th (0-based) sequence with label k. */
			hits = -1;
			for(i = 0; i < empcs->N_X_labeled; i++){
				if(empcs->label_semi[i] != k){
					continue;
				}
				if(++hits == sampled[0]){
					break;
				}
			}
			pick = i;

			/* Find the row of X that is the very same sequence. */
			for(i = 0; i < N_X; i++){
				if(empcs->X[i] == empcs->X_labeled[pick]){
					center_id[k] = i;
					break;
				}
			}
		}

		/* Random centers for the clusters without labels. */
		if(K_unlabeled > 0){
			srswor(N_X_unlabeled, K_unlabeled, sampled);
			for(k = 0; k < K_unlabeled; k++){
				pick = sampled[k];
				for(i = 0; i < N_X; i++){
					if(empcs->X[i] == empcs->X_unlabeled[pick]){
						center_id[K_labeled + k] = i;
						break;
					}
				}
			}
		}

		/* Assign Mu by centers. */
		for(k = 0; k < K; k++){
			empcs->n_class[k] = 0;
			for(l = 0; l < L; l++){
				empcs->Mu[k][l] = empcs->X[center_id[k]][l];
			}
		}

		/* Assign X to the nearest Mu by distance, and recreate Z_normalized. */
		for(i = 0; i < N_X; i++){
			nearest = 0;
			nearest_dist = eds->get_pair_edist(eds, i, center_id[0]);
			for(k = 1; k < K; k++){
				dist = eds->get_pair_edist(eds, i, center_id[k]);
				if(dist < nearest_dist){
					nearest_dist = dist;
					nearest = k;
				}
			}

			for(k = 0; k < K; k++){
				empcs->Z_normalized[i][k] = (k == nearest) ? 1.0 : 0.0;
			}
			empcs->n_class[nearest] += empcs->replication_X[i];
		}

		/* Replace gaps by the consensus. */
		for(k = 0; k < K; k++){
			for(l = 0; l < L; l++){
				if(empcs->Mu[k][l] == empcs->gap_index || empcs->Mu[k][l] == MISSING_ALLELE){
					empcs->Mu[k][l] = consensus_Mu[l];
				}
			}
		}

		/* Reject the attempt if the class sizes fail the check. */
		if(!check_all_min_n_class(K, empcs->n_class, EMC->min_n_class)){
			continue;
		}

		ret_stop = init_m_step(empcs, QA, EMC, EMFP);
		if(ret_stop > 0){
			continue;
		}

		init_logL_observed = EMFP->LogL_observed(empcs, QA);
		if(is_finite(init_logL_observed)){
			break;
		}
	}

	/* Left the loop before the counter reached the limit: nothing more to check. */
	if(init_iter < EMC->max_init_iter){
		free_edist_struct(eds);
		return(ret_stop);
	}

	/* Counter at the limit: evaluate the current state once more. */
	ret_stop = init_m_step(empcs, QA, EMC, EMFP);
	if(ret_stop > 0){
#if PRINT_ERROR > 0
		fprintf_stderr("PE: Initialization error. (%s)\n", INIT_METHOD[EMC->init_method]);
#endif
		free_edist_struct(eds);
		return(ret_stop);
	}

	init_logL_observed = EMFP->LogL_observed(empcs, QA);
	if(!is_finite(init_logL_observed)){
#if PRINT_ERROR > 0
		fprintf_stderr("PE: Initial logL_observed is not finit. (%s)\n", INIT_METHOD[EMC->init_method]);
#endif
		free_edist_struct(eds);
		return(1);
	}

	free_edist_struct(eds);
	return(ret_stop);
} /* End of Update_init_random_Mu_unique_label(). */